Java 类名:com.alibaba.alink.operator.batch.finance.GroupScorecardTrainBatchOp
Python 类名:GroupScorecardTrainBatchOp
分群评分卡训练
名称 | 中文名称 | 描述 | 类型 | 是否必须? | 取值范围 | 默认值 |
---|---|---|---|---|---|---|
groupCols | 分组列名,多列 | 分组列名,多列,必选 | String数组 | ✓ | ||
labelCol | 标签列名 | 输入表中的标签列名 | String | ✓ | ||
selectedCols | 选择的列名 | 计算列对应的列名列表 | String数组 | ✓ | ||
alphaEntry | 筛选阈值 | 筛选阈值 | Double | | | 0.0500 |
alphaStay | 移除阈值 | 移除阈值 | Double | | | 0.0500 |
constOptimMethod | 优化方法 | 求解优化问题时选择的优化方法 | ConstOptimMethod | | SQP, Barrier, LBFGS, Newton, ALM | SQP |
defaultWoe | 默认Woe,在woe为Nan或NULL时替换 | 默认Woe,在woe为Nan或NULL时替换 | Double | | | NaN |
encode | 编码方法 | 编码方法 | Encode | | WOE, ASSEMBLED_VECTOR, NULL | ASSEMBLED_VECTOR |
epsilon | 收敛阈值 | 迭代方法的终止判断阈值,默认值为 1.0e-6 | Double | | x >= 0.0 | 1.0e-6 |
forceSelectedCols | 强制选择的列 | 强制选择的列 | String数组 | |||
l1 | L1 正则化系数 | L1 正则化系数,默认为0。 | Double | | x >= 0.0 | 0.0000 |
l2 | L2 正则化系数 | L2 正则化系数,默认为0。 | Double | | x >= 0.0 | 0.0000 |
linearModelType | 线性模型 | 线性模型 | LinearModelType | | LR, LinearReg, Divergence | LR |
maxIter | 最大迭代步数 | 最大迭代步数,默认为 100 | Integer | | x >= 1 | 100 |
maxLeaves | 叶节点的最多个数 | 叶节点的最多个数 | Integer | | | 2147483647 |
minOutcomeSamplesPerLeaf | 叶节点的最小正负样本个数 | 叶节点的最小正负样本个数 | Integer | | | 500 |
minSamplesPerLeaf | 叶节点的最小样本个数 | 叶节点的最小样本个数 | Integer | | | 500 |
odds | 分数基准点处的odds值 | 分数基准点处的odds值 | Double | | | null |
pdo | 分数增长pdo,odds加倍 | 分数增长pdo,odds加倍 | Double | | | null |
positiveLabelValueString | 正样本 | 正样本对应的字符串格式。 | String | | | 1 |
scaleInfo | 是否将模型进行分数转换 | 是否将模型进行分数转换 | Boolean | | | false |
scaledValue | 分数基准点 | 分数基准点 | Double | | | null |
treeSplitMeasure | 树节点分裂标准 | 树节点分裂标准 | TreeMeasure | | AUC, KS | AUC |
weightCol | 权重列名 | 权重列对应的列名 | String | | | null |
withSelector | 是否逐步回归 | 是否逐步回归 | Boolean | | | false |
package com.alibaba.alink.operator.batch.finance;

import org.apache.flink.types.Row;

import com.alibaba.alink.operator.batch.BatchOperator;
import com.alibaba.alink.operator.batch.classification.LogisticRegressionTrainBatchOp;
import com.alibaba.alink.operator.batch.feature.BinningPredictBatchOp;
import com.alibaba.alink.operator.batch.feature.BinningTrainBatchOp;
import com.alibaba.alink.operator.batch.source.MemSourceBatchOp;
import com.alibaba.alink.params.feature.HasEncode.Encode;
import com.alibaba.alink.testutil.AlinkTestBase;
import org.junit.Before;
import org.junit.Test;

import java.util.Arrays;

/**
 * Smoke test for {@link GroupScorecardTrainBatchOp}.
 *
 * <p>The test trains a binning model and a group scorecard model on a small in-memory table,
 * then runs {@link GroupScorecardPredictBatchOp} on the same table. It only checks that the
 * pipeline executes; it makes no assertions on the produced scores.
 */
public class GroupScorecardTrainBatchOpTest extends AlinkTestBase {

	/**
	 * Training rows laid out as {@code f0..f7} (doubles) followed by an integer {@code label}.
	 * The same eight rows are repeated three times, giving 24 rows in total.
	 */
	private static final Row[] array = new Row[] {
		Row.of(1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1),
		Row.of(1.0, 2.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1),
		Row.of(2.0, 3.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1),
		Row.of(3.0, 4.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1),
		Row.of(3.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0),
		Row.of(3.0, 2.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0),
		Row.of(4.0, 3.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0),
		Row.of(4.0, 4.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0),
		Row.of(1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1),
		Row.of(1.0, 2.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1),
		Row.of(2.0, 3.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1),
		Row.of(3.0, 4.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1),
		Row.of(3.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0),
		Row.of(3.0, 2.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0),
		Row.of(4.0, 3.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0),
		Row.of(4.0, 4.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0),
		Row.of(1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1),
		Row.of(1.0, 2.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1),
		Row.of(2.0, 3.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1),
		Row.of(3.0, 4.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1),
		Row.of(3.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0),
		Row.of(3.0, 2.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0),
		Row.of(4.0, 3.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0),
		Row.of(4.0, 4.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0)
	};

	/** Columns passed to {@code setGroupCols} on the group scorecard trainer. */
	private static final String[] groupColNames = new String[] {"f0", "f1"};

	/** Columns passed to {@code setSelectedCols} on the group scorecard trainer. */
	private static final String[] selectedColNames = new String[] {"f2", "f3", "f4", "f5", "f6", "f7"};

	/** Columns passed to {@code setSelectedCols} on the binning trainer (all eight feature columns). */
	private static final String[] lrColNames = new String[] {"f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7"};

	/** Name of the label column in the in-memory table. */
	private static final String labelColName = "label";

	/** In-memory source table; rebuilt before every test by {@link #init()}. */
	private BatchOperator <?> lrData;

	/** Builds the in-memory source table from {@link #array} with columns {@code f0..f7, label}. */
	@Before
	public void init() {
		lrData = new MemSourceBatchOp(
			Arrays.asList(array),
			new String[] {"f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", "label"});
	}

	/**
	 * Runs binning training, group scorecard training and prediction end to end.
	 *
	 * @throws Exception if building or executing the batch job fails
	 */
	@Test
	public void test() throws Exception {
		// Debug hint kept from the original, disabled: BatchOperator.setParallelism(1);

		BinningTrainBatchOp binningTrainBatchOp = new BinningTrainBatchOp()
			.setSelectedCols(lrColNames);
		binningTrainBatchOp.linkFrom(lrData);

		// minSamplesPerLeaf is set to 2 here; the data set has only 24 rows.
		GroupScorecardTrainBatchOp gcTrainOp = new GroupScorecardTrainBatchOp()
			.setGroupCols(groupColNames)
			.setSelectedCols(selectedColNames)
			.setLabelCol(labelColName)
			.setMinSamplesPerLeaf(2);

		// NOTE(review): the same binning model is passed as both the second and third input;
		// presumably the trainer takes two separate binning-model inputs - confirm against the operator.
		gcTrainOp.linkFrom(lrData, binningTrainBatchOp, binningTrainBatchOp);
		gcTrainOp.getOutputTable().printSchema();

		GroupScorecardPredictBatchOp predOp = new GroupScorecardPredictBatchOp()
			.setPredictionScoreCol("pred_score")
			.setPredictionDetailCol("pred_detail")
			.setCalculateScorePerFeature(true);
		predOp.linkFrom(gcTrainOp, lrData).lazyPrint();

		// lazyPrint only registers the sink; execute() actually runs the job.
		BatchOperator.execute();
	}
}