Java 类名:com.alibaba.alink.operator.batch.finance.GroupScorecardTrainBatchOp
Python 类名:GroupScorecardTrainBatchOp
分群评分卡训练
| 名称 | 中文名称 | 描述 | 类型 | 是否必须? | 取值范围 | 默认值 |
|---|---|---|---|---|---|---|
| groupCols | 分组列名,多列 | 分组列名,多列,必选 | String数组 | ✓ | ||
| labelCol | 标签列名 | 输入表中的标签列名 | String | ✓ | ||
| selectedCols | 选择的列名 | 计算列对应的列名列表 | String数组 | ✓ | ||
| alphaEntry | 筛选阈值 | 筛选阈值 | Double | | | 0.0500 |
| alphaStay | 移除阈值 | 移除阈值 | Double | | | 0.0500 |
| constOptimMethod | 优化方法 | 求解优化问题时选择的优化方法 | ConstOptimMethod | | SQP, Barrier, LBFGS, Newton, ALM | SQP |
| defaultWoe | 默认Woe,在woe为Nan或NULL时替换 | 默认Woe,在woe为Nan或NULL时替换 | Double | | | NaN |
| encode | 编码方法 | 编码方法 | Encode | | WOE, ASSEMBLED_VECTOR, NULL | ASSEMBLED_VECTOR |
| epsilon | 收敛阈值 | 迭代方法的终止判断阈值,默认值为 1.0e-6 | Double | | x >= 0.0 | 1.0e-6 |
| forceSelectedCols | 强制选择的列 | 强制选择的列 | String数组 | |||
| l1 | L1 正则化系数 | L1 正则化系数,默认为0。 | Double | | x >= 0.0 | 0.0000 |
| l2 | L2 正则化系数 | L2 正则化系数,默认为0。 | Double | | x >= 0.0 | 0.0000 |
| linearModelType | 线性模型 | 线性模型 | LinearModelType | | LR, LinearReg, Divergence | LR |
| maxIter | 最大迭代步数 | 最大迭代步数,默认为 100 | Integer | | x >= 1 | 100 |
| maxLeaves | 叶节点的最多个数 | 叶节点的最多个数 | Integer | | | 2147483647 |
| minOutcomeSamplesPerLeaf | 叶节点的最小正负样本个数 | 叶节点的最小正负样本个数 | Integer | | | 500 |
| minSamplesPerLeaf | 叶节点的最小样本个数 | 叶节点的最小样本个数 | Integer | | | 500 |
| odds | 分数基准点处的odds值 | 分数基准点处的odds值 | Double | | | null |
| pdo | 分数增长pdo,odds加倍 | 分数增长pdo,odds加倍 | Double | | | null |
| positiveLabelValueString | 正样本 | 正样本对应的字符串格式。 | String | | | 1 |
| scaleInfo | 是否将模型进行分数转换 | 是否将模型进行分数转换 | Boolean | | | false |
| scaledValue | 分数基准点 | 分数基准点 | Double | | | null |
| treeSplitMeasure | 树节点分裂标准 | 树节点分裂标准 | TreeMeasure | | AUC, KS | AUC |
| weightCol | 权重列名 | 权重列对应的列名 | String | | | null |
| withSelector | 是否逐步回归 | 是否逐步回归 | Boolean | | | false |
package com.alibaba.alink.operator.batch.finance;
import org.apache.flink.types.Row;
import com.alibaba.alink.operator.batch.BatchOperator;
import com.alibaba.alink.operator.batch.classification.LogisticRegressionTrainBatchOp;
import com.alibaba.alink.operator.batch.feature.BinningPredictBatchOp;
import com.alibaba.alink.operator.batch.feature.BinningTrainBatchOp;
import com.alibaba.alink.operator.batch.source.MemSourceBatchOp;
import com.alibaba.alink.params.feature.HasEncode.Encode;
import com.alibaba.alink.testutil.AlinkTestBase;
import org.junit.Before;
import org.junit.Test;
import java.util.Arrays;
/**
 * Smoke test for {@link GroupScorecardTrainBatchOp}.
 *
 * <p>The test fits a binning model on a small in-memory table, trains a group scorecard on
 * top of it, runs the matching predictor over the same table and prints the result. It makes
 * no assertions on the output; it passes as long as the whole pipeline executes without
 * throwing.
 */
public class GroupScorecardTrainBatchOpTest extends AlinkTestBase {

    /** Name of the label column in the sample table. */
    private static final String LABEL_COL = "label";

    /** Columns used to split the samples into groups. */
    private static final String[] GROUP_COLS = new String[] {"f0", "f1"};

    /** Feature columns fed to the scorecard. */
    private static final String[] SCORECARD_COLS = new String[] {"f2", "f3", "f4", "f5", "f6", "f7"};

    /** Columns the binning model is trained on (group columns plus scorecard features). */
    private static final String[] BINNING_COLS = new String[] {"f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7"};

    /** Schema of the sample table: eight features followed by the label. */
    private static final String[] TABLE_COLS =
        new String[] {"f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", "label"};

    /** Feature values (f0..f7) of the eight distinct sample rows. */
    private static final double[][] FEATURE_PATTERNS = {
        {1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0},
        {1.0, 2.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0},
        {2.0, 3.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0},
        {3.0, 4.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0},
        {3.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0},
        {3.0, 2.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0},
        {4.0, 3.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0},
        {4.0, 4.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0}
    };

    /** Label of each row in {@link #FEATURE_PATTERNS}, index-aligned. */
    private static final int[] LABEL_PATTERNS = {1, 1, 1, 1, 0, 0, 0, 0};

    /** How many times the eight-row pattern is repeated in the sample table. */
    private static final int REPEAT_COUNT = 3;

    /** The 24 sample rows: the eight patterns, in order, repeated {@link #REPEAT_COUNT} times. */
    private static final Row[] SAMPLES = buildSamples();

    /** Sample table, rebuilt before every test. */
    private BatchOperator <?> source;

    /**
     * Expands the feature/label patterns into the full list of sample rows.
     *
     * @return a fresh array of {@code FEATURE_PATTERNS.length * REPEAT_COUNT} rows, each holding
     *     eight {@code Double} features followed by an {@code Integer} label
     */
    private static Row[] buildSamples() {
        int patternCount = FEATURE_PATTERNS.length;
        Row[] rows = new Row[patternCount * REPEAT_COUNT];
        for (int i = 0; i < rows.length; i++) {
            int p = i % patternCount;
            double[] f = FEATURE_PATTERNS[p];
            rows[i] = Row.of(f[0], f[1], f[2], f[3], f[4], f[5], f[6], f[7], LABEL_PATTERNS[p]);
        }
        return rows;
    }

    /** Wraps the sample rows in an in-memory batch source. */
    @Before
    public void init() {
        source = new MemSourceBatchOp(Arrays.asList(SAMPLES), TABLE_COLS);
    }

    /**
     * Runs binning training, group scorecard training and prediction end to end.
     *
     * @throws Exception if any stage of the batch job fails
     */
    @Test
    public void test() throws Exception {
        // Note: an earlier revision carried a disabled call to BatchOperator.setParallelism(1) here.

        BinningTrainBatchOp binningModel = new BinningTrainBatchOp();
        binningModel.setSelectedCols(BINNING_COLS);
        binningModel.linkFrom(source);

        GroupScorecardTrainBatchOp scorecardModel = new GroupScorecardTrainBatchOp();
        scorecardModel.setGroupCols(GROUP_COLS);
        scorecardModel.setSelectedCols(SCORECARD_COLS);
        scorecardModel.setLabelCol(LABEL_COL);
        scorecardModel.setMinSamplesPerLeaf(2);
        // The binning model is deliberately passed for both model inputs, as in the original test.
        // NOTE(review): the meaning of each input slot is defined by GroupScorecardTrainBatchOp.
        scorecardModel.linkFrom(source, binningModel, binningModel);
        scorecardModel.getOutputTable().printSchema();

        GroupScorecardPredictBatchOp predictor = new GroupScorecardPredictBatchOp();
        predictor.setPredictionScoreCol("pred_score");
        predictor.setPredictionDetailCol("pred_detail");
        predictor.setCalculateScorePerFeature(true);
        predictor.linkFrom(scorecardModel, source).lazyPrint();

        // lazyPrint only registers the output; execute() actually runs the job.
        BatchOperator.execute();
    }
}