分群评分卡训练(GroupScorecardTrainBatchOp)

Java 类名:com.alibaba.alink.operator.batch.finance.GroupScorecardTrainBatchOp

Python 类名:GroupScorecardTrainBatchOp

功能介绍

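分群评分卡训练组件。训练时输入训练数据与分箱模型(BinningTrainBatchOp 的输出),训练得到的模型可交给 GroupScorecardPredictBatchOp 进行预测,具体用法见下方代码示例。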

参数说明

| 名称 | 中文名称 | 描述 | 类型 | 是否必须? | 取值范围 | 默认值 |
| --- | --- | --- | --- | --- | --- | --- |
| groupCols | 分组列名,多列 | 分组列名,多列,必选 | String数组 | ✓ |  |  |
| labelCol | 标签列名 | 输入表中的标签列名 | String | ✓ |  |  |
| selectedCols | 选择的列名 | 计算列对应的列名列表 | String数组 | ✓ |  |  |
| alphaEntry | 筛选阈值 | 筛选阈值 | Double |  |  | 0.0500 |
| alphaStay | 移除阈值 | 移除阈值 | Double |  |  | 0.0500 |
| constOptimMethod | 优化方法 | 求解优化问题时选择的优化方法 | ConstOptimMethod |  | SQP, Barrier, LBFGS, Newton, ALM | SQP |
| defaultWoe | 默认Woe,在woe为NaN或NULL时替换 | 默认Woe,在woe为NaN或NULL时替换 | Double |  |  | NaN |
| encode | 编码方法 | 编码方法 | Encode |  | WOE, ASSEMBLED_VECTOR, NULL | ASSEMBLED_VECTOR |
| epsilon | 收敛阈值 | 迭代方法的终止判断阈值,默认值为 1.0e-6 | Double |  | x >= 0.0 | 1.0E-6 |
| forceSelectedCols | 强制选择的列 | 强制选择的列 | String数组 |  |  |  |
| l1 | L1 正则化系数 | L1 正则化系数,默认为0。 | Double |  | x >= 0.0 | 0.0000 |
| l2 | L2 正则化系数 | L2 正则化系数,默认为0。 | Double |  | x >= 0.0 | 0.0000 |
| linearModelType | 线性模型 | 线性模型 | LinearModelType |  | LR, LinearReg, Divergence | LR |
| maxIter | 最大迭代步数 | 最大迭代步数,默认为 100 | Integer |  | x >= 1 | 100 |
| maxLeaves | 叶节点的最多个数 | 叶节点的最多个数 | Integer |  |  | 2147483647 |
| minOutcomeSamplesPerLeaf | 叶节点的最小正负样本个数 | 叶节点的最小正负样本个数 | Integer |  |  | 500 |
| minSamplesPerLeaf | 叶节点的最小样本个数 | 叶节点的最小样本个数 | Integer |  |  | 500 |
| odds | 分数基准点处的odds值 | 分数基准点处的odds值 | Double |  |  | null |
| pdo | odds加倍时的分数增长值(pdo) | odds加倍时的分数增长值(pdo) | Double |  |  | null |
| positiveLabelValueString | 正样本 | 正样本对应的字符串格式。 | String |  |  | 1 |
| scaleInfo | 是否将模型进行分数转换 | 是否将模型进行分数转换 | Boolean |  |  | false |
| scaledValue | 分数基准点 | 分数基准点 | Double |  |  | null |
| treeSplitMeasure | 树节点分裂标准 | 树节点分裂标准 | TreeMeasure |  | AUC, KS | AUC |
| weightCol | 权重列名 | 权重列对应的列名 | String |  |  | null |
| withSelector | 是否逐步回归 | 是否逐步回归 | Boolean |  |  | false |

代码示例

Java 代码

package com.alibaba.alink.operator.batch.finance;

import org.apache.flink.types.Row;

import com.alibaba.alink.operator.batch.BatchOperator;
import com.alibaba.alink.operator.batch.classification.LogisticRegressionTrainBatchOp;
import com.alibaba.alink.operator.batch.feature.BinningPredictBatchOp;
import com.alibaba.alink.operator.batch.feature.BinningTrainBatchOp;
import com.alibaba.alink.operator.batch.source.MemSourceBatchOp;
import com.alibaba.alink.params.feature.HasEncode.Encode;
import com.alibaba.alink.testutil.AlinkTestBase;
import org.junit.Before;
import org.junit.Test;

import java.util.Arrays;

/**
 * Usage example for {@link GroupScorecardTrainBatchOp}.
 *
 * <p>A small in-memory table is binned, used to train a group scorecard, and then scored with
 * {@link GroupScorecardPredictBatchOp}. The test only prints results; it makes no assertions.
 */
public class GroupScorecardTrainBatchOpTest extends AlinkTestBase {

    /** Columns passed to {@code setGroupCols} on the trainer. */
    private static final String[] GROUP_COLS = new String[] {"f0", "f1"};

    /** Columns passed to {@code setSelectedCols} on the scorecard trainer. */
    private static final String[] FEATURE_COLS = new String[] {"f2", "f3", "f4", "f5", "f6", "f7"};

    /** Columns passed to {@code setSelectedCols} on the binning trainer (group + feature columns). */
    private static final String[] BINNING_COLS =
        new String[] {"f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7"};

    /** Name of the label column (last field of every sample row). */
    private static final String LABEL_COL = "label";

    /**
     * Sample rows: eight double-valued fields followed by an integer label.
     * The same eight rows are listed three times, giving 24 rows in total.
     */
    private static final Row[] SAMPLE_ROWS = new Row[] {
        // first copy
        Row.of(1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1),
        Row.of(1.0, 2.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1),
        Row.of(2.0, 3.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1),
        Row.of(3.0, 4.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1),
        Row.of(3.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0),
        Row.of(3.0, 2.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0),
        Row.of(4.0, 3.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0),
        Row.of(4.0, 4.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0),
        // second copy
        Row.of(1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1),
        Row.of(1.0, 2.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1),
        Row.of(2.0, 3.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1),
        Row.of(3.0, 4.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1),
        Row.of(3.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0),
        Row.of(3.0, 2.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0),
        Row.of(4.0, 3.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0),
        Row.of(4.0, 4.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0),
        // third copy
        Row.of(1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1),
        Row.of(1.0, 2.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1),
        Row.of(2.0, 3.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1),
        Row.of(3.0, 4.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1),
        Row.of(3.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0),
        Row.of(3.0, 2.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0),
        Row.of(4.0, 3.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0),
        Row.of(4.0, 4.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0)
    };

    /** In-memory source rebuilt before every test by {@link #init()}. */
    private BatchOperator <?> trainData;

    /** Wraps {@link #SAMPLE_ROWS} in a {@link MemSourceBatchOp} with columns f0..f7 plus label. */
    @Before
    public void init() {
        String[] schema = new String[] {"f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", "label"};
        trainData = new MemSourceBatchOp(Arrays.asList(SAMPLE_ROWS), schema);
    }

    /**
     * Runs the full train-then-predict flow and prints the results.
     *
     * @throws Exception if building or executing the batch job fails
     */
    @Test
    public void test() throws Exception {
        // Step 1: train binning on the group and feature columns.
        BinningTrainBatchOp binningModel = new BinningTrainBatchOp()
            .setSelectedCols(BINNING_COLS);
        binningModel.linkFrom(trainData);

        // Step 2: configure the group scorecard trainer.
        GroupScorecardTrainBatchOp trainOp = new GroupScorecardTrainBatchOp();
        trainOp.setGroupCols(GROUP_COLS);
        trainOp.setSelectedCols(FEATURE_COLS);
        trainOp.setLabelCol(LABEL_COL);
        trainOp.setMinSamplesPerLeaf(2);

        // The binning model is deliberately supplied as both the second and the third input.
        trainOp.linkFrom(trainData, binningModel, binningModel);
        trainOp.getOutputTable().printSchema();

        // Step 3: score the training data with the freshly trained model.
        GroupScorecardPredictBatchOp predictOp = new GroupScorecardPredictBatchOp();
        predictOp.setPredictionScoreCol("pred_score");
        predictOp.setPredictionDetailCol("pred_detail");
        predictOp.setCalculateScorePerFeature(true);

        predictOp
            .linkFrom(trainOp, trainData)
            .lazyPrint();

        // lazyPrint only registers the output; execute() actually runs the job.
        BatchOperator.execute();
    }

}