分群评分卡训练(GroupScorecardTrainBatchOp)

Java 类名：com.alibaba.alink.operator.batch.finance.GroupScorecardTrainBatchOp

Python 类名：GroupScorecardTrainBatchOp

功能介绍

分群评分卡训练

参数说明

名称	中文名称	描述	类型	是否必须？	取值范围	默认值
groupCols	分组列名，多列	分组列名，多列，必选	String数组	✓
labelCol	标签列名	输入表中的标签列名	String	✓
selectedCols	选择的列名	计算列对应的列名列表	String数组	✓
alphaEntry	筛选阈值	筛选阈值	Double			0.0500
alphaStay	移除阈值	移除阈值	Double			0.0500
constOptimMethod	优化方法	求解优化问题时选择的优化方法	ConstOptimMethod		SQP, Barrier, LBFGS, Newton, ALM	SQP
defaultWoe	默认WOE，在WOE为NaN或NULL时替换	默认WOE值，当某个分箱的WOE为NaN或NULL时，用该值进行替换	Double			NaN
encode	编码方法	编码方法	Encode		WOE, ASSEMBLED_VECTOR, NULL	ASSEMBLED_VECTOR
epsilon	收敛阈值	迭代方法的终止判断阈值，默认值为 1.0e-6	Double		x >= 0.0	1.0e-6
forceSelectedCols	强制选择的列	强制选择的列	String数组
l1	L1 正则化系数	L1 正则化系数，默认为0。	Double		x >= 0.0	0.0000
l2	L2 正则化系数	L2 正则化系数，默认为0。	Double		x >= 0.0	0.0000
linearModelType	线性模型	线性模型	LinearModelType		LR, LinearReg, Divergence	LR
maxIter	最大迭代步数	最大迭代步数，默认为 100	Integer		x >= 1	100
maxLeaves	叶节点的最多个数	叶节点的最多个数	Integer			2147483647
minOutcomeSamplesPerLeaf	叶节点的最小正负样本个数	叶节点的最小正负样本个数	Integer			500
minSamplesPerLeaf	叶节点的最小样本个数	叶节点的最小样本个数	Integer			500
odds	分数基准点处的odds值	分数基准点处的odds值	Double			null
pdo	分数增长pdo，odds加倍	odds加倍时分数的增长值（PDO，Points to Double the Odds）	Double			null
positiveLabelValueString	正样本	正样本对应的字符串格式。	String			1
scaleInfo	是否将模型进行分数转换	是否将模型进行分数转换	Boolean			false
scaledValue	分数基准点	分数基准点	Double			null
treeSplitMeasure	树节点分裂标准	树节点分裂标准	TreeMeasure		AUC, KS	AUC
weightCol	权重列名	权重列对应的列名	String			null
withSelector	是否逐步回归	是否逐步回归	Boolean			false

代码示例

Java 代码

package com.alibaba.alink.operator.batch.finance;

import org.apache.flink.types.Row;

import com.alibaba.alink.operator.batch.BatchOperator;
import com.alibaba.alink.operator.batch.classification.LogisticRegressionTrainBatchOp;
import com.alibaba.alink.operator.batch.feature.BinningPredictBatchOp;
import com.alibaba.alink.operator.batch.feature.BinningTrainBatchOp;
import com.alibaba.alink.operator.batch.source.MemSourceBatchOp;
import com.alibaba.alink.params.feature.HasEncode.Encode;
import com.alibaba.alink.testutil.AlinkTestBase;
import org.junit.Before;
import org.junit.Test;

import java.util.Arrays;

/**
 * End-to-end example for {@link GroupScorecardTrainBatchOp}.
 *
 * <p>The test builds a small in-memory table, fits a binning model on it, trains a group
 * scorecard on top of that binning model and finally runs the matching predict operator on the
 * training data. Nothing is asserted; the model schema and the predictions are only printed.
 */
public class GroupScorecardTrainBatchOpTest extends AlinkTestBase {

    /** Name of the label column (the last column of {@link #ROWS}). */
    private static final String LABEL_COL_NAME = "label";

    /** Columns handed to the trainer as group columns. */
    private static final String[] GROUP_COL_NAMES = new String[] {"f0", "f1"};

    /** Columns handed to the trainer as selected columns. */
    private static final String[] SELECTED_COL_NAMES = new String[] {"f2", "f3", "f4", "f5", "f6", "f7"};

    /** Columns the binning model is fitted on: group columns plus selected columns. */
    private static final String[] BINNING_COL_NAMES = new String[] {"f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7"};

    /** Column names of the in-memory source table, in the positional order used by {@link #ROWS}. */
    private static final String[] SOURCE_COL_NAMES =
        new String[] {"f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", "label"};

    /**
     * Input rows: eight double columns followed by an integer label.
     * The same block of eight rows appears three times in a row.
     */
    private static final Row[] ROWS = new Row[] {
        Row.of(1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1),
        Row.of(1.0, 2.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1),
        Row.of(2.0, 3.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1),
        Row.of(3.0, 4.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1),
        Row.of(3.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0),
        Row.of(3.0, 2.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0),
        Row.of(4.0, 3.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0),
        Row.of(4.0, 4.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0),
        Row.of(1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1),
        Row.of(1.0, 2.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1),
        Row.of(2.0, 3.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1),
        Row.of(3.0, 4.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1),
        Row.of(3.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0),
        Row.of(3.0, 2.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0),
        Row.of(4.0, 3.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0),
        Row.of(4.0, 4.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0),
        Row.of(1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1),
        Row.of(1.0, 2.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1),
        Row.of(2.0, 3.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1),
        Row.of(3.0, 4.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1),
        Row.of(3.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0),
        Row.of(3.0, 2.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0),
        Row.of(4.0, 3.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0),
        Row.of(4.0, 4.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0)
    };

    /** Source operator over {@link #ROWS}; rebuilt before every test by {@link #init()}. */
    private BatchOperator<?> sourceData;

    /** Wraps the static rows in a fresh in-memory source operator. */
    @Before
    public void init() {
        sourceData = new MemSourceBatchOp(Arrays.asList(ROWS), SOURCE_COL_NAMES);
    }

    /**
     * Trains binning and group-scorecard models, then prints the scorecard model schema and
     * the predictions for the training data.
     *
     * @throws Exception if linking or executing any of the batch operators fails
     */
    @Test
    public void test() throws Exception {
        // Step 1: fit the binning model on all eight non-label columns.
        BinningTrainBatchOp binningModel = new BinningTrainBatchOp();
        binningModel.setSelectedCols(BINNING_COL_NAMES);
        binningModel.linkFrom(sourceData);

        // Step 2: configure and train the group scorecard.
        GroupScorecardTrainBatchOp scorecardTrainer = new GroupScorecardTrainBatchOp();
        scorecardTrainer.setGroupCols(GROUP_COL_NAMES);
        scorecardTrainer.setSelectedCols(SELECTED_COL_NAMES);
        scorecardTrainer.setLabelCol(LABEL_COL_NAME);
        scorecardTrainer.setMinSamplesPerLeaf(2);

        // NOTE(review): the same binning model is supplied as both the second and the third
        // input - presumably the trainer expects two binning-model inputs; confirm against
        // GroupScorecardTrainBatchOp.
        scorecardTrainer.linkFrom(sourceData, binningModel, binningModel);
        scorecardTrainer.getOutputTable().printSchema();

        // Step 3: score the training data with the freshly trained model.
        GroupScorecardPredictBatchOp scorecardPredictor = new GroupScorecardPredictBatchOp();
        scorecardPredictor.setPredictionScoreCol("pred_score");
        scorecardPredictor.setPredictionDetailCol("pred_detail");
        scorecardPredictor.setCalculateScorePerFeature(true);

        BatchOperator<?> scored = scorecardPredictor.linkFrom(scorecardTrainer, sourceData);
        scored.lazyPrint();

        // lazyPrint only registers the output; execute() triggers the job.
        BatchOperator.execute();
    }

}