NodeIndexer训练(NodeIndexerTrainBatchOp)

Java 类名:com.alibaba.alink.operator.batch.graph.NodeIndexerTrainBatchOp

Python 类名:NodeIndexerTrainBatchOp

功能介绍

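NodeIndexer训练：根据 selectedCols 指定的列（例如边表的 source、target 两列）中出现的节点名称，生成“节点名称 → 整数 id”的映射表（输出列为 node 和 id）。该映射表可作为 NodeToIndexBatchOp 与 IndexToNodeBatchOp 的第一个输入，用于在节点名称与整数 id 之间相互转换（见下方代码示例）。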

参数说明

| 名称 | 中文名称 | 描述 | 类型 | 是否必须？ | 取值范围 | 默认值 |
| --- | --- | --- | --- | --- | --- | --- |
| selectedCols | 选中的列名数组 | 计算列对应的列名列表 | String数组 |  |  | null |

代码示例

Java 代码

package com.alibaba.alink.operator.batch.graph;

import com.alibaba.alink.operator.batch.BatchOperator;
import com.alibaba.alink.operator.batch.source.MemSourceBatchOp;
import com.alibaba.alink.testutil.AlinkTestBase;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.types.Row;
import org.junit.Before;
import org.junit.Test;

import java.util.ArrayList;
import java.util.List;

public class NodeIndexerTrainBatchOpTest extends AlinkTestBase {

	List <Row> originalData;
	List <Tuple2 <String, Long>> expectedIdMapping;
	List <Row> expectedMappedDataSet;
	List <long[]> randomWalks;
	List <String> expectedRemappedString;

	@Before
	public void before() {
		originalData = new ArrayList <>();
		originalData.add(Row.of("Alice", "Lisa", 1.));
		originalData.add(Row.of("Lisa", "Karry", 2.));
		originalData.add(Row.of("Karry", "Bella", 3.));
		originalData.add(Row.of("Bella", "Lucy", 4.));
		originalData.add(Row.of("Lucy", "Bob", 5.));
		originalData.add(Row.of("John", "Bob", 6.));
		originalData.add(Row.of("John", "Stella", 7.));
		originalData.add(Row.of("John", "Kate", 8.));
		originalData.add(Row.of("Kate", "Stella", 9.));
		originalData.add(Row.of("Kate", "Jack", 10.));
		originalData.add(Row.of("Jess", "Jack", 11.));

		expectedIdMapping = new ArrayList <>();
		expectedIdMapping.add(Tuple2.of("John", 7L));
		expectedIdMapping.add(Tuple2.of("Karry", 9L));
		expectedIdMapping.add(Tuple2.of("Lisa", 13L));
		expectedIdMapping.add(Tuple2.of("Bella", 0L));
		expectedIdMapping.add(Tuple2.of("Bob", 2L));
		expectedIdMapping.add(Tuple2.of("Stella", 4L));
		expectedIdMapping.add(Tuple2.of("Alice", 1L));
		expectedIdMapping.add(Tuple2.of("Jack", 3L));
		expectedIdMapping.add(Tuple2.of("Jess", 5L));
		expectedIdMapping.add(Tuple2.of("Kate", 11L));
		expectedIdMapping.add(Tuple2.of("Lucy", 15L));

		expectedMappedDataSet = new ArrayList <>();
		expectedMappedDataSet.add(Row.of(13L, 9L, 2.0));
		expectedMappedDataSet.add(Row.of(1L, 13L, 1.0));
		expectedMappedDataSet.add(Row.of(9L, 0L, 3.0));
		expectedMappedDataSet.add(Row.of(7L, 2L, 6.0));
		expectedMappedDataSet.add(Row.of(15L, 2L, 5.0));
		expectedMappedDataSet.add(Row.of(7L, 4L, 7.0));
		expectedMappedDataSet.add(Row.of(11L, 4L, 9.0));
		expectedMappedDataSet.add(Row.of(5L, 3L, 11.0));
		expectedMappedDataSet.add(Row.of(11L, 3L, 10.0));
		expectedMappedDataSet.add(Row.of(7L, 11L, 8.0));
		expectedMappedDataSet.add(Row.of(0L, 15L, 4.0));

		randomWalks = new ArrayList <>();
		randomWalks.add(new long[] {1, 13, 9, 0, 15});
		randomWalks.add(new long[] {15, 2, 7, 4});
		randomWalks.add(new long[] {7, 11, 3, 5});

		expectedRemappedString = new ArrayList <>();
		expectedRemappedString.add("Alice Lisa Karry Bella Lucy");
		expectedRemappedString.add("Lucy Bob John Stella");
		expectedRemappedString.add("John Kate Jack Jess");
	}

	@Test
	public void test1() throws Exception {
		func(
			new MemSourceBatchOp(originalData, "source string, target string, value double")
		);
	}
    
	private void func(BatchOperator <?> input) throws Exception {
		input.lazyPrint("\n输入\n");

		BatchOperator <?> nodeIndexer = new NodeIndexerTrainBatchOp()
			.setSelectedCols("source", "target")
			.linkFrom(input);

		nodeIndexer.lazyPrint("\n< nodeIndexer > \n ");

		BatchOperator <?> nodeToIndex = new NodeToIndexBatchOp()
			.setSelectedCols("source", "target")
			.linkFrom(nodeIndexer, input);

		nodeToIndex.lazyPrint("\n< nodeToIndex > \n");

		new IndexToNodeBatchOp()
			.setSelectedCols("source", "target")
			.linkFrom(nodeIndexer, nodeToIndex)
			.print("\n< indexToNode >");
	}

}

输出结果

输入

source target value
Alice Lisa 1.0000
Lisa Karry 2.0000
Karry Bella 3.0000
Bella Lucy 4.0000
Lucy Bob 5.0000
John Bob 6.0000
John Stella 7.0000
John Kate 8.0000
Kate Stella 9.0000
Kate Jack 10.0000
Jess Jack 11.0000

nodeIndexer

node id
Bella 0
Bob 1
Stella 2
Alice 3
Jack 4
Jess 5
John 6
Karry 7
Kate 8
Lisa 9
Lucy 10

nodeToIndex

source target value
3 9 1.0000
9 7 2.0000
7 0 3.0000
0 10 4.0000
10 1 5.0000
6 1 6.0000
6 2 7.0000
6 8 8.0000
8 2 9.0000
8 4 10.0000
5 4 11.0000

indexToNode

source target value
Alice Lisa 1.0000
Lisa Karry 2.0000
Karry Bella 3.0000
Bella Lucy 4.0000
Lucy Bob 5.0000
John Bob 6.0000
John Stella 7.0000
John Kate 8.0000
Kate Stella 9.0000
Kate Jack 10.0000
Jess Jack 11.0000