Java 类名:com.alibaba.alink.operator.batch.graph.NodeIndexerTrainBatchOp
Python 类名:NodeIndexerTrainBatchOp
NodeIndexer训练
| 名称 | 中文名称 | 描述 | 类型 | 是否必须? | 取值范围 | 默认值 |
|---|---|---|---|---|---|---|
| selectedCols | 选中的列名数组 | 计算列对应的列名列表 | String数组 |  |  | null |
package com.alibaba.alink.operator.batch.graph;
import com.alibaba.alink.operator.batch.BatchOperator;
import com.alibaba.alink.operator.batch.source.MemSourceBatchOp;
import com.alibaba.alink.testutil.AlinkTestBase;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.types.Row;
import org.junit.Before;
import org.junit.Test;
import java.util.ArrayList;
import java.util.List;
/**
 * Smoke test that chains {@link NodeIndexerTrainBatchOp}, {@code NodeToIndexBatchOp} and
 * {@code IndexToNodeBatchOp} over a small edge list and prints every intermediate result.
 *
 * <p>The test makes no assertions; it only checks that the pipeline runs without throwing.
 */
public class NodeIndexerTrainBatchOpTest extends AlinkTestBase {

	/** Edge list fed to the pipeline: (source node, target node, edge value). */
	List <Row> originalData;

	// The fixtures below are filled in by before() but are not read by any test in this class.
	List <Tuple2 <String, Long>> expectedIdMapping;
	List <Row> expectedMappedDataSet;
	List <long[]> randomWalks;
	List <String> expectedRemappedString;

	/** Rebuilds all fixtures before each test. */
	@Before
	public void before() {
		originalData = rowsOf(new Object[][] {
			{"Alice", "Lisa", 1.0},
			{"Lisa", "Karry", 2.0},
			{"Karry", "Bella", 3.0},
			{"Bella", "Lucy", 4.0},
			{"Lucy", "Bob", 5.0},
			{"John", "Bob", 6.0},
			{"John", "Stella", 7.0},
			{"John", "Kate", 8.0},
			{"Kate", "Stella", 9.0},
			{"Kate", "Jack", 10.0},
			{"Jess", "Jack", 11.0}
		});

		// Node names and their ids are kept as two parallel arrays, paired by position.
		String[] nodeNames = {
			"John", "Karry", "Lisa", "Bella", "Bob", "Stella", "Alice", "Jack", "Jess", "Kate", "Lucy"
		};
		long[] nodeIds = {7L, 9L, 13L, 0L, 2L, 4L, 1L, 3L, 5L, 11L, 15L};
		expectedIdMapping = new ArrayList <>();
		for (int i = 0; i < nodeNames.length; i++) {
			expectedIdMapping.add(Tuple2.of(nodeNames[i], nodeIds[i]));
		}

		expectedMappedDataSet = rowsOf(new Object[][] {
			{13L, 9L, 2.0},
			{1L, 13L, 1.0},
			{9L, 0L, 3.0},
			{7L, 2L, 6.0},
			{15L, 2L, 5.0},
			{7L, 4L, 7.0},
			{11L, 4L, 9.0},
			{5L, 3L, 11.0},
			{11L, 3L, 10.0},
			{7L, 11L, 8.0},
			{0L, 15L, 4.0}
		});

		long[][] walks = {
			{1, 13, 9, 0, 15},
			{15, 2, 7, 4},
			{7, 11, 3, 5}
		};
		randomWalks = new ArrayList <>();
		for (long[] walk : walks) {
			randomWalks.add(walk);
		}

		String[] remappedWalks = {
			"Alice Lisa Karry Bella Lucy",
			"Lucy Bob John Stella",
			"John Kate Jack Jess"
		};
		expectedRemappedString = new ArrayList <>();
		for (String remapped : remappedWalks) {
			expectedRemappedString.add(remapped);
		}
	}

	/** Runs the indexing round trip on an in-memory source built from {@link #originalData}. */
	@Test
	public void test1() throws Exception {
		BatchOperator <?> source =
			new MemSourceBatchOp(originalData, "source string, target string, value double");
		func(source);
	}

	/**
	 * Trains a node indexer on the "source"/"target" columns of {@code input}, maps those columns
	 * to ids, maps them back, and prints each stage.
	 *
	 * @param input edge table containing "source" and "target" columns
	 * @throws Exception if any operator in the chain throws
	 */
	private void func(BatchOperator <?> input) throws Exception {
		input.lazyPrint("\n输入\n");

		// Stage 1: train the indexer on both node columns.
		BatchOperator <?> indexModel = new NodeIndexerTrainBatchOp()
			.setSelectedCols("source", "target")
			.linkFrom(input);
		indexModel.lazyPrint("\n< nodeIndexer > \n ");

		// Stage 2: apply the trained indexer to the original edges.
		BatchOperator <?> indexedEdges = new NodeToIndexBatchOp()
			.setSelectedCols("source", "target")
			.linkFrom(indexModel, input);
		indexedEdges.lazyPrint("\n< nodeToIndex > \n");

		// Stage 3: map the indexed edges back through the same model and print the result.
		new IndexToNodeBatchOp()
			.setSelectedCols("source", "target")
			.linkFrom(indexModel, indexedEdges)
			.print("\n< indexToNode >");
	}

	/**
	 * Converts a table of cell values into rows, one {@link Row} per inner array.
	 *
	 * @param table cell values, one inner array per row
	 * @return a new mutable list with the rows in the same order as {@code table}
	 */
	private static List <Row> rowsOf(Object[][] table) {
		List <Row> rows = new ArrayList <>();
		for (Object[] cells : table) {
			rows.add(Row.of(cells));
		}
		return rows;
	}
}
输入
| source | target | value |
|---|---|---|
| Alice | Lisa | 1.0000 |
| Lisa | Karry | 2.0000 |
| Karry | Bella | 3.0000 |
| Bella | Lucy | 4.0000 |
| Lucy | Bob | 5.0000 |
| John | Bob | 6.0000 |
| John | Stella | 7.0000 |
| John | Kate | 8.0000 |
| Kate | Stella | 9.0000 |
| Kate | Jack | 10.0000 |
| Jess | Jack | 11.0000 |
nodeIndexer
| node | id |
|---|---|
| Bella | 0 |
| Bob | 1 |
| Stella | 2 |
| Alice | 3 |
| Jack | 4 |
| Jess | 5 |
| John | 6 |
| Karry | 7 |
| Kate | 8 |
| Lisa | 9 |
| Lucy | 10 |
nodeToIndex
| source | target | value |
|---|---|---|
| 3 | 9 | 1.0000 |
| 9 | 7 | 2.0000 |
| 7 | 0 | 3.0000 |
| 0 | 10 | 4.0000 |
| 10 | 1 | 5.0000 |
| 6 | 1 | 6.0000 |
| 6 | 2 | 7.0000 |
| 6 | 8 | 8.0000 |
| 8 | 2 | 9.0000 |
| 8 | 4 | 10.0000 |
| 5 | 4 | 11.0000 |
indexToNode
| source | target | value |
|---|---|---|
| Alice | Lisa | 1.0000 |
| Lisa | Karry | 2.0000 |
| Karry | Bella | 3.0000 |
| Bella | Lucy | 4.0000 |
| Lucy | Bob | 5.0000 |
| John | Bob | 6.0000 |
| John | Stella | 7.0000 |
| John | Kate | 8.0000 |
| Kate | Stella | 9.0000 |
| Kate | Jack | 10.0000 |
| Jess | Jack | 11.0000 |