├── apply
├── bin
│ ├── batchRun.sh
│ ├── deploy.sh
│ ├── lzo.sh
│ └── start.sh
├── dependency-reduced-pom.xml
├── pom.xml
└── src
│ ├── main
│ ├── java
│ │ └── com
│ │ │ └── lakala
│ │ │ └── audit
│ │ │ └── rabbitmqMsg
│ │ │ ├── consumer
│ │ │ └── Receiver.java
│ │ │ ├── entityV
│ │ │ └── RequestMessageV.java
│ │ │ └── produce
│ │ │ └── Sender.java
│ ├── resources
│ │ ├── dev
│ │ │ └── config.properties
│ │ ├── extract_data_hql
│ │ ├── log4j.xml
│ │ ├── product
│ │ │ ├── config.properties
│ │ │ ├── hdfs-site.xml
│ │ │ └── hive-site.xml
│ │ └── test
│ │ │ └── config.properties
│ └── scala
│ │ ├── ApplyPageRank.scala
│ │ ├── CastToInt.scala
│ │ ├── ExploreLPAData.scala
│ │ ├── GraphOneDegreeApplyPerDiem.scala
│ │ ├── GraphxBSP.scala
│ │ ├── JudgeIsMobile.scala
│ │ ├── LoadCallhistoryData.scala
│ │ ├── LoadHiveData.scala
│ │ ├── LoadHiveData2.scala
│ │ ├── RunGraphx.scala
│ │ ├── RunLoadApplyGraphx.scala
│ │ ├── RunLoadApplyGraphx2.scala
│ │ ├── RunLoadApplyGraphx3.scala
│ │ ├── TestSql.scala
│ │ ├── com
│ │ └── lakala
│ │ │ └── datacenter
│ │ │ ├── abstractions
│ │ │ └── PregelProgram.scala
│ │ │ ├── apply
│ │ │ ├── buildGraph
│ │ │ │ ├── BuildGraphData.scala
│ │ │ │ ├── GraphOperators.scala
│ │ │ │ └── NewEdgeArr.scala
│ │ │ └── model
│ │ │ │ ├── ApplyInfo.scala
│ │ │ │ ├── BaseEntity.scala
│ │ │ │ ├── CallHistoryEntity.scala
│ │ │ │ ├── EdgeEntity.scala
│ │ │ │ └── NDegreeEntity.scala
│ │ │ ├── faund
│ │ │ ├── ApplyRandomForest.scala
│ │ │ ├── DatasetTitanic.scala
│ │ │ ├── ScalaRandomForest.scala
│ │ │ ├── SparkConfUtil.scala
│ │ │ └── Titanic.scala
│ │ │ ├── grograms
│ │ │ └── ApplyDegreeCentralityProgram.scala
│ │ │ ├── grogress
│ │ │ └── ExportNDegreeData.scala
│ │ │ ├── jaccard
│ │ │ ├── Jaccard.scala
│ │ │ └── PowerIterationClustering.scala
│ │ │ ├── louvain
│ │ │ ├── HDFSLouvainRunner.scala
│ │ │ ├── LouvainCore.scala
│ │ │ ├── LouvainHarness.scala
│ │ │ ├── VertexData.scala
│ │ │ └── VertexState.scala
│ │ │ ├── main
│ │ │ ├── Analytics.scala
│ │ │ ├── CallHistoryPageRank.scala
│ │ │ ├── Driver.scala
│ │ │ ├── LPAAlgorithm.scala
│ │ │ ├── LPCoarseAlgorithm.scala
│ │ │ ├── LiveCommunityDetection.scala
│ │ │ ├── LouvainDGA.scala
│ │ │ ├── PICCallAlgorithm.scala
│ │ │ ├── PSCANAlgorithm.scala
│ │ │ └── SemiSupervisedLabelPropagation.scala
│ │ │ ├── talk
│ │ │ ├── builtin
│ │ │ │ └── ShortestPathSample.scala
│ │ │ └── types
│ │ │ │ ├── City.scala
│ │ │ │ ├── Person.scala
│ │ │ │ └── VertexAttribute.scala
│ │ │ └── utils
│ │ │ ├── SparkCommon.scala
│ │ │ └── UtilsToos.scala
│ │ └── edu
│ │ └── gatech
│ │ └── cse8803
│ │ ├── clustering
│ │ └── PowerIterationClustering.scala
│ │ ├── graphconstruct
│ │ └── GraphLoader.scala
│ │ ├── ioutils
│ │ └── CSVUtils.scala
│ │ ├── jaccard
│ │ └── Jaccard.scala
│ │ ├── main
│ │ └── Main.scala
│ │ ├── model
│ │ └── models.scala
│ │ └── randomwalk
│ │ └── randomwalk.scala
│ └── test
│ └── scala
│ ├── CollectionUtil.scala
│ ├── CreateApplyData.scala
│ ├── CreateApplyData2.scala
│ ├── Driver.scala
│ ├── EdgeTuplesTest.scala
│ ├── GraphNdegUtil.scala
│ ├── GraphXExample.scala
│ ├── GraphxBSP.scala
│ ├── GraphxBSP2.scala
│ ├── GraphxBSP3.scala
│ ├── Median.scala
│ ├── NDegreeResult.scala
│ ├── NNTest.scala
│ ├── NumOnce.scala
│ ├── ParsesTest.scala
│ ├── TestCSV.scala
│ ├── TestRunGraphx.scala
│ ├── TrustRank.scala
│ ├── UDF_test.scala
│ ├── apply
│ ├── NDegreeCallMiddlePath.scala
│ └── NDegreeMiddlePathResult.scala
│ ├── entity
│ ├── CallEntity.scala
│ ├── CallVertex.scala
│ └── TwoDegree.scala
│ └── utils
│ ├── CollectionUtil.scala
│ ├── GraphNdegUtil.scala
│ └── GraphNdegUtil2.scala
├── common
├── pom.xml
└── src
│ ├── main
│ ├── resources
│ │ └── css
│ │ │ └── style.css
│ └── scala
│ │ └── com
│ │ └── lakala
│ │ └── datacenter
│ │ └── common
│ │ ├── graphstream
│ │ └── SimpleGraphViewer.scala
│ │ └── utils
│ │ └── DateTimeUtils.scala
│ └── test
│ ├── data
│ ├── cities_edges.txt
│ ├── cities_vertices.txt
│ ├── likeness_edges.txt
│ ├── maxvalue_edges.txt
│ ├── maxvalue_vertices.txt
│ ├── papers_edges.txt
│ ├── people_vertices.txt
│ ├── relationships_edges.txt
│ ├── us_cities_edges.txt
│ ├── us_cities_vertices.txt
│ ├── users_dense_edges.txt
│ ├── users_disjoint_edges.txt
│ ├── users_edges.txt
│ └── users_vertices.txt
│ └── scala
│ └── TestGraphViewer.scala
├── core
├── pom.xml
└── src
│ ├── main
│ ├── java
│ │ └── com
│ │ │ └── lakala
│ │ │ └── datacenter
│ │ │ └── core
│ │ │ ├── config
│ │ │ └── ConfigurationLoader.java
│ │ │ ├── hdfs
│ │ │ └── FileUtil.java
│ │ │ ├── messaging
│ │ │ ├── Sender.java
│ │ │ └── Worker.java
│ │ │ ├── models
│ │ │ ├── PartitionDescription.java
│ │ │ ├── ProcessorMessage.java
│ │ │ └── ProcessorMode.java
│ │ │ └── processor
│ │ │ └── GraphProcessor.java
│ └── scala
│ │ └── com
│ │ └── lakala
│ │ └── datacenter
│ │ └── core
│ │ ├── abstractions
│ │ └── PregelProgram.scala
│ │ ├── algorithms
│ │ └── Algorithms.scala
│ │ ├── grograms
│ │ ├── BetweennessCentralityProgram.scala
│ │ ├── EdgeBetweennessProgram.scala
│ │ ├── MaximumValueProgram.scala
│ │ └── ShortestPathProgram.scala
│ │ └── utils
│ │ └── UtilsToos.scala
│ └── test
│ ├── java
│ └── com
│ │ └── lakala
│ │ └── datacenter
│ │ └── core
│ │ ├── hdfs
│ │ └── FileUtilTest.java
│ │ ├── messaging
│ │ └── SenderTest.java
│ │ └── processor
│ │ └── GraphProcessorTest.java
│ └── scala
│ └── com
│ └── lakala
│ └── datacenter
│ └── core
│ └── grograms
│ ├── GraphProcessorTest.scala
│ ├── ShortestPathProgramTests.scala
│ └── ShortestPathTests.scala
├── neo4j
├── bin
│ └── start2.sh
├── pom.xml
└── src
│ ├── main
│ ├── java
│ │ └── com
│ │ │ └── lakala
│ │ │ └── datacenter
│ │ │ └── enums
│ │ │ ├── DataAttributeType.java
│ │ │ ├── GraphEnum.java
│ │ │ ├── Labels.java
│ │ │ └── RelationshipTypes.java
│ ├── resources
│ │ ├── css
│ │ │ └── style.css
│ │ ├── dev
│ │ │ ├── config.properties
│ │ │ └── hive-site.xml
│ │ ├── log4j.xml
│ │ ├── product
│ │ │ └── config.properties
│ │ └── test
│ │ │ └── config.properties
│ └── scala
│ │ └── com
│ │ └── lakala
│ │ └── datacenter
│ │ ├── abstractions
│ │ └── DataGenerator.scala
│ │ ├── constant
│ │ └── StreamingConstant.scala
│ │ ├── cypher
│ │ └── NeoData.scala
│ │ ├── grogram
│ │ └── Neo4jDataGenerator.scala
│ │ ├── load
│ │ └── spark
│ │ │ ├── ClusterGraphDatabase.scala
│ │ │ ├── ExplortApplyData.scala
│ │ │ ├── ExplortApplyData2.scala
│ │ │ ├── LoadHiveData.scala
│ │ │ ├── Neo4j.scala
│ │ │ ├── Neo4jConfig.scala
│ │ │ ├── Neo4jDataFrame.scala
│ │ │ ├── Neo4jGraph.scala
│ │ │ ├── Neo4jJavaIntegration.scala
│ │ │ ├── Neo4jPartition.scala
│ │ │ ├── Neo4jRowRDD.scala
│ │ │ └── Neo4jTupleRDD.scala
│ │ ├── main
│ │ ├── HandleTask.scala
│ │ ├── Main.scala
│ │ ├── MessageParam.scala
│ │ └── TrialConsumerKafka.scala
│ │ ├── realtimeBuildGraphx
│ │ ├── MsgOffsetStreamListener.scala
│ │ ├── SendMsg.scala
│ │ └── SparkStreamingOnKafkaDirect.scala
│ │ └── utils
│ │ ├── ArgsCommon.scala
│ │ ├── RedisUtils.scala
│ │ └── UtilsTools.scala
│ └── test
│ ├── java
│ ├── ApplyInfoConsumer.java
│ ├── ConsumerKafka.java
│ ├── DataAttributeType.java
│ ├── JavaKafkaSimpleConsumerAPI.java
│ ├── JavaKafkaSimpleConsumerAPITest.java
│ ├── KafkaBrokerInfo.java
│ ├── KafkaConsumer.java
│ ├── KafkaProducer.java
│ ├── KafkaProducer2.java
│ ├── KafkaTopicPartitionInfo.java
│ ├── LogSession.java
│ ├── OperatorKafka.java
│ ├── SendKafkaMsgTest.java
│ └── TestCypher.java
│ └── scala
│ ├── BroadcastAccumulatorStreaming.scala
│ ├── ClientRedisTest.scala
│ ├── CollectionUtil.scala
│ ├── ConsumerGroupExample.scala
│ ├── GraphNdegUtil.scala
│ ├── Main.scala
│ ├── StreamingFromKafka.scala
│ ├── TestApiNeo4j.scala
│ ├── TestCypher.scala
│ ├── TestKafka.scala
│ ├── TestRedis.scala
│ └── org
│ └── neo4j
│ └── spark
│ ├── ExplortApplyDataTest.scala
│ ├── MainTest.scala
│ ├── Neo4jContstanTest.scala
│ ├── Neo4jDataFrameScalaTest.scala
│ ├── Neo4jGraphScalaTest.scala
│ ├── Neo4jRestSparkTest.scala
│ └── Neo4jSparkTest.scala
└── pom.xml
/apply/bin/batchRun.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | if [ $# -lt 1 ] ; then
4 | echo "USAGE: $0 begin_date [end_date]"
5 | exit 1;
6 | fi
7 |
8 | beginDate=$1
9 | yesterday=$(date --date="1 days ago" '+%Y-%m-%d')
10 | endDate=$yesterday
11 | if [ $# -gt 1 ] ; then
12 | endDate=$2
13 | fi
14 |
15 | beginTime=`date -d $beginDate '+%s'`
16 | yesterdayTime=`date -d $yesterday '+%s'`
17 | endTime=`date -d $endDate '+%s'`
18 | if [ $beginTime -gt $yesterdayTime ] ; then
19 | echo "begin_date can only be yesterday[$endDate] at the latest"
20 | exit 1;
21 | fi
22 | if [ $endTime -gt $yesterdayTime ] ; then
23 | echo "end_date can only be yesterday[$yesterday] at the latest"
24 | exit 1;
25 | fi
26 | if [ $beginTime -gt $endTime ] ; then
27 | echo "begin_date can only be end_date[$endDate] at the latest"
28 | exit 1;
29 | fi
30 |
31 | #echo $beginDate
32 | #echo $endDate
33 | currentDate=$beginDate
34 | currentTime=$beginTime
35 |
36 | cd "`dirname "$0"`"
37 |
38 | while [ $currentTime -le $endTime ]
39 | do
40 | #echo $currentDate
41 | sh start.sh $currentDate
42 | currentDate=`date -d "$currentDate +1 day" '+%Y-%m-%d'`
43 | currentTime=`date -d $currentDate '+%s'`
44 | done
45 |
--------------------------------------------------------------------------------
/apply/bin/deploy.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | if [ $# != 1 ] ; then
4 | echo "USAGE: $0 ENV(dev|test|product)"
5 | exit 1;
6 | fi
7 |
8 | cd "$(cd "`dirname "$0"`"/../..; pwd)"
9 | mvn -U clean package dependency:copy-dependencies -DskipTests -P$1 -Papply
10 |
--------------------------------------------------------------------------------
/apply/bin/lzo.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | ####################################
3 | ## lzo hadoop textfile
4 | ## usage:sh lzo.sh source_dir
5 | ## example:sh lzo.sh /user/flume
6 | ####################################
7 | startTime=`date +%s`
8 | echo "the script begin at $(date +%H:%M:%S)"
9 | source_dir=$1
10 | cd /tmp
11 | hadoop fs -get ${source_dir} /tmp
12 | filepaths=()
13 | function getfilePath(){
14 | for file in ` ls $1 `
15 | do
16 | if [ -d $1"/"$file ]
17 | then
18 | getfilePath $1"/"$file
19 | else
20 | filepaths[${#filepaths[@]}]=$1"/"$file
21 | fi
22 | done
23 | }
24 | path=/tmp/${source_dir##*/}
25 | getfilePath $path
26 | #echo ${filepaths[*]}
27 | for filepath in ${filepaths[@]}
28 | do
29 | lzop ${filepath}
30 | rm -rf ${filepath}
31 | done
32 | hadoop fs -mv ${source_dir} ${source_dir}.bak
33 | hadoop fs -put $path ${source_dir%/*}
34 | for filepath in ${filepaths[@]}
35 | do
36 | hadoop jar /usr/hdp/2.2.6.0-2800/hadoop/lib/hadoop-lzo-0.6.0.2.2.6.0-2800.jar com.hadoop.compression.lzo.LzoIndexer ${source_dir%/*}/${filepath#*/tmp/}.lzo
37 | #2>&1 > /data/hdfs_logs/${source_dir##*/}.log
38 | done
39 | rm -rf $path
40 | endTime=`date +%s`
41 | echo "the script end at $(date +%H:%M:%S)"
42 | echo "total second is" $(($endTime-$startTime))
--------------------------------------------------------------------------------
/apply/bin/start.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | ## usage: sh bin/start.sh -i /logs/device/* -d 2016-01-11
3 |
4 | SPARK_HOME=/usr/hdp/current/spark-client
5 | HIVE_HOME=/usr/hdp/current/hive-client
6 | PROJECT_HOME="$(cd "`dirname "$0"`"/..; pwd)"
7 | HDP_VERSION=2.4.0.0-169
8 | APP_CACHE_DIR=/tmp/device
9 |
10 | stdate=${1:-`date -d '1 days ago' +"%Y-%m-%d"`}
11 | #inputdir=/logs/device/*
12 | #inputfile=/logs/device/*/2016-01-{1[1-9],2[0-1]}
13 | while getopts "d:i:" opt ; do
14 | case $opt in
15 | d)stdate=$OPTARG ;;
16 | i)inputdir=$OPTARG ;;
17 | ?)echo "==> please input arg: stdate(d), inputdir(i)" && exit 1 ;;
18 | esac
19 | done
20 |
21 | #echo "==> ready for geoip...."
22 | #hadoop fs -mkdir -p $APP_CACHE_DIR/geoip
23 | #hadoop fs -test -e $APP_CACHE_DIR/geoip/GeoLite2-City.mmdb
24 | #if [ $? -ne 0 ]; then
25 | # echo "GeoLite2-City.mmdb not exists!"
26 | # hadoop fs -put $PROJECT_HOME/../tcloud-log-analysis/src/main/bundleApp/coord-common/geoip/GeoLite2-City.mmdb $APP_CACHE_DIR/geoip/
27 | #fi
28 |
29 | ## https://issues.apache.org/jira/browse/ZEPPELIN-93
30 | ## https://github.com/caskdata/cdap/pull/4106
31 | spark-submit \
32 | --class RunLoadApplyGraphx3 \
33 | --master yarn \
34 | --deploy-mode cluster \
35 | --queue dc \
36 | --driver-memory 2G \
37 | --executor-memory 8G \
38 | --num-executors 4 \
39 | --executor-cores 3 \
40 | --conf "spark.rpc.askTimeout=300s" \
41 | --driver-java-options "-XX:-UseGCOverheadLimit -Xms2G -Xmx2G -XX:MaxPermSize=2G -Dhdp.version=$HDP_VERSION -Dspark.yarn.am.extraJavaOptions=-Dhdp.version=$HDP_VERSION" \
42 | --verbose \
43 | --files $PROJECT_HOME/target/classes/hive-site.xml \
44 | --driver-class-path $PROJECT_HOME/target/dependency/mysql-connector-java-5.1.36.jar \
45 | --jars $PROJECT_HOME/target/dependency/mysql-connector-java-5.1.36.jar,$SPARK_HOME/lib/datanucleus-api-jdo-3.2.6.jar,$SPARK_HOME/lib/datanucleus-core-3.2.10.jar,$SPARK_HOME/lib/datanucleus-rdbms-3.2.9.jar \
46 | $PROJECT_HOME/target/data-analysis-sdk.jar \
47 | $stdate
48 |
49 | ## --packages com.databricks:spark-csv_2.10:1.3.0 \
50 | ## 2>&1 > output.txt
--------------------------------------------------------------------------------
/apply/dependency-reduced-pom.xml:
--------------------------------------------------------------------------------
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <parent>
    <artifactId>graphx-analysis</artifactId>
    <groupId>com.lakala.datacenter</groupId>
    <version>1.0.0-SNAPSHOT</version>
  </parent>
  <modelVersion>4.0.0</modelVersion>
  <artifactId>graphx-analysis-apply</artifactId>
  <name>graphx-analysis-apply</name>
  <url>http://maven.apache.org</url>
  <build>
    <finalName>graphx-analysis-apply</finalName>
  </build>
  <properties>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
  </properties>
</project>
--------------------------------------------------------------------------------
/apply/pom.xml:
--------------------------------------------------------------------------------
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <parent>
    <groupId>com.lakala.datacenter</groupId>
    <artifactId>graphx-analysis</artifactId>
    <version>1.0.0-SNAPSHOT</version>
  </parent>
  <modelVersion>4.0.0</modelVersion>

  <artifactId>graphx-analysis-apply</artifactId>
  <packaging>jar</packaging>

  <name>graphx-analysis-apply</name>
  <url>http://maven.apache.org</url>

  <properties>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
  </properties>

  <dependencies>
    <dependency>
      <groupId>com.lakala.datacenter</groupId>
      <artifactId>graphx-analysis-core</artifactId>
      <version>${project.version}</version>
    </dependency>
  </dependencies>

  <build>
    <finalName>graphx-analysis-apply</finalName>
  </build>
</project>
--------------------------------------------------------------------------------
/apply/src/main/java/com/lakala/audit/rabbitmqMsg/consumer/Receiver.java:
--------------------------------------------------------------------------------
1 | package com.lakala.audit.rabbitmqMsg.consumer;
2 |
3 | import com.google.gson.Gson;
4 | import com.lakala.audit.rabbitmqMsg.entityV.RequestMessageV;
5 | import com.rabbitmq.client.Channel;
6 | import com.rabbitmq.client.Connection;
7 | import com.rabbitmq.client.ConnectionFactory;
8 | import com.rabbitmq.client.QueueingConsumer;
9 |
10 | import java.io.IOException;
11 | import java.util.concurrent.TimeoutException;
12 |
13 | /**
14 | * Created by Administrator on 2017/8/1 0001.
15 | */
16 | public class Receiver {
17 | private final static String AUDIT_QUEUE_NAME = "audit_mq";
18 | // private final static String USERNAME = "lys";
19 | // private final static String PASSWORD = "123456";
20 | // private final static String VIRTUALHOST = "/";
21 | // private final static String HOST = "localhost";
22 |
23 | private final static String HOST = "192.168.0.182";
24 | private final static String USERNAME = "antifraud";
25 | private final static String PASSWORD = "antifraud";
26 | private final static String VIRTUALHOST = "antifraud";
27 | private final static int PORTNUMBER = 5672;
28 |
29 | public static void main(String[] args) {
30 | try {
31 | work();
32 | } catch (IOException e) {
33 | e.printStackTrace();
34 | } catch (InterruptedException e) {
35 | e.printStackTrace();
36 | } catch (TimeoutException e) {
37 | e.printStackTrace();
38 | }
39 |
40 | }
41 |
42 | public static void work() throws java.io.IOException,
43 | java.lang.InterruptedException, TimeoutException {
44 | ConnectionFactory factory = new ConnectionFactory();
45 | // factory.setHost("192.168.0.182");
46 | factory.setHost(HOST);
47 | factory.setPort(PORTNUMBER);
48 | factory.setUsername(USERNAME);
49 | factory.setPassword(PASSWORD);
50 | factory.setVirtualHost(VIRTUALHOST);
51 | Connection connection = factory.newConnection();
52 | Channel channel = connection.createChannel();
53 |
54 | channel.queueDeclare(AUDIT_QUEUE_NAME, false, false, false, null);
55 | channel.basicQos(20);
56 |
57 | QueueingConsumer consumer = new QueueingConsumer(channel);
58 | channel.basicConsume(AUDIT_QUEUE_NAME, false, consumer);
59 |
60 | System.out.println(" [*] Waiting for messages. To exit press CTRL+C");
61 |
62 | while (true) {
63 | QueueingConsumer.Delivery delivery = consumer.nextDelivery();
64 | String message = new String(delivery.getBody());
65 |
66 | System.out.println(" [x] Received '" + message + "'");
67 |
68 | Gson gson = new Gson();
69 | RequestMessageV requestMessageV = gson.fromJson(message, RequestMessageV.class);
70 | //TODO parse the message and store it in Redis
71 |
72 | System.out.println(requestMessageV.getOrderno());
73 | System.out.println(" [x] Done '" + message + "'");
74 | channel.basicAck(delivery.getEnvelope().getDeliveryTag(), false);
75 | }
76 | }
77 | }
78 |
--------------------------------------------------------------------------------
/apply/src/main/java/com/lakala/audit/rabbitmqMsg/entityV/RequestMessageV.java:
--------------------------------------------------------------------------------
1 | package com.lakala.audit.rabbitmqMsg.entityV;
2 |
3 | /**
4 | * Created by Administrator on 2017/8/1 0001.
5 | */
6 | public class RequestMessageV {
7 | public RequestMessageV() {
8 | }
9 |
10 | public RequestMessageV(String orderno, String statue) {
11 | this.orderno = orderno;
12 | this.statue = statue;
13 | }
14 |
15 | String orderno;
16 | String statue;
17 |
18 | public String getOrderno() {
19 | return orderno;
20 | }
21 |
22 | public void setOrderno(String orderno) {
23 | this.orderno = orderno;
24 | }
25 |
26 | public String getStatue() {
27 | return statue;
28 | }
29 |
30 | public void setStatue(String statue) {
31 | this.statue = statue;
32 | }
33 | }
34 |
--------------------------------------------------------------------------------
/apply/src/main/java/com/lakala/audit/rabbitmqMsg/produce/Sender.java:
--------------------------------------------------------------------------------
1 | package com.lakala.audit.rabbitmqMsg.produce;
2 |
3 | import com.google.gson.Gson;
4 | import com.lakala.audit.rabbitmqMsg.entityV.RequestMessageV;
5 | import com.rabbitmq.client.Channel;
6 | import com.rabbitmq.client.Connection;
7 | import com.rabbitmq.client.ConnectionFactory;
8 |
9 | import java.io.IOException;
10 | import java.util.concurrent.TimeoutException;
11 |
12 |
13 | /**
14 | * Created by Administrator on 2017/8/1 0001.
15 | */
16 | public class Sender {
17 | private final static String AUDIT_QUEUE_NAME = "audit_mq";
18 | // private final static String USERNAME = "lys";
19 | // private final static String PASSWORD = "123456";
20 | // private final static String VIRTUALHOST = "/";
21 | // private final static String HOST = "localhost";
22 |
23 | private final static String HOST = "192.168.0.182";
24 | private final static String USERNAME = "antifraud";
25 | private final static String PASSWORD = "antifraud";
26 | private final static String VIRTUALHOST = "antifraud";
27 | private final static int PORTNUMBER = 5672;
28 |
29 | public static void main(String[] args) {
30 | Gson gson = new Gson();
31 | RequestMessageV requestMessageV = new RequestMessageV("XNA20170505131153011496369566130", "Q");
32 | String message = gson.toJson(requestMessageV);
33 | System.out.println(message);
34 | //message={"orderno":"XNA20170505131153011496369566130","statue":"Q"}
35 | try {
36 | send(message);
37 | } catch (IOException e) {
38 | e.printStackTrace();
39 | } catch (InterruptedException e) {
40 | e.printStackTrace();
41 | } catch (TimeoutException e) {
42 | e.printStackTrace();
43 | }
44 |
45 | }
46 |
47 | public static void send(String message) throws java.io.IOException,
48 | java.lang.InterruptedException, TimeoutException {
49 |
50 | ConnectionFactory factory = new ConnectionFactory();
51 | factory.setHost(HOST);
52 | factory.setPort(PORTNUMBER);
53 | factory.setUsername(USERNAME);
54 | factory.setPassword(PASSWORD);
55 | factory.setVirtualHost(VIRTUALHOST);
56 | Connection connection = factory.newConnection();
57 | Channel channel = connection.createChannel();
58 | channel.queueDeclare(AUDIT_QUEUE_NAME, false, false, false, null);
59 | channel.basicPublish("", AUDIT_QUEUE_NAME, null, message.getBytes("UTF-8"));
60 | System.out.println("已经发送消息....." + message);
61 | channel.close();
62 | connection.close();
63 | }
64 | }
65 |
--------------------------------------------------------------------------------
/apply/src/main/resources/dev/config.properties:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/luhm2017/graphx-analysis/9e3a96ec0df5da8208655face4ff0b0e6b3ed497/apply/src/main/resources/dev/config.properties
--------------------------------------------------------------------------------
/apply/src/main/resources/extract_data_hql:
--------------------------------------------------------------------------------
1 | use lkl_card_score;
2 | set mapreduce.job.queuename=szoffline;
3 | -- extract the overdue data for each person
4 | create table fraud_mobile_performance AS SELECT if(a.label=0,1,0) AS good,if(a.label=1,1,0) AS bad,if(a.label=2,1,0) AS unknown,a.apply_time,b.history_due_day,b.current_due_day,b.mobile from fqz_order_performance_data_new a inner join creditloan.s_c_apply_user b ON a.cert_no= b.cert_no AND a.year='2017' AND a.month ='09' AND a.day='19' AND b.year='2017' AND b.month='09' AND b.day='19';
5 | -- community blacklist
6 | create table fraud_community_mobile_black as select a.community_mobile,a.mobile,if(b.mobile is not null,0,1) as lable from louvain_result2 a left outer join creditloan.s_c_loan_blacklist b on a.community_mobile = b.mobile and b.year='2017' and b.month='09' and b.day='19';
7 | -- overdue status of each person per community id, grouped by community id and blacklist flag
8 | create table fraud_community_mobile_black_performance as SELECT a.community_mobile,a.lable,sum(if(b.good>=0,b.good,0)) goods,sum(if(b.bad>=0,b.bad,0)) bads,sum(if(b.unknown>=0,b.unknown,0)) unknowns,sum(if(b.history_due_day>=0,b.history_due_day,0)) history_due_days,sum(if(b.current_due_day>=0,b.current_due_day,0)) current_due_days from fraud_community_mobile_black AS a LEFT JOIN fraud_mobile_performance AS b on a.community_mobile=b.mobile GROUP BY a.community_mobile,a.lable;
9 |
--------------------------------------------------------------------------------
/apply/src/main/resources/log4j.xml:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/apply/src/main/resources/product/config.properties:
--------------------------------------------------------------------------------
1 | hdfs_root_path=hdfs://ns1/
--------------------------------------------------------------------------------
/apply/src/main/resources/product/hdfs-site.xml:
--------------------------------------------------------------------------------
<?xml version="1.0" encoding="UTF-8"?>
<configuration>
  <property>
    <name>dfs.nameservices</name>
    <value>ns1</value>
  </property>
  <property>
    <name>dfs.client.failover.proxy.provider.ns1</name>
    <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
  </property>
  <property>
    <name>dfs.ha.automatic-failover.enabled.ns1</name>
    <value>true</value>
  </property>
  <property>
    <name>ha.zookeeper.quorum</name>
    <value>datanode4.lakala.com:2181,datanode5.lakala.com:2181,datanode6.lakala.com:2181,datanode7.lakala.com:2181,datanode8.lakala.com:2181</value>
  </property>
  <property>
    <name>dfs.ha.namenodes.ns1</name>
    <value>namenode114,namenode148</value>
  </property>
  <property>
    <name>dfs.namenode.rpc-address.ns1.namenode114</name>
    <value>namenode.lakala.com:8020</value>
  </property>
  <property>
    <name>dfs.namenode.servicerpc-address.ns1.namenode114</name>
    <value>namenode.lakala.com:8022</value>
  </property>
  <property>
    <name>dfs.namenode.http-address.ns1.namenode114</name>
    <value>namenode.lakala.com:50070</value>
  </property>
  <property>
    <name>dfs.namenode.https-address.ns1.namenode114</name>
    <value>namenode.lakala.com:50470</value>
  </property>
  <property>
    <name>dfs.namenode.rpc-address.ns1.namenode148</name>
    <value>namenodestandby.lakala.com:8020</value>
  </property>
  <property>
    <name>dfs.namenode.servicerpc-address.ns1.namenode148</name>
    <value>namenodestandby.lakala.com:8022</value>
  </property>
  <property>
    <name>dfs.namenode.http-address.ns1.namenode148</name>
    <value>namenodestandby.lakala.com:50070</value>
  </property>
  <property>
    <name>dfs.namenode.https-address.ns1.namenode148</name>
    <value>namenodestandby.lakala.com:50470</value>
  </property>
  <property>
    <name>dfs.replication</name>
    <value>3</value>
  </property>
  <property>
    <name>dfs.blocksize</name>
    <value>134217728</value>
  </property>
  <property>
    <name>dfs.client.use.datanode.hostname</name>
    <value>false</value>
  </property>
  <property>
    <name>fs.permissions.umask-mode</name>
    <value>022</value>
  </property>
  <property>
    <name>dfs.namenode.acls.enabled</name>
    <value>true</value>
  </property>
  <property>
    <name>dfs.client.use.legacy.blockreader</name>
    <value>false</value>
  </property>
  <property>
    <name>dfs.client.read.shortcircuit</name>
    <value>false</value>
  </property>
  <property>
    <name>dfs.domain.socket.path</name>
    <value>/var/run/hdfs-sockets/dn</value>
  </property>
  <property>
    <name>dfs.client.read.shortcircuit.skip.checksum</name>
    <value>false</value>
  </property>
  <property>
    <name>dfs.client.domain.socket.data.traffic</name>
    <value>false</value>
  </property>
  <property>
    <name>dfs.datanode.hdfs-blocks-metadata.enabled</name>
    <value>true</value>
  </property>
</configuration>
--------------------------------------------------------------------------------
/apply/src/main/resources/test/config.properties:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/luhm2017/graphx-analysis/9e3a96ec0df5da8208655face4ff0b0e6b3ed497/apply/src/main/resources/test/config.properties
--------------------------------------------------------------------------------
/apply/src/main/scala/CastToInt.scala:
--------------------------------------------------------------------------------
1 | import org.apache.spark.sql.api.java.UDF1
2 |
3 | import scala.util.matching.Regex
4 |
5 | /**
6 | * Created by linyanshi on 2017/9/14 0014.
7 | */
8 | class CastToInt extends UDF1[String, Long] {
9 | val pattern = new Regex("[0-9]{1,}")
10 |
11 | override def call(value: String): Long = {
12 | if (pattern.pattern.matcher(value).matches() && value.toLong < 86400l) value.trim.toLong
13 | else if (pattern.pattern.matcher(value).matches() && value.toLong >= 86400l) 86400l
14 | else 0L
15 | }
16 | }
17 |
--------------------------------------------------------------------------------
/apply/src/main/scala/ExploreLPAData.scala:
--------------------------------------------------------------------------------
1 | import org.apache.spark.graphx.Edge
2 | import org.apache.spark.{SparkConf, SparkContext}
3 |
4 | /**
5 | * Created by linyanshi on 2017/9/14 0014.
6 | */
7 | object ExploreLPAData {
8 | def main(args: Array[String]): Unit = {
9 | val conf = new SparkConf().setAppName("ExploreLPAData").set("spark.eventLog.enabled", "true")
10 | val sc = new SparkContext(conf)
11 | val rdd = sc.textFile(args(0), 100).mapPartitions(lines => lines.map { line =>
12 | val arr = line.split(",")
13 | Edge(arr(1).toLong,arr(2).toLong)
14 | })
15 | }
16 | }
17 |
--------------------------------------------------------------------------------
/apply/src/main/scala/JudgeIsMobile.scala:
--------------------------------------------------------------------------------
1 | import org.apache.spark.sql.api.java.UDF1
2 |
3 | import scala.util.matching.Regex
4 |
5 | /**
6 | * Created by linyanshi on 2017/9/14 0014.
7 | */
8 | class JudgeIsMobile extends UDF1[String,Boolean]{
9 | val pattern = new Regex("^((17[0-9])|(14[0-9])|(13[0-9])|(15[^4,\\D])|(18[0,5-9]))\\d{8}$")
10 | override def call(value: String): Boolean = {
11 | pattern.pattern.matcher(value).matches()
12 | }
13 | }
14 |
--------------------------------------------------------------------------------
/apply/src/main/scala/LoadCallhistoryData.scala:
--------------------------------------------------------------------------------
1 | import org.apache.spark.sql.hive.HiveContext
2 | import org.apache.spark.sql.types.DataTypes
3 | import org.apache.spark.{SparkConf, SparkContext}
4 |
5 | /**
6 | * Created by linyanshi on 2017/9/14 0014.
7 | */
8 | object LoadCallhistoryData {
9 | def main(args: Array[String]): Unit = {
10 | val conf = new SparkConf().setAppName("LoadCallhistoryData")
11 | val sc = new SparkContext(conf)
12 | val hc = new HiveContext(sc)
13 | val date = args(0).split("-")
14 | val year = date(0)
15 | val month = date(1)
16 | val day = date(2)
17 | hc.sql("use datacenter")
18 | hc.udf.register("isMobile", new JudgeIsMobile(), DataTypes.BooleanType)
19 | hc.udf.register("castInt", new CastToInt(), DataTypes.LongType)
20 | val hql =
21 | s"""SELECT a.deviceid,a.loginname,a.caller_phone,sum(castInt(a.duration)) AS duration,max(a.date) AS date,max(a.collecttime) AS collecttime
22 | |FROM r_callhistory_week a WHERE a.year='${year}' AND a.month='${month}' AND a.day='${day}'
23 | | AND a.loginname is not null AND a.caller_phone is not null AND isMobile(a.loginname)
24 | | AND isMobile(a.caller_phone) AND a.duration is not null AND a.collecttime <>'null'
25 | | group by a.deviceid,a.loginname,a.caller_phone
26 | """.stripMargin
27 | hc.sql(hql).repartition(100).mapPartitions(rows => rows.map { row => s"${row.getAs("deviceid")},${row.getAs("loginname")},${row.getAs("caller_phone")},${row.getAs("duration")},${row.getAs("date")},${row.getAs("collecttime")}" })
28 | .saveAsTextFile(args(1))
29 | }
30 | }
31 |
--------------------------------------------------------------------------------
/apply/src/main/scala/TestSql.scala:
--------------------------------------------------------------------------------
1 | import org.apache.spark.sql.{DataFrame, SQLContext}
2 | import org.apache.spark.storage.StorageLevel
3 | import org.apache.spark.{SparkConf, SparkContext}
4 |
5 | /**
6 | * Created by Administrator on 2017/7/27 0027.
7 | */
8 | object TestSql {
9 | def main(args: Array[String]): Unit = {
10 | val conf = new SparkConf().setMaster("local").setAppName("test")
11 | val sc = new SparkContext(conf)
12 | val sqlContext = new SQLContext(sc)
13 | val list = List("1","2","3","3","5")
14 | import sqlContext.implicits._
15 | val vertexInfoDF = sc.parallelize(list).toDF().persist(StorageLevel.MEMORY_AND_DISK_SER)
16 | // functions that build feature vectors from the aggregated vertex information
17 | val mean: DataFrame = vertexInfoDF.agg("_1" -> "mean")
18 | val sd: DataFrame = vertexInfoDF.agg("_1" -> "stddev")
19 | // val median: DataFrame = vertexInfoDF.agg("_1" -> "median")
20 | val min: DataFrame = vertexInfoDF.agg("_1" -> "min")
21 | val max: DataFrame = vertexInfoDF.agg("_1" -> "max")
22 | val skew: DataFrame = vertexInfoDF.agg("_1" -> "skewness")
23 | val kurt: DataFrame = vertexInfoDF.agg("_1" -> "kurtosis")
24 | val vari: DataFrame = vertexInfoDF.agg("_1" -> "variance")
25 |
26 | val joinedStats: DataFrame = sd.join(mean).join(min).join(max).join(skew).join(kurt).join(vari)
27 | // .join(median)
28 | println(joinedStats.printSchema())
29 | println(joinedStats.foreach(row=>println(row.get(0))))
30 | vertexInfoDF.unpersist(blocking = true)
31 | val sdtestDF = Seq((1.2.toDouble, 1.6.toDouble, 1.8.toDouble, 1.9.toDouble))
32 | .toDF("numNodes", "numEdges", "maxDeg", "avgDeg")
33 | val df = sdtestDF.join(joinedStats)
34 | println(df.count()) }
35 |
36 | }
37 |
38 |
--------------------------------------------------------------------------------
/apply/src/main/scala/com/lakala/datacenter/abstractions/PregelProgram.scala:
--------------------------------------------------------------------------------
1 | package com.lakala.datacenter.abstractions
2 |
3 | /**
4 | * Created by peter on 2017/4/27.
5 | */
6 |
7 | import org.apache.spark.graphx._
8 |
9 | import scala.reflect.ClassTag
10 |
11 | /**
12 | * The [[PregelProgram]] abstraction wraps Spark's Pregel API implementation from the [[GraphOps]]
13 | * class into a model that makes it easier to write graph algorithms.
14 | * @tparam VertexState is the generic type representing the state of a vertex
15 | */
16 | abstract class PregelProgram[VertexState: ClassTag, VD: ClassTag, ED: ClassTag] protected() extends Serializable {
17 |
18 | @transient val graph: Graph[VD, ED]
19 |
20 | /**
21 | * The vertex program receives a state update and acts to update its state
22 | * @param id is the [[VertexId]] that this program will perform a state operation for
23 | * @param state is the current state of this [[VertexId]]
24 | * @param message is the state received from another vertex in the graph
25 | * @return a [[VertexState]] resulting from a comparison between current state and incoming state
26 | */
27 | def vertexProgram(id : VertexId, state : VertexState, message : VertexState) : VertexState
28 |
29 | /**
30 | * The message broker sends and receives messages. It will initially receive one message for
31 | * each vertex in the graph.
32 | * @param triplet An edge triplet is an object containing a pair of connected vertex objects and edge object.
33 | * For example (v1)-[r]->(v2)
34 | * @return The message broker returns a key value list, each containing a VertexId and a new message
35 | */
36 | def messageBroker(triplet :EdgeTriplet[VertexState, ED]) : Iterator[(VertexId, VertexState)]
37 |
38 | /**
39 | * This method is used to reduce or combine the set of all state outcomes produced by a vertexProgram
40 | * for each vertex in each superstep iteration. Each vertex has a list of state updates received from
41 | * other vertices in the graph via the messageBroker method. This method is used to reduce the list
42 | * of state updates into a single state for the next superstep iteration.
43 | * @param a A first [[VertexState]] representing a partial state of a vertex.
44 | * @param b A second [[VertexState]] representing a different partial state of a vertex
45 | * @return a merged [[VertexState]] representation from the two [[VertexState]] parameters
46 | */
47 | def combinerMessage(a: VertexState, b: VertexState) : VertexState
48 |
49 | }
50 |
--------------------------------------------------------------------------------
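A minimal sketch (not part of this repository) of how a concrete program could extend PregelProgram and hand its three callbacks to Spark's Pregel API. The MaxValueProgram name, the Long vertex state and the run() helper are illustrative assumptions, not code from this project.

import com.lakala.datacenter.abstractions.PregelProgram
import org.apache.spark.graphx._

// Hypothetical example: propagate the maximum vertex value through the graph.
class MaxValueProgram(@transient val graph: Graph[Long, Int])
  extends PregelProgram[Long, Long, Int] {

  // Keep the larger of the current state and the incoming message.
  override def vertexProgram(id: VertexId, state: Long, message: Long): Long =
    math.max(state, message)

  // Send a vertex's value to a neighbour only when it would raise that neighbour's value.
  override def messageBroker(triplet: EdgeTriplet[Long, Int]): Iterator[(VertexId, Long)] =
    if (triplet.srcAttr > triplet.dstAttr) Iterator((triplet.dstId, triplet.srcAttr))
    else Iterator.empty

  // Merge concurrent messages by taking their maximum.
  override def combinerMessage(a: Long, b: Long): Long = math.max(a, b)

  // Drive the three callbacks with Spark's Pregel implementation.
  def run(initialMessage: Long = Long.MinValue): Graph[Long, Int] =
    Pregel(graph, initialMessage)(vertexProgram, messageBroker, combinerMessage)
}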
/apply/src/main/scala/com/lakala/datacenter/apply/buildGraph/NewEdgeArr.scala:
--------------------------------------------------------------------------------
1 | package com.lakala.datacenter.apply.buildGraph
2 |
3 | /**
4 | * Created by linyanshi on 2017/9/1 0001.
5 | */
6 | case class NewEdgeArr(srcV: String, dstV: String, var srcType: String, dstType: String, init: Boolean = false)
7 |
--------------------------------------------------------------------------------
/apply/src/main/scala/com/lakala/datacenter/apply/model/ApplyInfo.scala:
--------------------------------------------------------------------------------
1 | package com.lakala.datacenter.apply.model
2 |
3 | import org.apache.commons.lang3.StringUtils
4 |
5 | /**
6 | * Created by ASUS-PC on 2017/4/13.
7 | */
8 | class ApplyInfo(var order_id: String = "",
9 | var contract_no: String = "",
10 | var business_no: String = "",
11 | var term_id: String = "",
12 | var loan_pan: String = "",
13 | var return_pan: String = "",
14 | var empmobile: String = "",
15 | var datatype: Int = 0 //0, 1 = black, 2 = white
16 | ) extends BaseEntity with Product {
17 | override def toString = s"ApplyInfo(order_id=$order_id, contract_no=$contract_no, business_no=$business_no, term_id=$term_id, loan_pan=$loan_pan, return_pan=$return_pan, empmobile=$empmobile)"
18 |
19 | override def productElement(idx: Int): Any = idx match {
20 | case 0 => order_id
21 | case 1 => contract_no
22 | case 2 => business_no
23 | case 3 => term_id
24 | case 4 => loan_pan
25 | case 5 => return_pan
26 | case 6 => empmobile
27 | case 7 => datatype
28 | case 8 => inDeg
29 | case 9 => outDeg
30 | }
31 |
32 | override def productArity: Int = 10
33 |
34 | override def canEqual(that: Any): Boolean = that.isInstanceOf[ApplyInfo]
35 |
36 | override def equals(other: Any): Boolean = other match {
37 | case that: ApplyInfo =>
38 | (that canEqual this) &&
39 | order_id == that.order_id &&
40 | contract_no == that.contract_no &&
41 | business_no == that.business_no &&
42 | term_id == that.term_id &&
43 | loan_pan == that.loan_pan &&
44 | return_pan == that.return_pan &&
45 | empmobile == that.empmobile
46 | case _ => false
47 | }
48 |
49 | override def hashCode(): Int = {
50 | val state = Seq(order_id, contract_no, business_no, term_id, loan_pan, return_pan, empmobile)
51 | state.map(_.hashCode()).foldLeft(0)((a, b) => 31 * a + b)
52 | }
53 |
54 | def getKey: String = {
55 | if (StringUtils.isNotEmpty(order_id)) order_id
56 | else if (StringUtils.isNotEmpty(contract_no)) contract_no
57 | else if (StringUtils.isNotEmpty(business_no)) business_no
58 | else if (StringUtils.isNotEmpty(term_id)) term_id
59 | else if (StringUtils.isNotEmpty(loan_pan)) loan_pan
60 | else if (StringUtils.isNotEmpty(return_pan)) return_pan
61 | else empmobile
62 | }
63 | }
64 |
--------------------------------------------------------------------------------
/apply/src/main/scala/com/lakala/datacenter/apply/model/BaseEntity.scala:
--------------------------------------------------------------------------------
1 | package com.lakala.datacenter.apply.model
2 |
3 | /**
4 | * Created by ASUS-PC on 2017/4/17.
5 | */
6 | trait BaseEntity extends Serializable {
7 | var inDeg: Int = 0;
8 | var outDeg: Int = 0;
9 | }
10 |
--------------------------------------------------------------------------------
/apply/src/main/scala/com/lakala/datacenter/apply/model/CallHistoryEntity.scala:
--------------------------------------------------------------------------------
1 | package com.lakala.datacenter.apply.model
2 |
3 | /**
4 | * Created by ASUS-PC on 2017/4/18.
5 | */
6 | class CallHistoryEntity(var loginname: Long = 0L, var caller_phone: Long = 0L) extends BaseEntity with Serializable with Product {
7 | override def productElement(idx: Int): Any = idx match {
8 | case 0 => loginname
9 | case 1 => caller_phone
10 |
11 | }
12 |
13 | override def productArity: Int = 2
14 |
15 | override def canEqual(that: Any): Boolean = that.isInstanceOf[CallHistoryEntity]
16 |
17 | override def equals(other: Any): Boolean = other match {
18 | case that: CallHistoryEntity =>
19 | (that canEqual this) &&
20 | loginname == that.loginname &&
21 | caller_phone == that.caller_phone
22 | case _ => false
23 | }
24 |
25 | override def hashCode(): Int = {
26 | val state = Seq(loginname, caller_phone)
27 | state.map(_.hashCode()).foldLeft(0)((a, b) => 31 * a + b)
28 | }
29 |
30 | override def toString = s"CallHistoryEntity($loginname, $caller_phone)"
31 | }
32 |
--------------------------------------------------------------------------------
/apply/src/main/scala/com/lakala/datacenter/apply/model/EdgeEntity.scala:
--------------------------------------------------------------------------------
1 | package com.lakala.datacenter.apply.model
2 |
3 | /**
4 | * Created by ASUS-PC on 2017/4/17.
5 | */
6 | class EdgeEntity(var scrId: Long, val destId: Long, var attr: String) extends Serializable with Product {
7 | override def productElement(idx: Int): Any = idx match {
8 | case 0 => scrId
9 | case 1 => destId
10 | case 2 => attr
11 | }
12 |
13 | override def productArity: Int = 3
14 |
15 | override def canEqual(that: Any): Boolean = that.isInstanceOf[EdgeEntity]
16 |
17 | override def equals(other: Any): Boolean = other match {
18 | case that: EdgeEntity =>
19 | (that canEqual this) &&
20 | scrId == that.scrId &&
21 | destId == that.destId
22 | case _ => false
23 | }
24 |
25 | override def hashCode(): Int = {
26 | val state = Seq(scrId, destId)
27 | state.map(_.hashCode()).foldLeft(0)((a, b) => 31 * a + b)
28 | }
29 | }
30 |
--------------------------------------------------------------------------------
/apply/src/main/scala/com/lakala/datacenter/apply/model/NDegreeEntity.scala:
--------------------------------------------------------------------------------
1 | package com.lakala.datacenter.apply.model
2 |
3 | /**
4 | * Created by ASUS-PC on 2017/4/24.
5 | */
6 | case class NDegreeEntity(var attr: String = "",
7 | var initType: Int = 0,
8 | var loop: Int = 0)
9 | extends Serializable {
10 | }
11 |
--------------------------------------------------------------------------------
/apply/src/main/scala/com/lakala/datacenter/faund/DatasetTitanic.scala:
--------------------------------------------------------------------------------
1 | package com.lakala.datacenter.faund
2 |
3 | import java.util
4 |
5 | import org.apache.spark.SparkContext
6 | import org.apache.spark.mllib.linalg.Vectors
7 | import org.apache.spark.mllib.regression.LabeledPoint
8 | import org.apache.spark.rdd.RDD
9 | import org.apache.spark.sql.{DataFrame, SQLContext}
10 |
11 | /**
12 | * Created by Administrator on 2017/7/28 0028.
13 | */
14 | object DatasetTitanic {
15 | def createDF(sqlContext: SQLContext, inputFile: String): DataFrame = { // options
16 | val options = new util.HashMap[String, String]
17 | options.put("header", "true")
18 | options.put("path", inputFile)
19 | options.put("delimiter", ",")
20 | // create dataframe from input file
21 | val df = sqlContext.load("com.databricks.spark.csv", options)
22 | df.printSchema()
23 | df
24 | }
25 |
26 | // create an RDD of Vectors from a DataFrame
27 | def createLabeledPointsRDD(ctx: SparkContext, sqlContext: SQLContext, inputFile: String): RDD[LabeledPoint] = {
28 | val df = createDF(sqlContext, inputFile)
29 | // convert dataframe to an RDD of Vectors
30 | df.map { row =>
31 | val survived = row.getString(1).toInt
32 | val arr = new Array[Double](2)
33 | arr(0) = toDouble(row.getString(5))
34 | arr(1) = toDouble(row.getString(6))
35 | new LabeledPoint(survived, Vectors.dense(arr))
36 | }
37 | }
38 |
39 | def toDouble = (value: String) => {
40 | if (value.length == 0) 0.0 else value.toDouble
41 | }
42 | }
43 |
--------------------------------------------------------------------------------
/apply/src/main/scala/com/lakala/datacenter/faund/SparkConfUtil.scala:
--------------------------------------------------------------------------------
1 | package com.lakala.datacenter.faund
2 |
3 | import org.apache.spark.SparkConf
4 |
5 | /**
6 | * Created by Administrator on 2017/7/28 0028.
7 | */
8 | object SparkConfUtil {
9 | val isLocal = true;
10 |
11 | def setConf(conf: SparkConf): Unit = {
12 |
13 | if (isLocal) {
14 | conf.setMaster("local")
15 | conf.set("spark.broadcast.compress", "false")
16 | conf.set("spark.shuffle.compress", "false")
17 | }
18 | }
19 | }
20 |
--------------------------------------------------------------------------------
/apply/src/main/scala/com/lakala/datacenter/faund/Titanic.scala:
--------------------------------------------------------------------------------
1 | package com.lakala.datacenter.faund
2 |
3 | import org.apache.spark.{SparkConf, SparkContext}
4 | import org.apache.spark.mllib.regression.LabeledPoint
5 | import org.apache.spark.rdd.RDD
6 | import org.apache.spark.sql.{DataFrame, SQLContext}
7 |
8 | /**
9 | * Created by Administrator on 2017/7/28 0028.
10 | */
11 | object Titanic {
12 | def main(args: Array[String]) {
13 | if (args.length < 1) {
14 | System.err.println("Usage: Titanic <input_file>")
15 | System.exit(1)
16 | }
17 |
18 | val inputFile: String = args(0)
19 | val sparkConf: SparkConf = new SparkConf().setAppName("Titanic")
20 | SparkConfUtil.setConf(sparkConf)
21 |
22 | val sc: SparkContext = new SparkContext(sparkConf)
23 | val sqlContext: SQLContext = new SQLContext(sc)
24 | val results: DataFrame = DatasetTitanic.createDF(sqlContext, inputFile)
25 |
26 | results.printSchema
27 |
28 | val data: RDD[LabeledPoint] = DatasetTitanic.createLabeledPointsRDD(sc, sqlContext, inputFile)
29 | val splits: Array[RDD[LabeledPoint]] = data.randomSplit(Array[Double](0.7, 0.3))
30 | val trainingData: RDD[LabeledPoint] = splits(0)
31 | val testData: RDD[LabeledPoint] = splits(1)
32 |
33 | System.out.println("\nRunning example of classification using RandomForest\n")
34 | ScalaRandomForest.testClassification(trainingData, testData)
35 |
36 | System.out.println("\nRunning example of regression using RandomForest\n")
37 | ScalaRandomForest.testRegression(trainingData, testData)
38 |
39 | sc.stop
40 | }
41 | }
42 |
--------------------------------------------------------------------------------
/apply/src/main/scala/com/lakala/datacenter/jaccard/Jaccard.scala:
--------------------------------------------------------------------------------
1 | package com.lakala.datacenter.jaccard
2 |
3 | import org.apache.spark.graphx.{EdgeDirection, Graph}
4 | import org.apache.spark.rdd.RDD
5 |
6 | /**
7 | * Created by linyanshi on 2017/9/20 0020.
8 | */
9 | object Jaccard {
10 | /**
11 | * Return a RDD of (1-id, 2-id, similarity) where
12 | * 1-id < 2-id to avoid duplications
13 | *
14 | * @param graph
15 | * @return
16 | */
17 |
18 | def jaccardSimilarityAllMobiles(graph: Graph[Int, Int]): RDD[(Long, Long, Double)] = {
19 | val neighbors = graph.collectNeighborIds(EdgeDirection.Either).map(x => (x._1, x._2))
20 | val combinations = neighbors.cartesian(neighbors)
21 | val SimilarityAll = combinations.map { x => (x._1._1, x._2._1, jaccard(x._1._2.toSet, x._2._2.toSet)) }
22 | val result = SimilarityAll.map(x => (x._3, (x._1, x._2))).sortByKey(false, 1).map(x => (x._2._1, x._2._2, x._1))
23 | result
24 | }
25 |
26 | /**
27 | * Helper function
28 | * The Jaccard coefficient is defined as the ratio of the size of the intersection of A and B to the size of their union.
29 | * Given two sets, compute their Jaccard similarity and return the result.
30 | * If the union part is zero, then return 0.
31 | * @param a
32 | * @param b
33 | * @tparam A
34 | * @return
35 | */
36 | def jaccard[A](a: Set[A], b: Set[A]): Double = {
37 | val union: Double = (a ++ b).size
38 | val intersect: Double = a.intersect(b).size
39 | return (if (union == 0) 0.0 else (intersect / union))
40 | }
41 | }
42 |
--------------------------------------------------------------------------------
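A hypothetical usage sketch (not in the repository): build a small Graph[Int, Int] and rank the vertex pairs by Jaccard similarity with jaccardSimilarityAllMobiles. The JaccardDemo object and the hard-coded edges are illustrative assumptions.

import com.lakala.datacenter.jaccard.Jaccard
import org.apache.spark.graphx.{Edge, Graph}
import org.apache.spark.{SparkConf, SparkContext}

// Hypothetical demo, assuming the graphx-analysis classes are on the classpath.
object JaccardDemo {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setMaster("local[2]").setAppName("JaccardDemo"))
    // Four vertices sharing some neighbours.
    val edges = sc.parallelize(Seq(Edge(1L, 2L, 1), Edge(1L, 3L, 1), Edge(2L, 3L, 1), Edge(3L, 4L, 1)))
    val graph: Graph[Int, Int] = Graph.fromEdges(edges, defaultValue = 0)
    // Prints (id1, id2, similarity) triples, highest similarity first.
    Jaccard.jaccardSimilarityAllMobiles(graph).take(10).foreach(println)
    sc.stop()
  }
}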
/apply/src/main/scala/com/lakala/datacenter/jaccard/PowerIterationClustering.scala:
--------------------------------------------------------------------------------
1 | package com.lakala.datacenter.jaccard
2 |
3 | import org.apache.spark.mllib.clustering.PowerIterationClustering
4 | import org.apache.spark.rdd.RDD
5 |
6 | /**
7 | * Created by linyanshi on 2017/9/20 0020.
8 | */
9 | object PowerIterationClustering {
10 |
11 | /**
12 | * run PIC using Spark's PowerIterationClustering implementation
13 | * @param similarities All pair similarities in the shape of RDD[(selfmobile, caller, similarity)]
14 | * @return Cluster assignment for each mobile in the shape of RDD[(mobile, Cluster)]
15 | */
16 | def runPIC(similarities: RDD[(Long, Long, Double)]): RDD[(Long, Int)] = {
17 | val sc = similarities.sparkContext
18 |
19 |
20 | /** Remove placeholder code below and run Spark's PIC implementation */
21 | similarities.cache().count()
22 | val pic = new PowerIterationClustering().setK(3).setMaxIterations(100)
23 | val model=pic.run(similarities)
24 | val result = model.assignments.map(a => (a.id,a.cluster))
25 | val check = result.map(x=>x.swap).groupByKey().map(x=>(x._1,x._2.size))
26 |
27 | println("PIC: ")
28 | println(check.foreach(println))
29 |
30 | result
31 | }
32 | }
33 |
--------------------------------------------------------------------------------
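A hypothetical wiring sketch (assumed, not from the repository) showing how the Jaccard output can be fed straight into runPIC, since runPIC expects exactly the RDD[(Long, Long, Double)] shape that jaccardSimilarityAllMobiles returns. The JaccardPICPipeline name is illustrative.

import com.lakala.datacenter.jaccard.{Jaccard, PowerIterationClustering}
import org.apache.spark.graphx.Graph
import org.apache.spark.rdd.RDD

// Hypothetical glue object: similarity computation followed by clustering.
object JaccardPICPipeline {
  def cluster(graph: Graph[Int, Int]): RDD[(Long, Int)] = {
    // All-pairs (id1, id2, similarity) triples.
    val similarities = Jaccard.jaccardSimilarityAllMobiles(graph)
    // (vertexId, clusterId) assignments; k = 3 is hard-coded inside runPIC.
    PowerIterationClustering.runPIC(similarities)
  }
}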
/apply/src/main/scala/com/lakala/datacenter/louvain/HDFSLouvainRunner.scala:
--------------------------------------------------------------------------------
1 | package com.lakala.datacenter.louvain
2 |
3 | /**
4 | * Created by chenqingqing on 2017/4/4.
5 | */
6 |
7 |
8 | import org.apache.spark.SparkContext
9 | import org.apache.spark.graphx._
10 | import scala.Array.canBuildFrom
11 |
12 | /**
13 | * Execute the Louvain algorithm and save the vertices and edges in hdfs at each level.
14 | * Can also save locally if in local mode.
15 | *
16 | * See LouvainHarness for algorithm details
17 | */
18 | class HDFSLouvainRunner(minProgress: Int, progressCounter: Int, outputdir: String) extends LouvainHarness(minProgress: Int, progressCounter: Int) {
19 |
20 | var qValues = Array[(Int, Double)]()
21 |
22 | override def saveLevel(sc: SparkContext, level: Int, q: Double, graph: Graph[VertexState, Double]) = {
23 | graph.vertices.saveAsTextFile(outputdir + "/level_" + level + "_vertices")
24 | graph.edges.saveAsTextFile(outputdir + "/level_" + level + "_edges")
25 | qValues = qValues :+ ((level, q))
26 | println(s"qValue: $q")
27 |
28 | // overwrite the q values at each level
29 | sc.parallelize(qValues, 1).saveAsTextFile(outputdir + "/qvalues")
30 | }
31 |
32 | override def finalSave(sc: SparkContext, level: Int, q: Double, graph: Graph[VertexState, Double]) = {
33 | graph.vertices.filter(k=>k._1 != k._2.community).sortBy(k=>k._2.community).map { x => x._1 + "," + x._2 }.repartition(10).saveAsTextFile(outputdir)
34 | //graph.edges.saveAsTextFile(outputdir+"/final_edges")
35 |
36 | println(s"qValue: $q")
37 | }
38 |
39 |
40 | }
41 |
--------------------------------------------------------------------------------
/apply/src/main/scala/com/lakala/datacenter/louvain/VertexData.scala:
--------------------------------------------------------------------------------
1 | package com.lakala.datacenter.louvain
2 |
3 | import scala.collection.mutable.HashSet
4 |
5 | /**
6 | * Created by chenqingqing on 2017/4/4.
7 | */
8 | class VertexData(val vId: Long, var cId: Long) extends Serializable {
9 | var innerDegree = 0.0 //weight of the internal nodes
10 | var innerVertices = new HashSet[Long]() //internal nodes
11 | var degree = 0.0 //degree of the node
12 | var commVertices = new HashSet[Long]() //nodes in the community
13 | }
--------------------------------------------------------------------------------
/apply/src/main/scala/com/lakala/datacenter/louvain/VertexState.scala:
--------------------------------------------------------------------------------
1 | package com.lakala.datacenter.louvain
2 |
3 | /**
4 | * Louvain vertex state
5 | * Contains all information needed for louvain community detection
6 | */
7 | class VertexState extends Serializable {
8 |
9 | var community = -1L //community ID
10 | var communitySigmaTot = 0D //in-degree
11 | var internalWeight = 0D // self edges
12 | var nodeWeight = 0D //out degree
13 | var changed = false
14 | var q = 0D //modularity value
15 |
16 | override def toString(): String = {
17 | // "{community:"+community+",communitySigmaTot:"+communitySigmaTot+
18 | // ",internalWeight:"+internalWeight+",nodeWeight:"+nodeWeight+"}"
19 | // s"community:$community,communitySigmaTot:$communitySigmaTot,internalWeight:$internalWeight,nodeWeight:$nodeWeight"
20 | s"community:$community,q:$q"
21 | // community.toString
22 | }
23 | }
24 |
--------------------------------------------------------------------------------
/apply/src/main/scala/com/lakala/datacenter/main/CallHistoryPageRank.scala:
--------------------------------------------------------------------------------
1 | package com.lakala.datacenter.main
2 |
3 | import org.apache.log4j.{Level, Logger}
4 | import org.apache.spark.graphx.{Edge, Graph}
5 | import org.apache.spark.storage.StorageLevel
6 | import org.apache.spark.{SparkConf, SparkContext}
7 |
8 | /**
9 | * Created by linyanshi on 2017/9/19 0019.
10 | */
11 | object CallHistoryPageRank {
12 | def main(args: Array[String]): Unit = {
13 |
14 | Logger.getLogger("org.apache.spark").setLevel(Level.ERROR)
15 | Logger.getLogger("org.eclipse.jetty.server").setLevel(Level.ERROR)
16 |
17 | val conf = new SparkConf().setAppName("CallHistoryPageRank")
18 | val sc = new SparkContext(conf)
19 | val edgeRdd = sc.textFile(args(0)).mapPartitions(lines => lines.map { line =>
20 | // val arr = line.split("\t")
21 | // Edge(arr(0).toLong, arr(1).toLong, 1)
22 | val arr = line.split(",")
23 | Edge(arr(1).toLong, arr(2).toLong, 1)
24 | // Edge(arr(2).toLong, arr(1).toLong, arr(3).toInt)
25 | })
26 | // val graph = GraphLoader.edgeListFile(sc, args(0), numEdgePartitions = 4)
27 |
28 | val graph = Graph.fromEdges(edgeRdd, 1, edgeStorageLevel = StorageLevel.MEMORY_AND_DISK_SER, vertexStorageLevel = StorageLevel.MEMORY_AND_DISK_SER)
29 | //parameters: graph, convergence tolerance
30 | val pageRankGraph = graph.pageRank(0.0001)
31 |
32 | pageRankGraph.vertices.sortBy(x => x._2).mapPartitions(ls => ls.map(k => s"${k._1},${k._2}")).repartition(1).saveAsTextFile(args(1))
33 | sc.stop()
34 | }
35 | }
36 |
--------------------------------------------------------------------------------
/apply/src/main/scala/com/lakala/datacenter/main/Driver.scala:
--------------------------------------------------------------------------------
1 | package com.lakala.datacenter.main
2 |
3 | import com.lakala.datacenter.grogress.ExportNDegreeData
4 |
5 | /**
6 | * Created by Administrator on 2017/5/4 0004.
7 | */
8 |
9 |
10 | object Driver extends App {
11 | override def main(args: Array[String]) = {
12 | val enD = new ExportNDegreeData()
13 | enD.main(args)
14 | }
15 | }
16 |
--------------------------------------------------------------------------------
/apply/src/main/scala/com/lakala/datacenter/main/LPAAlgorithm.scala:
--------------------------------------------------------------------------------
1 | package com.lakala.datacenter.main
2 |
3 | import org.apache.log4j.{Level, Logger}
4 | import org.apache.spark.graphx.{Edge, Graph}
5 | import org.apache.spark.graphx.lib.LabelPropagation
6 | import org.apache.spark.storage.StorageLevel
7 | import org.apache.spark.{SparkConf, SparkContext}
8 | import ml.sparkling.graph.operators.OperatorsDSL._
9 |
10 | /**
11 | * Created by linyanshi on 2017/9/14 0014.
12 | */
13 | object LPAAlgorithm {
14 | def main(args: Array[String]): Unit = {
15 | Logger.getLogger("org.apache.spark").setLevel(Level.ERROR);
16 | Logger.getLogger("org.eclipse.jetty.server").setLevel(Level.ERROR);
17 |
18 | val conf = new SparkConf().setAppName("LPAAlgorithm")
19 | val sc = new SparkContext(conf)
20 | val edgeRdd = sc.textFile(args(0)).mapPartitions(lines => lines.map { line =>
21 | val arr = line.split(",")
22 | // Edge(arr(1).toLong,arr(2).toLong,arr(3).toInt)
23 | Edge(arr(1).toLong,arr(2).toLong,1)
24 | // Edge(arr(0).toLong,arr(1).toLong,1)
25 | })
26 | // val graph = GraphLoader.edgeListFile(sc, args(0), numEdgePartitions = 4)
27 |
28 | val graph = Graph.fromEdges(edgeRdd,1,edgeStorageLevel=StorageLevel.MEMORY_AND_DISK_SER,vertexStorageLevel=StorageLevel.MEMORY_AND_DISK_SER)
29 | //parameters: graph, number of iterations
30 | val lpaGraph = LabelPropagation.run(graph.reverse, args(2).toInt)
31 | val modularity = lpaGraph.modularity()
32 | println(modularity)
33 | lpaGraph.vertices.sortBy(x => x._2).mapPartitions(ls=>ls.map(k=>s"${k._1},${k._2}")).repartition(1).saveAsTextFile(args(1))
34 | sc.stop()
35 | }
36 |
37 |
38 | }
39 |
--------------------------------------------------------------------------------
/apply/src/main/scala/com/lakala/datacenter/main/LPCoarseAlgorithm.scala:
--------------------------------------------------------------------------------
1 | package com.lakala.datacenter.main
2 |
3 | import ml.sparkling.graph.operators.OperatorsDSL._
4 | import org.apache.log4j.{Level, Logger}
5 | import org.apache.spark.graphx.{Edge, Graph}
6 | import org.apache.spark.storage.StorageLevel
7 | import org.apache.spark.{SparkConf, SparkContext}
8 |
9 | /**
10 | * Created by linyanshi on 2017/9/18 0018.
11 | */
12 | object LPCoarseAlgorithm {
13 | def main(args: Array[String]): Unit = {
14 | Logger.getLogger("org.apache.spark").setLevel(Level.ERROR)
15 | Logger.getLogger("org.eclipse.jetty.server").setLevel(Level.ERROR)
16 |
17 | val conf = new SparkConf().setAppName("LPCoarseAlgorithm")
18 | val sc = new SparkContext(conf)
19 | val edgeRdd = sc.textFile(args(0)).mapPartitions(lines => lines.map { line =>
20 | // val arr = line.split("\t")
21 | // Edge(arr(0).toLong, arr(1).toLong, 1)
22 | val arr = line.split(",")
23 | Edge(arr(1).toLong, arr(2).toLong, 1)
24 | })
25 | // val graph = GraphLoader.edgeListFile(sc, args(0), numEdgePartitions = 4)
26 |
27 | val graph = Graph.fromEdges(edgeRdd, 1, edgeStorageLevel = StorageLevel.MEMORY_AND_DISK_SER, vertexStorageLevel = StorageLevel.MEMORY_AND_DISK_SER)
28 | //parameter: graph (treated as undirected)
29 | val lpaGraph = graph.LPCoarse(treatAsUndirected = true)
30 | // val modularity = lpaGraph.modularity()
31 | // println(modularity)
32 | lpaGraph.vertices.mapPartitions(kcs => kcs.map(kc => (kc._1, kc._2.sortBy(k => k).head)))
33 | .filter(k => k._1 != k._2).sortBy(x => x._2)
34 | /*.mapPartitions(ls => ls.map(k => s"${k._1},${k._2.mkString(",")}"))*/ .repartition(1).saveAsTextFile(args(1))
35 | sc.stop()
36 | }
37 |
38 |
39 |
40 |
41 | }
42 |
--------------------------------------------------------------------------------
/apply/src/main/scala/com/lakala/datacenter/main/LiveCommunityDetection.scala:
--------------------------------------------------------------------------------
1 | package com.lakala.datacenter.main
2 |
3 | /**
4 | * Created by linyanshi on 2017/9/25 0025.
5 | */
6 | object LiveCommunityDetection {
7 | def main(args: Array[String]): Unit = {
8 | if (args.length < 1) {
9 | System.err.println(
10 | "Usage: LiveCommunityDetection \n" +
11 | " --numEPart=\n" +
12 | " The number of partitions for the graph's edge RDD.\n" +
13 | " [--tol=]\n" +
14 | " The tolerance allowed at convergence (smaller => more accurate). Default is " +
15 | "0.001.\n" +
16 | " [--output=]\n" +
17 | " If specified, the file to write the ranks to.\n" +
18 | " [--partStrategy=RandomVertexCut | EdgePartition1D | EdgePartition2D | " +
19 | "CanonicalRandomVertexCut]\n" +
20 | " The way edges are assigned to edge partitions. Default is RandomVertexCut.")
21 | System.exit(-1)
22 | }
23 | //file/data/graphx/input/followers.txt -numEPart=100 -tol=0.001 -output=F:\idea_workspace\SparkLearning\outfile -partStrategy=RandomVertexCut
24 | Analytics.main(args.patch(0, List("pagerank"), 0))
25 | }
26 | }
27 |
--------------------------------------------------------------------------------
/apply/src/main/scala/com/lakala/datacenter/main/LouvainDGA.scala:
--------------------------------------------------------------------------------
1 | package com.lakala.datacenter.main
2 |
3 | /**
4 | * Created by linyanshi on 2017/9/14 0014.
5 | */
6 |
7 | import com.lakala.datacenter.louvain.{HDFSLouvainRunner, VertexState}
8 | import org.apache.log4j.{Level, Logger}
9 | import org.apache.spark.graphx.{Edge, Graph}
10 | import org.apache.spark.{SparkConf, SparkContext}
11 |
12 | //totalEdgeWeight: 1.56262281191699E15
13 | //# vertices moved: 61,897,309
14 | //# vertices moved: 13,746,461
15 | //# vertices moved: 5,352,635
16 | //# vertices moved: 130,270
17 | //# vertices moved: 82,426
18 | //# vertices moved: 71,584
19 | //# vertices moved: 71,105
20 | //# vertices moved: 70,030
21 | //# vertices moved: 69,937
22 | //
23 | //Completed in 18 cycles
24 | //
25 | //Starting Louvain level 1
26 | //totalEdgeWeight: 2.237895102976331E15
27 | //# vertices moved: 664,919
28 | //# vertices moved: 191,039
29 | //# vertices moved: 12,426
30 | //# vertices moved: 393
31 | //# vertices moved: 7
32 | //# vertices moved: 0
33 | //
34 | //Completed in 12 cycles
35 | //qValue: 0.9182326588364285
36 | // Total users: 1,232,060; total call_phone users: 101,825,071
37 | // Total communities: 275,141; community ids with more than two members: 77,442; communities linked to the blacklist: 1,784
38 |
39 | object LouvainDGA {
40 | def main(args: Array[String]) {
41 | Logger.getLogger("org.apache.spark").setLevel(Level.WARN)
42 | Logger.getLogger("org.eclipse.jetty.server").setLevel(Level.OFF)
43 | val conf = new SparkConf().setAppName("LouvainDGA")
44 | conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
45 | conf.registerKryoClasses(Array(classOf[VertexState]))
46 | // args: 0 = input path, 1 = output path, 2 = minProgress, 3 = progressCounter
47 | val sc = new SparkContext(conf)
48 | val data = sc.textFile(args(0))
49 | val edges = data.map(line => {
50 | val items = line.split(",")
51 | // Edge(items(0).toLong, items(1).toLong, items(2).toDouble)
52 | Edge(items(1).toLong, items(2).toLong, items(3).toDouble)
53 | // Edge(items(1).toLong, items(2).toLong, 1d)
54 | })
55 | val graph = Graph.fromEdges(edges, 1)
56 | val runner = new HDFSLouvainRunner(args(2).toInt, args(3).toInt, args(1))
57 | runner.run(sc, graph)
58 | sc.stop()
59 | }
60 | }
61 |
62 |
--------------------------------------------------------------------------------
/apply/src/main/scala/com/lakala/datacenter/main/PICCallAlgorithm.scala:
--------------------------------------------------------------------------------
1 | package com.lakala.datacenter.main
2 |
3 | import org.apache.log4j.{Level, Logger}
4 | import org.apache.spark.graphx.{Edge, Graph}
5 | import org.apache.spark.mllib.clustering.PowerIterationClustering
6 | import org.apache.spark.storage.StorageLevel
7 | import org.apache.spark.{SparkConf, SparkContext}
8 |
9 | /**
10 | * Created by linyanshi on 2017/9/20 0020.
11 | * http://blog.sina.com.cn/s/blog_482da2d20102drpt.html
12 | */
13 | object PICCallAlgorithm {
14 | def main(args: Array[String]) {
15 | Logger.getLogger("org.apache.spark").setLevel(Level.WARN)
16 | Logger.getLogger("org.eclipse.jetty.server").setLevel(Level.OFF)
17 | val conf = new SparkConf().setAppName("PICCallAlgorithm")
18 |
19 | val sc = new SparkContext(conf)
20 | val data = sc.textFile(args(0), 200)
21 | val edges = data.map(line => {
22 | val items = line.split(",")
23 | Edge(items(1).toLong, items(2).toLong, 1)
24 | // val items = line.split("\t")
25 | // Edge(items(0).toLong, items(1).toLong, 1)
26 | })
27 | val graph = Graph.fromEdges(edges, 1, edgeStorageLevel = StorageLevel.MEMORY_AND_DISK_SER, vertexStorageLevel = StorageLevel.MEMORY_AND_DISK_SER)
28 | // Run PageRank (tolerance 0.0001) on the graph, then cluster the ranked vertices with PIC
29 | val pageRankGraph = graph.pageRank(0.0001)
30 | val pic = new PowerIterationClustering().setK(args(2).toInt).setMaxIterations(args(3).toInt).setInitializationMode("degree")
31 | val model = pic.run(pageRankGraph)
32 | val result = model.assignments.map(a => (a.id, a.cluster))
33 | result.mapPartitions(ves => ves.map(ve => s"${ve._1},${ve._2}")).repartition(1).saveAsTextFile(args(1))
34 | // val landmarks = sc.textFile("/user/guozhijie/explortoutput/louvainout4")
35 | // .mapPartitions(lines=>lines.map(line=>{val arr =line.split(",")
36 | // arr(1).toLong})).distinct().top(args(2).toInt)
37 | // val landmarks = data.map(line => {
38 | // val items = line.split(",")
39 | // items(1).toLong
40 | // }).distinct().top(args(2).toInt)
41 | // val landmarksBR = sc.broadcast(landmarks)
42 | // val shortPathGraph = ShortestPaths.run(graph, landmarksBR.value)
43 | // graph.unpersist()
44 | //
45 | // implicit def iterebleWithAvg[T: Numeric](data: Iterable[T]) = new {
46 | // def avg = average(data)
47 | // }
48 | //
49 | // def average[T](ts: Iterable[T])(implicit num: Numeric[T]) = {
50 | // num.toDouble(ts.sum) / ts.size
51 | // }
52 | //
53 | // shortPathGraph.vertices.map {
54 | // vx =>
55 | // (vx._1, {
56 | // val dx = 1.0 / vx._2.map {
57 | // sx => sx._2
58 | // }.seq.avg
59 | // val d = if (dx.isNaN | dx.isNegInfinity | dx.isPosInfinity) 0.0 else dx
60 | // d
61 | // })
62 | // }.sortBy({ vx => vx._1 }, ascending = true)
63 | // .mapPartitions(rows => rows.filter(k => k._2 > 0d).map(row => s"${row._1},${row._2}")).repartition(1).saveAsTextFile(args(1))
64 | // val similarities = Jaccard.jaccardSimilarityAllMobiles(graph)
65 | // val centralityGraph: Graph[(Double,Double),Int] = graph.hits(VertexMeasureConfiguration(treatAsUndirected=true))
66 | // val picLabels = PowerIterationClustering.runPIC(similarities)
67 | // picLabels.mapPartitions(lca => lca.map(l => s"${l._1},${l._2}")).repartition(1).saveAsTextFile(args(1))
68 |
69 | // val vertexembeddedness = graph.closenessCentrality(VertexMeasureConfiguration(treatAsUndirected = true))
70 | // vertexembeddedness.vertices.mapPartitions(ves=>ves.map(ve=>s"${ve._1},${ve._2}")).repartition(1).saveAsTextFile(args(1))
71 | sc.stop()
72 | }
73 | }
74 |
--------------------------------------------------------------------------------
/apply/src/main/scala/com/lakala/datacenter/main/PSCANAlgorithm.scala:
--------------------------------------------------------------------------------
1 | package com.lakala.datacenter.main
2 |
3 | import ml.sparkling.graph.api.operators.measures.VertexMeasureConfiguration
4 | import ml.sparkling.graph.operators.OperatorsDSL._
5 | import ml.sparkling.graph.operators.algorithms.community.pscan.PSCAN
6 | import org.apache.log4j.{Level, Logger}
7 | import org.apache.spark.graphx.{Edge, Graph}
8 | import org.apache.spark.storage.StorageLevel
9 | import org.apache.spark.{SparkConf, SparkContext}
10 |
11 | /**
12 | * Created by linyanshi on 2017/9/18 0018.
13 | */
14 | object PSCANAlgorithm {
15 |
16 | def main(args: Array[String]): Unit = {
17 | Logger.getLogger("org.apache.spark").setLevel(Level.ERROR)
18 | Logger.getLogger("org.eclipse.jetty.server").setLevel(Level.ERROR)
19 |
20 | val conf = new SparkConf().setAppName("PSCANAlgorithm")
21 | val sc = new SparkContext(conf)
22 | val edgeRdd = sc.textFile(args(0)).mapPartitions(lines => lines.map { line =>
23 | // val arr = line.split("\t")
24 | // Edge(arr(0).toLong, arr(1).toLong, 1)
25 | val arr = line.split(",")
26 | // Edge(arr(1).toLong, arr(2).toLong, 1)
27 | Edge(arr(1).toLong, arr(2).toLong, arr(3).toInt)
28 | })
29 | // val graph = GraphLoader.edgeListFile(sc, args(0), numEdgePartitions = 4)
30 |
31 | val graph = Graph.fromEdges(edgeRdd, 1, edgeStorageLevel = StorageLevel.MEMORY_AND_DISK_SER, vertexStorageLevel = StorageLevel.MEMORY_AND_DISK_SER)
32 | // Parameters: graph, epsilon threshold for PSCAN
33 | val pscanGraph = PSCAN.computeConnectedComponents(graph, 0.000001)
34 | // val lpaGraph = PSCAN.computeConnectedComponentsUsing(graph, args(2).toInt)
35 | val modularity = pscanGraph.modularity()
36 |
37 | println(modularity)
38 |
39 |
40 | pscanGraph.vertices.filter(k => k._1 != k._2).sortBy(x => x._2).mapPartitions(ls => ls.map(k => s"${k._1},${k._2}")).repartition(1).saveAsTextFile(args(1))
41 | sc.stop()
42 | }
43 |
44 |
45 | }
46 |
--------------------------------------------------------------------------------
/apply/src/main/scala/com/lakala/datacenter/talk/types/City.scala:
--------------------------------------------------------------------------------
1 | package com.lakala.datacenter.talk.types
2 |
3 | import org.apache.spark.graphx.VertexId
4 |
5 | case class City(name: String, id: VertexId) {
6 | override def toString() = name + " [" + id + "]"
7 | }
8 |
--------------------------------------------------------------------------------
/apply/src/main/scala/com/lakala/datacenter/talk/types/Person.scala:
--------------------------------------------------------------------------------
1 | package com.lakala.datacenter.talk.types
2 |
3 | case class Person(name: String, age: Int)
4 |
--------------------------------------------------------------------------------
/apply/src/main/scala/com/lakala/datacenter/talk/types/VertexAttribute.scala:
--------------------------------------------------------------------------------
1 | package com.lakala.datacenter.talk.types
2 |
3 | case class VertexAttribute(cityName: String, distance: Double, path: List[City])
--------------------------------------------------------------------------------
/apply/src/main/scala/com/lakala/datacenter/utils/UtilsToos.scala:
--------------------------------------------------------------------------------
1 | package com.lakala.datacenter.utils
2 |
3 | import java.nio.charset.StandardCharsets
4 |
5 | import com.google.common.hash.Hashing
6 | import com.lakala.datacenter.common.utils.DateTimeUtils
7 |
8 | import scala.util.matching.Regex
9 |
10 | /**
11 | * Created by ASUS-PC on 2017/4/18.
12 | */
13 | object UtilsToos {
14 | /**
15 | * Generate a unique, deterministic long hash code from a string
16 | *
17 | * @param str
18 | * @return
19 | */
20 | def hashId(str: String) = {
21 | Hashing.md5().hashString(str, StandardCharsets.UTF_8).asLong()
22 | }
23 |
24 | /**
25 | * Validate mobile and landline phone numbers.
26 | *
27 | * @param num
28 | * @return true if validation passes
29 | */
30 | def isMobileOrPhone(num: String): Boolean = {
31 | val pattern = new Regex("^((17[0-9])|(14[0-9])|(13[0-9])|(15[^4,\\D])|(18[0,5-9]))\\d{8}$")
32 | val pattern2 = new Regex("(?:(\\(\\+?86\\))(0[0-9]{2,3}\\-?)?([2-9][0-9]{6,7})+(\\-[0-9]{1,4})?)|(?:(86-?)?(0[0-9]{2,3}\\-?)?([2-9][0-9]{6,7})+(\\-[0-9]{1,4})?)") // landline numbers with an area code
33 | // val pattern2 = new Regex("^[0][1-9]{2,3}-[0-9]{5,10}$") // landline numbers with an area code
34 | val pattern3 = new Regex("^[1-9]{1}[0-9]{5,8}$") // landline numbers without an area code
35 | num match {
36 | case pattern(_*) => {
37 | true
38 | }
39 | case pattern2(_*) => {
40 | true
41 | }
42 | case pattern3(_*) => {
43 | true
44 | }
45 | case _ => {
46 | false
47 | }
48 | }
49 | }
50 |
51 | def jugeInit(dataDt: String, sdt: String, edt: String): Boolean = {
52 | var init = false
53 | try {
54 | init = if (DateTimeUtils.parseDataString(dataDt).getMillis >= DateTimeUtils.parseDataString(sdt).getMillis
55 | && DateTimeUtils.parseDataString(dataDt).getMillis <= DateTimeUtils.parseDataString(edt).getMillis) true
56 | else false
57 | } catch {
58 | case e: Exception =>
59 | }
60 | init
61 | }
62 |
63 | def byDateFileterData(line: String, edt: String): Boolean = {
64 | var init = false
65 | try {
66 | val arr = line.split(",")
67 | val dt = if (arr(5).indexOf(".") > 0) arr(5).substring(0, arr(5).indexOf(".")) else arr(5)
68 | init = if (DateTimeUtils.parseDataString(dt).getMillis <= DateTimeUtils.parseDataString(edt).getMillis) true
69 | else false
70 | } catch {
71 | case e: Exception =>
72 | }
73 | init
74 | }
75 | }
76 |
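An illustrative usage sketch for the two helpers above (the sample strings are made up, and the sketch object is not part of the project): hashId yields a stable Long that can serve as a GraphX vertex id, and isMobileOrPhone screens the number formats covered by the three regular expressions.

import com.lakala.datacenter.utils.UtilsToos._

object UtilsToosSketch extends App {
  // hashId: deterministic MD5-based Long for an arbitrary string, usable as a vertex id.
  println(hashId("XNW28459058720408576"))

  // isMobileOrPhone: true for a well-formed mobile number, false for anything else.
  println(isMobileOrPhone("13912345678"))  // expected: true
  println(isMobileOrPhone("not-a-number")) // expected: false
}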
--------------------------------------------------------------------------------
/apply/src/main/scala/edu/gatech/cse8803/clustering/PowerIterationClustering.scala:
--------------------------------------------------------------------------------
1 | /**
2 | * @author Ting Pan .
3 | */
4 |
5 | package edu.gatech.cse8803.clustering
6 |
7 | import org.apache.spark.rdd.RDD
8 | import org.apache.spark.mllib.clustering.{PowerIterationClustering => PIC}
9 | import org.apache.spark.mllib.clustering.PowerIterationClustering
10 |
11 |
12 | /**
13 | * Power Iteration Clustering (PIC), a scalable graph clustering algorithm developed by
14 | * [[http://www.icml2010.org/papers/387.pdf Lin and Cohen]]. From the abstract: PIC finds a very
15 | * low-dimensional embedding of a dataset using truncated power iteration on a normalized pair-wise
16 | * similarity matrix of the data.
17 | *
18 | * @see [[http://en.wikipedia.org/wiki/Spectral_clustering Spectral clustering (Wikipedia)]]
19 | */
20 |
21 | object PowerIterationClustering {
22 |
23 | /** run PIC using Spark's PowerIterationClustering implementation
24 | *
25 | * @param similarities all pairwise similarities, in the shape of RDD[(patientID1, patientID2, similarity)]
26 | * @return cluster assignment for each patient, in the shape of RDD[(patientID, cluster)]
27 | *
28 | * */
29 |
30 | def runPIC(similarities: RDD[(Long, Long, Double)]): RDD[(Long, Int)] = {
31 | val sc = similarities.sparkContext
32 |
33 |
34 | /** Run Spark's PIC implementation on the cached similarities */
35 | similarities.cache().count()
36 | val pic = new PowerIterationClustering().setK(3).setMaxIterations(100)
37 | val model=pic.run(similarities)
38 | val result = model.assignments.map(a => (a.id,a.cluster))
39 | //val check = result.map(x=>x.swap).groupByKey().map(x=>(x._1,x._2.size))
40 |
41 | //println("PIC: ")
42 | //println(check.foreach(println))
43 |
44 | result
45 | }
46 | }
47 |
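A minimal driver sketch for runPIC above; the similarity triples and the local master setting are invented purely to show the documented input and output shapes.

import edu.gatech.cse8803.clustering.PowerIterationClustering
import org.apache.spark.{SparkConf, SparkContext}

object RunPICSketch extends App {
  val sc = new SparkContext(new SparkConf().setAppName("RunPICSketch").setMaster("local[2]"))
  // (patientID1, patientID2, similarity) triples, the shape runPIC expects.
  val similarities = sc.parallelize(Seq(
    (1L, 2L, 0.9), (2L, 3L, 0.8), (1L, 3L, 0.7),
    (4L, 5L, 0.9), (5L, 6L, 0.8), (4L, 6L, 0.7)))
  // Returns RDD[(patientID, cluster)]; with k fixed to 3 inside runPIC, the assignments land in three clusters.
  PowerIterationClustering.runPIC(similarities).collect().foreach(println)
  sc.stop()
}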
--------------------------------------------------------------------------------
/apply/src/main/scala/edu/gatech/cse8803/ioutils/CSVUtils.scala:
--------------------------------------------------------------------------------
1 | /**
2 | * @author Ting Pan .
3 | */
4 | package edu.gatech.cse8803.ioutils
5 |
6 | import org.apache.spark.sql.SchemaRDD
7 | import org.apache.spark.sql.SQLContext
8 | import com.databricks.spark.csv.CsvContext
9 |
10 |
11 | object CSVUtils {
12 | def loadCSVAsTable(sqlContext: SQLContext, path: String, tableName: String): SchemaRDD = {
13 | val data = sqlContext.csvFile(path)
14 | data.registerTempTable(tableName)
15 | data
16 | }
17 |
18 | def loadCSVAsTable(sqlContext: SQLContext, path: String): SchemaRDD = {
19 | loadCSVAsTable(sqlContext, path, inferTableNameFromPath(path))
20 | }
21 |
22 | private val pattern = "(\\w+)(\\.csv)?$".r.unanchored
23 | def inferTableNameFromPath(path: String) = path match {
24 | case pattern(filename, extension) => filename
25 | case _ => path
26 | }
27 | }
28 |
--------------------------------------------------------------------------------
/apply/src/main/scala/edu/gatech/cse8803/jaccard/Jaccard.scala:
--------------------------------------------------------------------------------
1 | /**
2 | * *
3 | * @author: Ting Pan
4 | **/
5 | package edu.gatech.cse8803.jaccard
6 |
7 | import edu.gatech.cse8803.model._
8 | import edu.gatech.cse8803.model.{EdgeProperty, VertexProperty}
9 | import org.apache.spark.graphx._
10 | import org.apache.spark.rdd.RDD
11 |
12 | object Jaccard {
13 |
14 | def jaccardSimilarityOneVsAll(graph: Graph[VertexProperty, EdgeProperty], patientID: Long): List[Long] = {
15 | /**
16 | * Given a patient ID, compute the Jaccard similarity w.r.t. all other patients.
17 | * Return a List of patient IDs ordered by the highest to the lowest similarity.
18 | * For ties, random order is okay
19 | */
20 |
21 |
22 | val neighbors = graph.collectNeighborIds(EdgeDirection.Either).map(x => (x._1, x._2.filter(p => p > 1000))).filter(_._1 <= 1000)
23 | val neighbors_wo_patient = neighbors.filter(_._1 != patientID)
24 | val source = neighbors.filter(_._1 == patientID).map(_._2).collect.flatten.toSet
25 | val SimilarityOneVsAll = neighbors_wo_patient.map { case (vid, nbrs) => (vid, jaccard(source, nbrs.toSet)) }
26 | val result = SimilarityOneVsAll.sortBy(_._2, false).map(_._1).take(10).toList
27 | result
28 | }
29 |
30 | def jaccardSimilarityAllPatients(graph: Graph[VertexProperty, EdgeProperty]): RDD[(Long, Long, Double)] = {
31 | /**
32 | * Given a patient, med, diag, lab graph, calculate pairwise similarity between all
33 | * patients. Return an RDD of (patient-1-id, patient-2-id, similarity) where
34 | * patient-1-id < patient-2-id to avoid duplications
35 | */
36 | val neighbors = graph.collectNeighborIds(EdgeDirection.Either).map(x => (x._1, x._2.filter(p => p > 1000))).filter(_._1 <= 1000)
37 | val combinations = neighbors.cartesian(neighbors).filter { case (a, b) => a._1 < b._1 }
38 | val SimilarityAll = combinations.map { x => (x._1._1, x._2._1, jaccard(x._1._2.toSet, x._2._2.toSet)) }
39 | val result = SimilarityAll.map(x => (x._3, (x._1, x._2))).sortByKey(false, 1).map(x => (x._2._1, x._2._2, x._1))
40 | result
41 | }
42 |
43 | def jaccard[A](a: Set[A], b: Set[A]): Double = {
44 | /**
45 | * Helper function
46 | * *
47 | * Given two sets, compute their Jaccard similarity and return the result.
48 | * If the union part is zero, then return 0.
49 | */
50 |
51 |
52 | val union: Double = (a ++ b).size
53 | val intersect: Double = a.intersect(b).size
54 | return (if (union == 0) 0.0 else (intersect / union))
55 | }
56 |
57 | }
58 |
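A worked example of the jaccard helper above, with invented sets: a = {1, 2, 3} and b = {2, 3, 4} share 2 elements out of a union of 4, so the similarity is 2/4 = 0.5.

object JaccardSketch extends App {
  val a = Set(1L, 2L, 3L)
  val b = Set(2L, 3L, 4L)
  // intersection size 2, union size 4 => 2.0 / 4.0 == 0.5
  println(edu.gatech.cse8803.jaccard.Jaccard.jaccard(a, b)) // 0.5
}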
--------------------------------------------------------------------------------
/apply/src/main/scala/edu/gatech/cse8803/main/Main.scala:
--------------------------------------------------------------------------------
1 | /**
2 | * @author Ting Pan .
3 | */
4 |
5 | package edu.gatech.cse8803.main
6 |
7 | import java.text.SimpleDateFormat
8 |
9 | import edu.gatech.cse8803.ioutils.CSVUtils
10 | import edu.gatech.cse8803.jaccard.Jaccard
11 | import edu.gatech.cse8803.model._
12 | import edu.gatech.cse8803.randomwalk.RandomWalk
13 | import edu.gatech.cse8803.clustering.PowerIterationClustering
14 | import org.apache.spark.rdd.RDD
15 |
16 | import org.apache.spark.sql.SQLContext
17 | import org.apache.spark.{SparkConf, SparkContext}
18 | import edu.gatech.cse8803.graphconstruct.GraphLoader
19 |
20 |
21 | object Main {
22 | def main(args: Array[String]) {
23 | import org.apache.log4j.Logger
24 | import org.apache.log4j.Level
25 |
26 | Logger.getLogger("org").setLevel(Level.WARN)
27 | Logger.getLogger("akka").setLevel(Level.WARN)
28 |
29 | val sc = createContext
30 | val sqlContext = new SQLContext(sc)
31 |
32 | /** initialize loading of data */
33 | val (patient, medication, labResult, diagnostic) = loadRddRawData(sqlContext)
34 | val patientGraph = GraphLoader.load(patient, labResult, medication, diagnostic)
35 |
36 | println(Jaccard.jaccardSimilarityOneVsAll(patientGraph, 9))
37 | println(RandomWalk.randomWalkOneVsAll(patientGraph, 9))
38 |
39 | val similarities = Jaccard.jaccardSimilarityAllPatients(patientGraph)
40 |
41 | val PICLabels = PowerIterationClustering.runPIC(similarities)
42 |
43 | sc.stop()
44 | }
45 |
46 | def loadRddRawData(sqlContext: SQLContext): (RDD[PatientProperty], RDD[Medication], RDD[LabResult], RDD[Diagnostic]) = {
47 |
48 | val dateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssX")
49 | /** test data, must change back!! */
50 | List("data/PATIENT.csv", "data/LAB.csv", "data/DIAGNOSTIC.csv", "data/MEDICATION.csv")
51 | .foreach(CSVUtils.loadCSVAsTable(sqlContext, _))
52 |
53 | val patient = sqlContext.sql( // fix this
54 | """
55 | |SELECT subject_id, sex, dob, dod
56 | |FROM PATIENT
57 | """.stripMargin)
58 | .map(r => PatientProperty(r(0).toString, r(1).toString, r(2).toString, r(3).toString))
59 |
60 | val labResult = sqlContext.sql(
61 | """
62 | |SELECT subject_id, date, lab_name, value
63 | |FROM LAB
64 | |WHERE value IS NOT NULL and value <> ''
65 | """.stripMargin)
66 | .map(r => LabResult(r(0).toString, r(1).toString.toLong, r(2).toString, r(3).toString))
67 |
68 | val diagnostic = sqlContext.sql(
69 | """
70 | |SELECT subject_id, date, code, sequence
71 | |FROM DIAGNOSTIC
72 | """.stripMargin)
73 | .map(r => Diagnostic(r(0).toString, r(1).toString.toLong, r(2).toString, r(3).toString.toInt))
74 |
75 | val medication = sqlContext.sql(
76 | """
77 | |SELECT subject_id, date, med_name
78 | |FROM MEDICATION
79 | """.stripMargin)
80 | .map(r => Medication(r(0).toString, r(1).toString.toLong, r(2).toString))
81 |
82 | (patient, medication, labResult, diagnostic)
83 |
84 | }
85 |
86 |
87 | def createContext(appName: String, masterUrl: String): SparkContext = {
88 | val conf = new SparkConf().setAppName(appName).setMaster(masterUrl)
89 | new SparkContext(conf)
90 | }
91 |
92 | def createContext(appName: String): SparkContext = createContext(appName, "local")
93 |
94 | def createContext: SparkContext = createContext("CSE 8803 Homework Three Application", "local")
95 | }
96 |
--------------------------------------------------------------------------------
/apply/src/main/scala/edu/gatech/cse8803/model/models.scala:
--------------------------------------------------------------------------------
1 | /**
2 | * @author Ting Pan .
3 | */
4 |
5 | package edu.gatech.cse8803.model
6 |
7 | case class LabResult(patientID: String, date: Long, labName: String, value: String)
8 |
9 | case class Diagnostic(patientID: String, date: Long, icd9code: String, sequence: Int)
10 |
11 | case class Medication(patientID: String, date: Long, medicine: String)
12 |
13 | abstract class VertexProperty
14 |
15 | case class PatientProperty(patientID: String, sex: String, dob: String, dod: String) extends VertexProperty
16 |
17 | case class LabResultProperty(testName: String) extends VertexProperty
18 |
19 | case class DiagnosticProperty(icd9code: String) extends VertexProperty
20 |
21 | case class MedicationProperty(medicine: String) extends VertexProperty
22 |
23 | abstract class EdgeProperty
24 |
25 | case class SampleEdgeProperty(name: String = "Sample") extends EdgeProperty
26 |
27 | case class PatientLabEdgeProperty(labResult: LabResult) extends EdgeProperty
28 |
29 | case class PatientDiagnosticEdgeProperty(diagnostic: Diagnostic) extends EdgeProperty
30 |
31 | case class PatientMedicationEdgeProperty(medication: Medication) extends EdgeProperty
32 |
33 |
--------------------------------------------------------------------------------
/apply/src/main/scala/edu/gatech/cse8803/randomwalk/randomwalk.scala:
--------------------------------------------------------------------------------
1 | package edu.gatech.cse8803.randomwalk
2 |
3 | import edu.gatech.cse8803.model.{PatientProperty, EdgeProperty, VertexProperty}
4 | import org.apache.spark.graphx._
5 |
6 | object RandomWalk {
7 |
8 | def randomWalkOneVsAll(graph: Graph[VertexProperty, EdgeProperty], patientID: Long, numIter: Int = 100, alpha: Double = 0.15): List[Long] = {
9 | /**
10 | * Given a patient ID, compute the random walk probability w.r.t. all other patients.
11 | * Return a List of patient IDs ordered by the highest to the lowest similarity.
12 | * For ties, random order is okay
13 | */
14 |
15 | val patient = graph.vertices.filter(_._2.isInstanceOf[PatientProperty])
16 | val patient_count = patient.keys.max()
17 |
18 | val personalized = true
19 | val src: VertexId = patientID
20 |
21 | var rankGraph: Graph[Double, Double] = graph
22 | // Associate the degree with each vertex
23 | .outerJoinVertices(graph.outDegrees) { (vid, vdata, deg) => deg.getOrElse(0) }
24 | // Set the weight on the edges based on the degree
25 | .mapTriplets( e => 1.0 / e.srcAttr, TripletFields.Src )
26 | // Set the vertex attributes to the initial pagerank values
27 | .mapVertices { (id, attr) =>
28 | if (!(id != src && personalized)) alpha else 0.0
29 | }
30 |
31 | def delta(u: VertexId, v: VertexId): Double = { if (u == v) 1.0 else 0.0 }
32 |
33 | var iteration = 0
34 | var prevRankGraph: Graph[Double, Double] = null
35 | while (iteration < numIter) {
36 | rankGraph.cache()
37 |
38 | // Compute the outgoing rank contributions of each vertex, perform local preaggregation, and
39 | // do the final aggregation at the receiving vertices. Requires a shuffle for aggregation.
40 | val rankUpdates = rankGraph.aggregateMessages[Double](
41 | ctx => ctx.sendToDst(ctx.srcAttr * ctx.attr), _ + _, TripletFields.Src)
42 |
43 | // Apply the final rank updates to get the new ranks, using join to preserve ranks of vertices
44 | // that didn't receive a message. Requires a shuffle for broadcasting updated ranks to the
45 | // edge partitions.
46 | prevRankGraph = rankGraph
47 | // new update rule
48 | //PR[i] = (1 - alpha) * inNbrs[i].map(j => oldPR[j] / outDeg[j]).sum (if i not start node)
49 | //PR[i] = alpha + (1 - alpha) * inNbrs[i].map(j => oldPR[j] / outDeg[j]).sum (if i is start node)
50 | val rPrb = {
51 | (src: VertexId, id: VertexId) => alpha * delta(src, id)
52 | }
53 | rankGraph = rankGraph.joinVertices(rankUpdates) {
54 | (id, oldRank, msgSum) => rPrb(src, id) + (1.0 - alpha) * msgSum
55 | }.cache()
56 |
57 | rankGraph.edges.foreachPartition(x => {}) // also materializes rankGraph.vertices
58 | // logInfo(s"PageRank finished iteration $iteration.")
59 | prevRankGraph.vertices.unpersist(false)
60 | prevRankGraph.edges.unpersist(false)
61 |
62 | /** println("iteration: "+iteration)
63 | println()
64 | println(rankGraph.vertices.filter(_._1<=1000).filter( _._1!=patientID).sortBy(_._2,false).take(15).foreach(println))*/
65 | iteration += 1
66 | }
67 |
68 | val result = rankGraph.vertices.filter(_._1<=1000).filter( _._1!=patientID).sortBy(_._2,false).map(_._1).take(10).toList
69 | result
70 | }
71 | }
72 |
--------------------------------------------------------------------------------
/apply/src/test/scala/CollectionUtil.scala:
--------------------------------------------------------------------------------
1 | import scala.collection.mutable.ArrayBuffer
2 | import scala.reflect.ClassTag
3 |
4 | /**
5 | * Created by liuchen on 2017/8/10.
6 | * Description:
7 | */
8 | object CollectionUtil {
9 |
10 | /**
11 | * Adds reduceByKey-style methods to a collection of (K, V) pairs
12 | *
13 | * @param collection
14 | * @param kt
15 | * @param vt
16 | * @tparam K
17 | * @tparam V
18 | */
19 | implicit class CollectionHelper[K, V](collection: ArrayBuffer[(K, V)])(implicit kt: ClassTag[K], vt: ClassTag[V]) {
20 | def reduceByKeyMy(f: (V, V) => V): Traversable[(K, V)] = {
21 | val group: Map[K, ArrayBuffer[(K, V)]] = collection.groupBy(_._1)
22 | group.map(x => x._2.reduce((a, b) => (a._1, f(a._2, b._2))))
23 | }
24 |
25 |
26 | /**
27 | * Performs reduceByKey and also returns the collection of elements that were reduced away
28 | *
29 | * @param f
30 | * @return
31 | */
32 | def reduceByKeyWithReduced(f: (V, V) => V)(implicit kt: ClassTag[K], vt: ClassTag[V]): (Traversable[(K, V)], Traversable[(K, V)]) = {
33 | val reduced: ArrayBuffer[(K, V)] = ArrayBuffer()
34 | val newSeq = collection.groupBy(_._1).map {
35 | case (_: K, values: Traversable[(K, V)]) => values.reduce((a, b) => {
36 | val newValue: V = f(a._2, b._2)
37 | val reducedValue: V = if (newValue == a._2) b._2 else a._2
38 | val reducedPair: (K, V) = (a._1, reducedValue)
39 | reduced += reducedPair
40 | (a._1, newValue)
41 | })
42 | }
43 | (newSeq, reduced.toTraversable)
44 | }
45 | }
46 |
47 | }
48 |
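A small usage sketch for the implicit helper above (the sample pairs are invented): reduceByKeyMy folds values per key on a plain ArrayBuffer without a SparkContext, and reduceByKeyWithReduced additionally returns the elements that were folded away.

import scala.collection.mutable.ArrayBuffer
import CollectionUtil._

object CollectionUtilSketch extends App {
  val pairs = ArrayBuffer(("a", 1), ("a", 2), ("b", 3))
  // Sum per key: the result contains ("a", 3) and ("b", 3) (iteration order is not guaranteed).
  println(pairs.reduceByKeyMy(_ + _))
  // kept = the reduced pairs; reduced = the values folded away, here ("a", 1).
  val (kept, reduced) = pairs.reduceByKeyWithReduced(_ + _)
  println(kept)
  println(reduced)
}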
--------------------------------------------------------------------------------
/apply/src/test/scala/CreateApplyData.scala:
--------------------------------------------------------------------------------
1 | import org.apache.commons.lang3.StringUtils
2 | import org.apache.spark.{SparkConf, SparkContext}
3 | import com.lakala.datacenter.utils.UtilsToos._
4 | import scala.util.Random
5 |
6 | /**
7 | * Created by ASUS-PC on 2017/4/18.
8 | */
9 | object CreateApplyData {
10 |
11 | def main(args: Array[String]): Unit = {
12 | val conf = new SparkConf().setMaster("local[2]").setAppName("CreateApplyData")
13 | val sc = new SparkContext(conf)
14 | val callLine = sc.textFile("file:///F:/lakalaFinance_workspaces/applogs/000000_0")
15 | val applyLine = sc.textFile("file:///F:/lakalaFinance_workspaces/applogs/query_result.csv").filter(line => (!line.startsWith("s_c_loan_apply")))
16 | val call = callLine.mapPartitions { lines =>
17 | lines.map { line =>
18 | var arr = line.split("\u0001")
19 | (if (StringUtils.isNotBlank(arr(4)) && isMobileOrPhone(arr(4))) arr(4) else "", if (StringUtils.isNotBlank(arr(6)) && isMobileOrPhone(arr(6))) arr(6) else "")
20 | }
21 | }
22 | val list = call.filter(k => StringUtils.isNotBlank(k._1)).map(k => k._1.toLong).union(call.filter(k => StringUtils.isNotBlank(k._2)).map(k => k._2.toLong)).collect().toSet.toList
23 | println("mobile ************************")
24 | list.sorted.foreach(println)
25 | println("mobile ************************")
26 | val ac = sc.broadcast(list)
27 | applyLine.mapPartitions {
28 | val list: List[Long] = ac.value
29 | val seed: Int = list.size
30 | lines => lines.map {
31 | line =>
32 | var arr = line.split(",")
33 | val index = getIndex(seed)
34 | val s = if (StringUtils.isBlank(arr(41)) || "null".equals(arr(41).toLowerCase)) "," + list(index) + ","
35 | else if (StringUtils.isNotBlank(arr(41)) && !isMobileOrPhone(arr(41))) "," + list(index) + ","
36 | else "," + arr(41) + ","
37 | s"${arr.slice(0, 41).mkString(",")}$s${arr.slice(42, arr.length).mkString(",")}"
38 | }
39 | }.repartition(1).saveAsTextFile("file:///F:/lakalaFinance_workspaces/applogs2/query_result.csv")
40 | }
41 |
42 | def getIndex(seed: Int): Int = {
43 | val rand = new Random()
44 | rand.nextInt(seed)
45 | }
46 |
47 | }
48 |
--------------------------------------------------------------------------------
/apply/src/test/scala/CreateApplyData2.scala:
--------------------------------------------------------------------------------
1 |
2 | import org.apache.commons.lang3.StringUtils
3 | import org.apache.spark.{SparkConf, SparkContext}
4 | import com.lakala.datacenter.utils.UtilsToos._
5 | import scala.util.Random
6 | /**
7 | * Created by ASUS-PC on 2017/4/18.
8 | */
9 | object CreateApplyData2 {
10 |
11 | def main(args: Array[String]): Unit = {
12 | val conf = new SparkConf().setMaster("local[2]").setAppName("CreateApplyData2")
13 | val sc = new SparkContext(conf)
14 | val callLine = sc.textFile("file:///F:/lakalaFinance_workspaces/applogs/000000_0")
15 | val call = callLine.mapPartitions { lines =>
16 | lines.map { line =>
17 | var arr = line.split("\u0001")
18 | (s"${if (StringUtils.isNotBlank(arr(4)) && isMobileOrPhone(arr(4))) arr(4) else "0"},${if (StringUtils.isNotBlank(arr(6)) && isMobileOrPhone(arr(6))) arr(6) else "0"}")
19 | }
20 | }
21 | call.distinct().repartition(1).saveAsTextFile("file:///F:/lakalaFinance_workspaces/applogs3/query_result.csv")
22 | }
23 |
24 | def getIndex(seed: Int): Int = {
25 | val rand = new Random()
26 | rand.nextInt(seed)
27 | }
28 |
29 | }
30 |
--------------------------------------------------------------------------------
/apply/src/test/scala/EdgeTuplesTest.scala:
--------------------------------------------------------------------------------
1 | import org.apache.log4j.{Level, Logger}
2 | import org.apache.spark.graphx._
3 | import org.apache.spark.{SparkConf, SparkContext}
4 | import utils.GraphNdegUtil2
5 |
6 | /**
7 | * Created by ASUS-PC on 2017/4/19.
8 | */
9 | object EdgeTuplesTest {
10 | def main(args: Array[String]): Unit = {
11 | val conf = new SparkConf().setMaster("local[2]").setAppName("CreateApplyData")
12 | val sc = new SparkContext(conf)
13 | Logger.getLogger("org.apache.spark").setLevel(Level.WARN)
14 | Logger.getLogger("org.eclipse.jetty.server").setLevel(Level.OFF)
15 | // val orderMobile = GraphLoader.edgeListFile(sc, "file:///F:/lakalaFinance_workspaces/graphx-analysis/apply/data3/part-00003")
16 | val orderMobile = GraphLoader.edgeListFile(sc, "file:///F:/lakalaFinance_workspaces/graphx-analysis/apply/data/friends.txt")
17 | // val orderMobile = sc.textFile("file:///F:/lakalaFinance_workspaces/graphx-analysis/apply/data3/part-00000")
18 | // val edgeTuple = orderMobile.mapPartitions { lines =>
19 | // lines.map { line =>
20 | // val arr = line.split(",")
21 | // (arr(0).toLong, arr(1).toLong)
22 | // }
23 | // }
24 |
25 | val validGraph = orderMobile.subgraph(k => k.srcId != 0 && k.dstId != 0)
26 | // val choiceRdd = sc.parallelize(Seq(18028726374L, 18692892122L, 13761981426L))
27 | val choiceRdd = sc.parallelize(Seq(6L))
28 |
29 | val rss: VertexRDD[Map[Int, Set[VertexId]]] = GraphNdegUtil2.aggNdegreedVertices(validGraph, choiceRdd, 3)
30 | println("00000++++++0000000")
31 | rss.foreach { k =>
32 | println(s"${k._1} ${k._2.map(kk => s"${kk._1}->${kk._2.mkString(",")}").mkString("; ")}")
33 | }
34 |
35 | // val applyLine = sc.textFile("file:///F:/lakalaFinance_workspaces/applogs/query_result.csv").filter(line => (!line.startsWith("s_c_loan_apply")))
36 | // val rs = applyLine.mapPartitions { lines =>
37 | // lines.map { line =>
38 | // val arr = line.split(",")
39 | // val term_id = if (StringUtils.isNotBlank(arr(7)) && !"null".equals(arr(7).toLowerCase)) arr(7) else "OL"
40 | // val return_pan = if (StringUtils.isNotBlank(arr(16)) && !"null".equals(arr(16).toLowerCase)) arr(16) else "0L"
41 | // val empmobile = if (StringUtils.isNotBlank(arr(41)) && !"null".equals(arr(41).toLowerCase)) arr(41) else "0L"
42 | // ((s"${hashId(arr(1))},${hashId(term_id)}"), (s"${hashId(arr(1))},${hashId(return_pan)}"), (s"${hashId(arr(1))},${empmobile}"))
43 | // }
44 | // }
45 | // val edge1: RDD[String] = rs.map(ve => ve._1).filter(k => !k.endsWith("," + hashId("0L")))
46 | // val edge2: RDD[String] = rs.map(ve => ve._2).filter(k => !k.endsWith("," + hashId("0L")))
47 | // val edge3: RDD[String] = rs.map(ve => ve._3).filter(k => !k.endsWith(",0L"))
48 | // edge1.union(edge2).union(edge3).repartition(1).saveAsTextFile("file:///F:/lakalaFinance_workspaces/graphx-analysis/apply/data3")
49 | }
50 | }
51 |
--------------------------------------------------------------------------------
/apply/src/test/scala/GraphxBSP3.scala:
--------------------------------------------------------------------------------
1 | /*
2 | import org.apache.commons.lang3.StringUtils
3 | import org.apache.spark.{SparkConf, SparkContext}
4 |
5 | /**
6 | * Created by Administrator on 2017/6/16 0016.
7 | */
8 | object GraphxBSP3 {
9 | def main(args: Array[String]): Unit = {
10 | @transient
11 | val conf = new SparkConf().setAppName("GraphxBSP").setMaster("local[4]")
12 | @transient
13 | val sc = new SparkContext(conf)
14 | //orderId,contractNo,termId,loanPan,returnPan,insertTime,recommend,userId,
15 | // deviceId
16 | //certNo,email,company,mobile,compAddr,compPhone,emergencyContactMobile,contactMobile,ipv4,msgphone,telecode
17 | val edgeRDD = sc.textFile("F:\\graphx-analysis\\apply\\bin\\test.csv").mapPartitions(lines => lines.map { line =>
18 | val fields = line.split(",")
19 | val kv = if (StringUtils.isNoneEmpty(fields(2))) {
20 | (fields(2), 1)
21 | } else
22 | ("0", 0)
23 | kv
24 | }).reduceByKey(_ + _).filter(_._2 > 2)
25 | edgeRDD.foreach(kv=>println(kv._1+" === "+kv._2))
26 |
27 | }
28 |
29 | }
30 | */
31 |
--------------------------------------------------------------------------------
/apply/src/test/scala/Median.scala:
--------------------------------------------------------------------------------
1 | /**
2 | * Created by linyanshi on 2017/8/19 0019.
3 | */
4 |
5 | import org.apache.spark.{SparkConf, SparkContext}
6 |
7 | object Median {
8 | def main(args: Array[String]) {
9 | val conf = new SparkConf().setAppName("Median")
10 | val spark = new SparkContext(conf)
11 | val data = spark.textFile("data")
12 | /* Logically split the data into buckets of width 1000 (the bucket width/count can be tuned) and count how many values fall into each bucket. */
13 | val mappeddata = data.map(num => (num.toInt / 1000, num.toInt))
14 |
15 | val counts: Array[(Int, Int)] = mappeddata
16 | .map(kv => (kv._1, 1))
17 | .reduceByKey(_ + _)
18 | .collect()
19 | .sortBy(_._1)
20 |
21 | /* Accumulate the bucket counts from the lowest bucket upwards to find which bucket the median falls into, and the median's offset inside that bucket. */
22 | val total = counts.map(_._2).sum
23 | val mid = total / 2
24 |
25 | var cumulative = 0
26 | var index = 0
27 | var found = false
28 | for (i <- counts.indices if !found) {
29 | cumulative += counts(i)._2
30 | if (cumulative > mid) {
31 | index = i
32 | found = true
33 | }
34 | }
35 | /* Zero-based offset of the median inside its bucket. */
36 | val offset = mid - (cumulative - counts(index)._2)
37 | /* Take the (offset + 1) smallest values in the median's bucket; the last of them is the median. */
38 | val result = mappeddata.filter(_._1 == counts(index)._1).map(_._2).takeOrdered(offset + 1)
39 | println("Median is " + result.last)
40 | spark.stop()
41 | }
42 | }
43 |
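A worked check of the bucket arithmetic used above, with invented counts: for bucket counts 300, 250 and 450 the total is 1000, mid = 500, the cumulative sums 300, 550, 1000 first exceed 500 at bucket 1, and the median sits at zero-based offset 500 - 300 = 200 inside that bucket.

object MedianBucketSketch extends App {
  val counts = Array((0, 300), (1, 250), (2, 450))          // (bucket, count), sorted by bucket
  val mid = counts.map(_._2).sum / 2                        // 500
  val cumulative = counts.scanLeft(0)(_ + _._2).tail        // Array(300, 550, 1000)
  val index = cumulative.indexWhere(_ > mid)                // 1 -> the bucket holding the median
  val offset = mid - (cumulative(index) - counts(index)._2) // 200 -> zero-based rank inside that bucket
  println(s"median bucket=${counts(index)._1}, offset=$offset")
}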
--------------------------------------------------------------------------------
/apply/src/test/scala/NDegreeResult.scala:
--------------------------------------------------------------------------------
1 | import org.apache.spark.{SparkConf, SparkContext}
2 | import org.apache.spark.graphx._
3 |
4 | /**
5 | * Created by lys on 2017/4/23.
6 | */
7 | object NDegreeResult {
8 | def main(args: Array[String]): Unit = {
9 | val conf = new SparkConf()
10 | conf.setMaster("local[2]")
11 | conf.setAppName("DTWWW")
12 | val sc = new SparkContext(conf);
13 | val edge = List( // edge data
14 | (1, 2), (1, 3), (2, 3), (3, 4), (3, 5), (3, 6),
15 | (4, 5), (5, 6), (7, 8), (7, 9), (8, 9),(2,11),(6,11),(2,12),(6,12))
16 | // build the edge RDD
17 | val edgeRdd = sc.parallelize(edge).map(x => {
18 | Edge(x._1.toLong, x._2.toLong, None)
19 | })
20 | // build the graph; vertex attributes are Int
21 | val g = Graph.fromEdges(edgeRdd, 0)
22 | // gives an idea of how many "super nodes" the graph has, how large they are, and the overall degree distribution
23 | g.degrees.collect.foreach(println(_))
24 | // Two passes are used: at initialization every vertex sets its own time-to-live to 2;
25 | // in the first pass it propagates its own ID with a time-to-live of 1 (2-1) to its neighbours;
26 | // in the second pass the neighbours that received the message forward it once more with a time-to-live of 0;
27 | // in the final aggregation, counting only the IDs carried with time-to-live 0 yields the two-hop neighbours
28 |
29 |
30 | type VMap = Map[VertexId, Int]
31 |
32 | /**
33 | * Vertex update: simply the union of the two maps
34 | */
35 | def vprog(vid: VertexId, vdata: VMap, message: VMap): Map[VertexId, Int] = addMaps(vdata, message)
36 |
37 | /**
38 | * Send messages
39 | */
40 | def sendMsg(e: EdgeTriplet[VMap, _]) = {
41 | // take the difference of the two key sets, then decrement the time-to-live by 1
42 | val srcMap:Map[VertexId, Int] = (e.dstAttr.keySet -- e.srcAttr.keySet).map { k => k -> (e.dstAttr(k) - 1) }.toMap
43 | val dstMap:Map[VertexId, Int] = (e.srcAttr.keySet -- e.dstAttr.keySet).map { k => k -> (e.srcAttr(k) - 1) }.toMap
44 |
45 | if (srcMap.size == 0 && dstMap.size == 0)
46 | Iterator.empty
47 | else
48 | Iterator((e.dstId, dstMap), (e.srcId, srcMap))
49 | }
50 |
51 | /**
52 | * Merge messages
53 | */
54 | def addMaps(spmap1: VMap, spmap2: VMap): VMap =
55 | (spmap1.keySet ++ spmap2.keySet).map {
56 | k => k -> math.min(spmap1.getOrElse(k, Int.MaxValue), spmap2.getOrElse(k, Int.MaxValue))
57 | }.toMap
58 |
59 | val two = 2 // two-hop neighbours, so a value of 2 is enough
60 | val newG = g.mapVertices((vid, _) => Map[VertexId, Int](vid -> two))
61 | .pregel(Map[VertexId, Int](), two, EdgeDirection.Out)(vprog, sendMsg, addMaps)
62 |
63 | // the per-vertex data after the two passes:
64 | newG.vertices.collect().foreach(println(_))
65 | // (4,Map(5 -> 1, 1 -> 0, 6 -> 0, 2 -> 0, 3 -> 1, 4 -> 2))
66 | // (6,Map(5 -> 1, 1 -> 0, 6 -> 2, 2 -> 0, 3 -> 1, 4 -> 0))
67 | // (8,Map(8 -> 2, 7 -> 1, 9 -> 1))
68 | // (2,Map(5 -> 0, 1 -> 1, 6 -> 0, 2 -> 2, 3 -> 1, 4 -> 0))
69 | // (1,Map(5 -> 0, 1 -> 2, 6 -> 0, 2 -> 1, 3 -> 1, 4 -> 0))
70 | // (3,Map(5 -> 1, 1 -> 1, 6 -> 1, 2 -> 1, 3 -> 2, 4 -> 1))
71 | // (7,Map(7 -> 2, 8 -> 1, 9 -> 1))
72 | // (9,Map(9 -> 2, 7 -> 1, 8 -> 1))
73 | // (5,Map(5 -> 2, 1 -> 0, 6 -> 1, 2 -> 0, 3 -> 1, 4 -> 1))
74 | // In the Map, each key is a nearby vertex id and its value is that vertex's remaining time-to-live, so one more mapValues over this RDD gives the final two-hop neighbours
75 | // filter for the two-hop neighbours, i.e. the entries whose value == 0
76 | val twoJumpFirends = newG.vertices
77 | .mapValues(_.filter(_._2 == 0).keys)
78 |
79 | twoJumpFirends.collect().foreach(println(_))
80 | // (4,Set(1, 6, 2))
81 | // (6,Set(1, 2, 4))
82 | // (8,Set())
83 | // (2,Set(5, 6, 4))
84 | // (1,Set(5, 6, 4))
85 | // (3,Set())
86 | // (7,Set())
87 | // (9,Set())
88 | // (5,Set(1, 2))
89 |
90 | }
91 | }
92 |
--------------------------------------------------------------------------------
/apply/src/test/scala/NumOnce.scala:
--------------------------------------------------------------------------------
1 | /**
2 | * Created by linyanshi on 2017/8/19 0019.
3 | */
4 | import org.apache.spark.{SparkConf, SparkContext}
5 | import org.apache.spark.SparkContext._
6 |
7 | object NumOnce {
8 | // XOR all the IDs in the list together; the value that remains is the ID we are looking for. First XOR the data
9 | // inside each partition, then XOR the per-partition results together.
10 | def computeOneNum(args:Array[String]) {
11 | val conf = new SparkConf().setAppName("NumOnce").setMaster("local[1]")
12 | val spark = new SparkContext(conf)
13 | val data = spark.textFile("data")
14 | /* Each partition XORs its own data; in the final reduceByKey stage the per-partition XOR results are merged with another XOR.
15 | Numbers that appear an even number of times XOR to 0; a number that appears an odd number of times is left as itself. */
16 | val result = data.mapPartitions(iter => {
17 | var temp = iter.next().toInt
18 | while(iter.hasNext) {
19 | temp = temp^(iter.next()).toInt
20 | }
21 | Seq((1, temp)).iterator
22 | }).reduceByKey(_^_).collect()
23 | println("The number appearing once is: " + result(0)._2)
24 | }
25 | }
26 |
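A minimal local illustration of the XOR identity the comments rely on (the numbers are invented): values occurring an even number of times cancel to 0, so the fold leaves only the value that occurs once.

object NumOnceSketch extends App {
  val data = Seq(3, 5, 3, 7, 5, 7, 9)
  // 3^3 = 0, 5^5 = 0, 7^7 = 0, so only 9 survives the fold.
  println(data.reduce(_ ^ _)) // 9
}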
--------------------------------------------------------------------------------
/apply/src/test/scala/ParsesTest.scala:
--------------------------------------------------------------------------------
1 | import org.apache.spark.{SparkConf, SparkContext}
2 | import org.apache.spark.sql.SQLContext
3 |
4 | /**
5 | * Created by Administrator on 2017/8/4 0004.
6 | */
7 | object ParsesTest {
8 | // case class Data(index: String, title: String, content: String)
9 | // def main(args: Array[String]): Unit = {
10 | // val conf = new SparkConf().setAppName("WordCount").setMaster("local")
11 | // val sc = new SparkContext(conf)
12 | // val input = sc.textFile("F:\\out\\output")
13 | // // wholeTextFiles returns an RDD[(String, String)]
14 | // val result = input.map{line=>
15 | // val reader = new CSVReader(new StringReader(line));
16 | // reader.readAll().map(x => Data(x(0), x(1), x(2)))
17 | // }
18 | // for(res <- result){
19 | // println(res)
20 | // }
21 | // }
22 | def main(args: Array[String]): Unit = {
23 | val conf = new SparkConf().setAppName("ParsesTest").setMaster("local")
24 | val sc = new SparkContext(conf)
25 | val sqlContext = new SQLContext(sc)
26 | val df = sqlContext.load("com.databricks.spark.csv", Map("path" -> "F:\\out\\output\\*", "header" -> "true"))
27 | df.select("index", "title").foreach(row=>println(row.get(0)))
28 | }
29 | }
30 |
--------------------------------------------------------------------------------
/apply/src/test/scala/TestCSV.scala:
--------------------------------------------------------------------------------
1 | import com.lakala.datacenter.core.utils.UtilsToos
2 |
3 | import scala.collection.mutable.ArrayBuffer
4 | import scala.util.matching.Regex
5 |
6 | /**
7 | * Created by Administrator on 2017/8/4 0004.
8 | */
9 | object TestCSV {
10 |
11 | case class Data(index: String, title: String, content: String)
12 |
13 | val arr = Array(4)
14 |
15 | def main(args: Array[String]) {
16 | val value ="1472100411047"
17 | val pattern = new Regex("[0-9]{1,}")
18 | if(pattern.pattern.matcher(value).matches())
19 | println(value.toLong)
20 | }
21 |
22 | private def splitSpecificDelimiterData(line: String): String = {
23 | val context = new StringBuffer()
24 | val haveSplitAtt = line.split(",")
25 |
26 | val oneSplitAtt = haveSplitAtt(1).split("\\|")
27 | for (i <- 0 until (oneSplitAtt.length)) {
28 | if (arr(0) == 4) {
29 | val secondSplitAtt = haveSplitAtt(3).split("\\|")
30 | for (j <- 0 until (secondSplitAtt.length)) {
31 | if (j == secondSplitAtt.length - 1)
32 | context.append(s"${haveSplitAtt(0)},${oneSplitAtt(i)},${haveSplitAtt(2)},${secondSplitAtt(j)},${haveSplitAtt(haveSplitAtt.size - 1)}")
33 | else
34 | context.append(s"${haveSplitAtt(0)},${oneSplitAtt(i)},${haveSplitAtt(2)},${secondSplitAtt(j)},${haveSplitAtt(haveSplitAtt.size - 1)}\n")
35 | }
36 | } else {
37 | if (i == oneSplitAtt.length - 1)
38 | context.append(s"${haveSplitAtt(0)},${oneSplitAtt(i)},${haveSplitAtt(haveSplitAtt.size - 1)}")
39 | else
40 | context.append(s"${haveSplitAtt(0)},${oneSplitAtt(i)},${haveSplitAtt(haveSplitAtt.size - 1)}\n")
41 | }
42 | }
43 | context.toString
44 | }
45 | }
46 |
--------------------------------------------------------------------------------
/apply/src/test/scala/TestRunGraphx.scala:
--------------------------------------------------------------------------------
1 | import com.lakala.datacenter.core.utils.UtilsToos.hashId
2 | import org.apache.spark.graphx.Edge
3 |
4 | import scala.collection.mutable.ListBuffer
5 |
6 | /**
7 | * Created by ASUS-PC on 2017/4/12.
8 | */
9 | object TestRunGraphx {
10 | def main(args: Array[String]): Unit = {
11 | // RunLoadApplyGraphx2.main(Array())
12 | // com.lakala.datacenter.main.Driver.main(args)
13 | // val s ="15397661996->XNW28459058720408576"
14 | // val ss = "13666199888->XNW28459058720408576"
15 | // s.substring(0,s.indexOf("->"))
16 | // println(s.substring(0,s.indexOf("->"))+"##"+ss.substring(0,ss.indexOf("->")))
17 | val arry= args(0).split(",")
18 | val edge =new EdgeArr("001","4334","7","0")
19 | if(judgSendMsg(arry,edge)) println("=========")
20 | }
21 | def judgSendMsg(sendType: Array[String], edge: EdgeArr): Boolean = {
22 | var flag = false
23 | for (stype <- sendType) if (edge.srcType.equals(stype)) flag = true
24 | flag
25 | }
26 |
27 | // var messages = g.mapReduceTriplets(sendMsg,mergeMsg);
28 | // print("messages:"+messages.take(10).mkString("\n"))
29 | // var activeMessages = messages.count();
30 | // //LOAD
31 | // var prevG:Graph[VD,ED] = null
32 | // var i = 0;
33 | // while(activeMessages > 0 && i < maxIterations){
34 | // // (3) Receive the messages. Vertices that didn't get any message do not appear in newVerts.
35 | // // inner join; the result is a VertexRDD (see the debug output below)
36 | // val newVerts = g.vertices.innerJoin(messages)(vprog).cache();
37 | // print("newVerts:"+newVerts.take(10).mkString("\n"))
38 | // // (4) update the graph with the new vertices.
39 | // prevG = g; // back up the old graph first, so the graph can be updated and the old one unpersisted later
40 | // // (4) outer join; returns the fully updated graph
41 | // g = g.outerJoinVertices(newVerts){(vid,old,newOpt) => newOpt.getOrElse(old)} // getOrElse: if newOpt is present return it, otherwise keep old
42 | // print(g.vertices.take(10).mkString("\n"))
43 | // g.cache(); // cache the new graph for the next iteration
44 | //
45 | // val oldMessages = messages; // back up, same idea as prevG = g
46 | // // Send new messages. Vertices that didn't get any message do not appear in newVerts, so
47 | // // don't send messages. We must cache messages so it can be materialized on the next line,
48 | // // allowing us to uncache the previous iteration.
49 | // // (5) the new messages to send in the next iteration; cache them first
50 | // messages = g.mapReduceTriplets(sendMsg,mergeMsg,Some((newVerts,activeDirection))).cache()
51 | // print("messages to send in the next iteration:"+messages.take(10).mkString("\n"))
52 | // activeMessages = messages.count(); // (6)
53 | // print("number of messages to send in the next iteration:"+ activeMessages) // if activeMessages == 0, the iteration ends
54 | // logInfo("Pregel finished iteration" + i);
55 | // // the old messages and graph are no longer needed; unpersist them
56 | // oldMessages.unpersist(blocking= false);
57 | // newVerts.unpersist(blocking=false) // after unpersist they can no longer be used
58 | // prevG.unpersistVertices(blocking=false)
59 | // prevG.edges.unpersist(blocking=false)
60 | // i += 1;
61 | // }
62 | // g // return the final graph
63 | //}
64 | //
65 | //}
66 |
67 |
68 |
69 | // val conf = if (ctx.isLocals) new Configuration else ctx.getSparkContext.hadoopConfiguration
70 | // val hdfsPath: String = hdfsMasterPath + path
71 | // rdd.saveAsTextFile(hdfsPath)
72 | // hiveCT.sql(s"ALTER TABLE $tableName DROP PARTITION(execute_dt='$date', project_id='$project')")
73 | // hiveCT.sql(s"ALTER TABLE $tableName SET FILEFORMAT TEXTFILE")
74 | // hiveCT.sql(s"LOAD DATA INPATH '$hdfsPath/part-*' OVERWRITE INTO TABLE $tableName PARTITION (execute_dt='$date', project_id='$project')")
75 | // hiveCT.sql(s"ALTER TABLE $tableName SET FILEFORMAT RCFILE")
76 |
77 |
78 |
79 |
80 |
81 |
82 |
83 |
84 |
85 | }
86 |
--------------------------------------------------------------------------------
/apply/src/test/scala/TrustRank.scala:
--------------------------------------------------------------------------------
1 | import org.apache.spark.Logging
2 | import org.apache.spark.graphx._
3 |
4 | import scala.reflect.ClassTag
5 | import scala.util.Random
6 |
7 | /**
8 | * Created by linyanshi on 2017/9/19 0019.
9 | */
10 | object TrustRank extends Logging {
11 |
12 | /*
13 | * VD : (double, double) denotes rank and score
14 | * ED : double , not used
15 | */
16 | def run[VD: ClassTag, ED: ClassTag](graph: Graph[VD, ED], numIter: Int): Long = {
17 | val resetProb: Double = 0.15
18 | val resetRank: Double = 0.15
19 |
20 | def resetScore: Double = Random.nextDouble()
21 |
22 |
23 | var rankGraph: Graph[Double, Double] = graph
24 | .outerJoinVertices(graph.outDegrees) { (vid, vd, deg) => deg.getOrElse(0) }
25 | .mapTriplets(e => 1.0 / e.srcAttr, TripletFields.Src)
26 | .mapVertices((id, attr) => resetRank)
27 |
28 | val scoreGraph: Graph[Double, _] = graph.mapVertices((id, attr) => resetScore).cache()
29 |
30 | var iteration = 0
31 |
32 | val start_ms = System.currentTimeMillis()
33 | println("Start time : " + start_ms)
34 |
35 | while (iteration < numIter) {
36 | val rankUpdates = rankGraph.aggregateMessages[Double](
37 | ctx => ctx.sendToDst(ctx.srcAttr * ctx.attr),
38 | _ + _,
39 | TripletFields.Src
40 | )
41 |
42 | // update rank and apply
43 | rankGraph = rankGraph.joinVertices(rankUpdates) {
44 | (id, old_vd, msgSum) => (1.0 - resetProb) * msgSum
45 | }.joinVertices(scoreGraph.vertices) {
46 | (id, rank, score) => (rank + resetProb * score)
47 | }
48 |
49 | rankGraph.vertices.count() // materialize rank graph
50 | logInfo(s"TrustRank finished iteration $iteration.")
51 |
52 | iteration += 1
53 |
54 | }
55 |
56 |
57 | var end_ms = System.currentTimeMillis()
58 | println("End time : " + end_ms)
59 |
60 | println("Cost : " + (end_ms - start_ms))
61 |
62 | end_ms - start_ms
63 | }
64 |
65 | }
66 |
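A hedged driver sketch for TrustRank.run above; the triangle graph and iteration count are invented, and the point is only to show the wiring and that the return value is the elapsed wall-clock time in milliseconds.

import org.apache.spark.graphx.{Edge, Graph}
import org.apache.spark.{SparkConf, SparkContext}

object TrustRankSketch extends App {
  val sc = new SparkContext(new SparkConf().setAppName("TrustRankSketch").setMaster("local[2]"))
  val edges = sc.parallelize(Seq(Edge(1L, 2L, 1.0), Edge(2L, 3L, 1.0), Edge(3L, 1L, 1.0)))
  val graph = Graph.fromEdges(edges, defaultValue = 1.0)
  val elapsedMs = TrustRank.run(graph, numIter = 10) // returns the elapsed time in milliseconds
  println(s"TrustRank took $elapsedMs ms")
  sc.stop()
}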
--------------------------------------------------------------------------------
/apply/src/test/scala/UDF_test.scala:
--------------------------------------------------------------------------------
1 | import org.apache.spark.sql.hive.HiveContext
2 | import org.apache.spark.{SparkConf, SparkContext}
3 |
4 | /**
5 | * Created by linyanshi on 2017/9/14 0014.
6 | */
7 | object UDF_test {
8 | def main(args: Array[String]): Unit = {
9 |
10 | val conf = new SparkConf()
11 | implicit val sc = new SparkContext(conf)
12 | implicit val sqlContext = new HiveContext(sc)
13 |
14 | import sqlContext.implicits._
15 |
16 | val data = sc.parallelize(Seq(("a", 1), ("bb", 5), ("cccc", 10), ("dddddd", 15))).toDF("a", "b")
17 | data.registerTempTable("data")
18 |
19 |
20 | {
21 | // The function body uses native (non-Column) types; wrap it with udf and register it on sqlContext.udf.
22 | import org.apache.spark.sql.functions._
23 |
24 | // function body
25 | val filter_length_f = (str: String, _length: Int) => {
26 | str.length > _length;
27 | }
28 |
29 | // Register the function with the current sqlContext. Note: functions registered on sqlContext must not take Column parameters.
30 | // Once registered, it can be used in: 1. df.selectExpr; 2. df.filter; 3. SQL, after registering the DataFrame as a temp table.
31 | sqlContext.udf.register("filter_length", filter_length_f)
32 |
33 | val filter_length = udf(filter_length_f) // wrap the function body with udf for Column usage; the wrapped version takes Column arguments
34 |
35 | data.select($"*", filter_length($"a", lit(2))).show // the udf-wrapped version must be given Columns, hence lit(2)
36 | data.selectExpr("*", " filter_length(a,2) as ax").show // to call the function inside a select expression, use selectExpr
37 |
38 | data.filter(filter_length($"a", lit(2))).show // same as select
39 | data.filter("filter_length(a,2)").show // a filter expression string can be passed directly to df.filter
40 |
41 | sqlContext.sql("select *,filter_length(a,2) from data").show
42 | sqlContext.sql("select *,filter_length(a,2) from data where filter_length(a,2)").show
43 | }
44 | {
45 | // A function body that takes Column types cannot be registered on sqlContext.udf.
46 | // After wrapping with udf every argument must be a Column; can we define our own version instead, e.g. one Column parameter and one of another type?
47 | import org.apache.spark.sql.Column
48 | import org.apache.spark.sql.functions._
49 |
50 | val filter_length_f2 = (str: Column, _length: Int) => {
51 | length(str) > _length
52 | }
53 | sqlContext.udf.register("filter_length", filter_length_f2) // todo: unfortunately this cannot be registered; functions registered on sqlContext.udf do not accept Column parameters
54 |
55 | data.select($"*", filter_length_f2($"a", 2)).show // without the udf wrapper we control the signature ourselves, so the length can be a plain Int
56 | data.selectExpr("*", " filter_length_f2(a,2) as ax").show // todo: unfortunately this no longer works
57 |
58 | data.filter(filter_length_f2($"a", 2)).show // same as select
59 | data.filter("filter_length(a,2)").show // todo: unfortunately this no longer works
60 |
61 | }
62 | // Finally, a relatively general-purpose version.
63 | {
64 | // Define two function bodies, one taking Column types and one taking native types; register the native-typed one on sqlContext.udf.
65 |
66 | import org.apache.spark.sql.Column
67 | import org.apache.spark.sql.functions._
68 |
69 | // function body
70 | val filter_length_f = (str: String, _length: Int) => {
71 | str.length > _length;
72 | }
73 | // the main function, used below in df.select, df.filter, etc.
74 | val filter_length = (str: Column, _length: Int) => {
75 | length(str) > _length
76 | }
77 | // Register the function with the current sqlContext. Note: functions registered on sqlContext must not take Column parameters.
78 | // Once registered, it can be used in: 1. df.selectExpr; 2. df.filter; 3. SQL, after registering the DataFrame as a temp table.
79 | sqlContext.udf.register("filter_length", filter_length_f)
80 |
81 | // here we skip the udf wrapper and use our own Column-aware function directly
82 | //val filter_length = udf(filter_length_f) // wrap the function body with udf for Column usage; the wrapped version takes Column arguments
83 |
84 | data.select($"*", filter_length($"a", 2)).show // the Column-based version accepts a plain Int for the length, no lit() needed
85 | data.selectExpr("*", " filter_length(a,2) as ax").show // to call the function inside a select expression, use selectExpr
86 |
87 | data.filter(filter_length($"a", 2)).show // same as select
88 | data.filter("filter_length(a,2)").show // a filter expression string can be passed directly to df.filter
89 |
90 | sqlContext.sql("select *,filter_length(a,2) from data").show
91 | sqlContext.sql("select *,filter_length(a,2) from data where filter_length(a,2)").show
92 | }
93 |
94 |
95 | }
96 | }
97 |
--------------------------------------------------------------------------------
/apply/src/test/scala/entity/CallEntity.scala:
--------------------------------------------------------------------------------
1 | package entity
2 |
3 | import scala.collection.mutable.ListBuffer
4 |
5 | /**
6 | * Created by ASUS-PC on 2017/4/19.
7 | */
8 | case class CallEntity(var totalRounds: Int = 0, var propertyList: ListBuffer[String] = ListBuffer()) extends Serializable with Product {
9 | override def productElement(idx: Int): Any = idx match {
10 | case 0 => totalRounds
11 | case 1 => propertyList
12 | }
13 |
14 | override def productArity: Int = 2
15 |
16 | override def canEqual(that: Any): Boolean = that.isInstanceOf[CallEntity]
17 |
18 | override def toString = s"CallEntity($totalRounds, ${propertyList.toArray.mkString(",")})"
19 | }
20 |
--------------------------------------------------------------------------------
/apply/src/test/scala/entity/CallVertex.scala:
--------------------------------------------------------------------------------
1 | package entity
2 |
3 | import scala.reflect.ClassTag
4 |
5 | /**
6 | * Created by ASUS-PC on 2017/4/20.
7 | */
8 | case class CallVertex[VD: ClassTag](var oldAttr: VD = null,
9 | var newAttr: VD = null,
10 | var init: Boolean = false,
11 | var loop: Int = 0)
12 | extends Serializable {
13 | }
14 |
--------------------------------------------------------------------------------
/apply/src/test/scala/entity/TwoDegree.scala:
--------------------------------------------------------------------------------
1 | package entity
2 |
3 | /**
4 | * Created by ASUS-PC on 2017/4/24.
5 | */
6 | case class TwoDegree (var attr:String ="",
7 | var loop: Int = 0)
8 | extends Serializable {
9 | }
10 |
--------------------------------------------------------------------------------
/apply/src/test/scala/utils/CollectionUtil.scala:
--------------------------------------------------------------------------------
1 | package utils
2 |
3 | import scala.collection.mutable.ArrayBuffer
4 | import scala.reflect.ClassTag
5 |
6 | /**
7 | * Created by ASUS-PC on 2017/4/19.
8 | */
9 |
10 | object CollectionUtil {
11 |
12 | /**
13 | * Adds reduceByKey-style methods to a Traversable[(K, V)] collection
14 | *
15 | * @param collection
16 | * @param kt
17 | * @param vt
18 | * @tparam K
19 | * @tparam V
20 | */
21 | implicit class CollectionHelper[K, V](collection: Traversable[(K, V)])(implicit kt: ClassTag[K], vt: ClassTag[V]) {
22 | def reduceByKey(f: (V, V) => V): Traversable[(K, V)] = {
23 | collection.groupBy(_._1).map { case (_: K, values: Traversable[(K, V)]) => values.reduce((a, b) => (a._1, f(a._2, b._2))) }}
24 |
25 | /**
26 | * Performs reduceByKey and also returns the collection of elements that were reduced away
27 | *
28 | * @param f
29 | * @return
30 | */
31 | def reduceByKeyWithReduced(f: (V, V) => V)(implicit kt: ClassTag[K], vt: ClassTag[V]): (Traversable[(K, V)], Traversable[(K, V)]) = {
32 | val reduced: ArrayBuffer[(K, V)] = ArrayBuffer()
33 | val newSeq = collection.groupBy(_._1).map {
34 | case (_: K, values: Traversable[(K, V)]) => values.reduce((a, b) => {
35 | val newValue: V = f(a._2, b._2)
36 | val reducedValue: V = if (newValue == a._2) b._2 else a._2
37 | val reducedPair: (K, V) = (a._1, reducedValue)
38 | reduced += reducedPair
39 | (a._1, newValue)
40 | })
41 | }
42 | (newSeq, reduced.toTraversable)
43 | }
44 | }
45 |
46 | }
47 |
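A minimal usage sketch for the implicit class above (not part of the repository; the demo object name is made up). It shows what reduceByKey and reduceByKeyWithReduced return for a small in-memory collection:

import utils.CollectionUtil.CollectionHelper

object CollectionUtilDemo {
  def main(args: Array[String]): Unit = {
    val pairs = Seq(("a", 1), ("b", 2), ("a", 3))

    // One pair per key, values merged with the supplied function: ("a", 4), ("b", 2)
    println(pairs.reduceByKey(_ + _))

    // Also returns the elements folded away during the reduce, here ("a", 1)
    val (merged, dropped) = pairs.reduceByKeyWithReduced(_ + _)
    println(merged)
    println(dropped)
  }
}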
--------------------------------------------------------------------------------
/common/pom.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project xmlns="http://maven.apache.org/POM/4.0.0"
3 |          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
4 |          xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
5 |     <parent>
6 |         <groupId>com.lakala.datacenter</groupId>
7 |         <artifactId>graphx-analysis</artifactId>
8 |         <version>1.0.0-SNAPSHOT</version>
9 |     </parent>
10 |     <modelVersion>4.0.0</modelVersion>
11 |
12 |     <artifactId>graphx-analysis-common</artifactId>
13 |
14 | </project>
--------------------------------------------------------------------------------
/common/src/main/resources/css/style.css:
--------------------------------------------------------------------------------
1 | graph {
2 | fill-color: white;
3 | }
4 | node {
5 | size: 65;
6 | fill-color: #CCCCCC, #AAAAAA;
7 | fill-mode: gradient-radial;
8 | text-offset: 0, 0;
9 | stroke-mode: plain;
10 | stroke-color: #333333;
11 | }
12 | node:clicked {
13 | fill-color: #2277FF, #88AAFF;
14 | fill-mode: gradient-radial;
15 | size: 100;
16 | text-size:18;
17 | text-offset: 0, 0;
18 | }
19 | edge {
20 | text-alignment: along;
21 | }
22 |
--------------------------------------------------------------------------------
/common/src/test/data/cities_edges.txt:
--------------------------------------------------------------------------------
1 | 1 2 75
2 | 1 4 140
3 | 1 8 118
4 | 2 3 71
5 | 3 4 151
6 | 4 5 99
7 | 4 6 80
8 | 5 13 211
9 | 6 7 97
10 | 6 12 146
11 | 7 13 101
12 | 7 12 138
13 | 8 9 111
14 | 9 10 70
15 | 10 11 75
16 | 11 12 120
17 | 13 14 90
--------------------------------------------------------------------------------
/common/src/test/data/cities_vertices.txt:
--------------------------------------------------------------------------------
1 | 1 Arad
2 | 2 Zerind
3 | 3 Oradea
4 | 4 Sibiu
5 | 5 Fagaras
6 | 6 RimnicuVilcea
7 | 7 Pitesti
8 | 8 Timisoara
9 | 9 Lugoj
10 | 10 Mehadia
11 | 11 Drobeta
12 | 12 Craiova
13 | 13 Bucharest
14 | 14 Giurgiu
--------------------------------------------------------------------------------
/common/src/test/data/likeness_edges.txt:
--------------------------------------------------------------------------------
1 | 1 2 likes
2 | 1 4 follows
3 | 1 6 follows
4 | 1 6 likes
5 | 2 1 follows
6 | 2 5 likes
7 | 2 6 likes
8 | 3 1 follows
9 | 3 4 likes
10 | 4 2 likes
11 | 4 3 follows
12 | 5 3 likes
13 | 6 1 follows
14 | 6 4 likes
--------------------------------------------------------------------------------
/common/src/test/data/maxvalue_edges.txt:
--------------------------------------------------------------------------------
1 | 1 2
2 | 2 1
3 | 2 4
4 | 3 2
5 | 3 4
6 | 4 3
--------------------------------------------------------------------------------
/common/src/test/data/maxvalue_vertices.txt:
--------------------------------------------------------------------------------
1 | 1 3
2 | 2 6
3 | 3 2
4 | 4 1
--------------------------------------------------------------------------------
/common/src/test/data/papers_edges.txt:
--------------------------------------------------------------------------------
1 | 1 2
2 | 1 4
3 | 1 6
4 | 2 1
5 | 2 6
6 | 3 1
7 | 3 4
8 | 4 2
9 | 4 5
10 | 5 2
11 | 5 3
12 | 6 1
13 | 6 4
--------------------------------------------------------------------------------
/common/src/test/data/people_vertices.txt:
--------------------------------------------------------------------------------
1 | #ID NAME AGE
2 | 1 tom 34
3 | 2 chiara 51
4 | 3 22
5 | 4 marco 28
6 | 5 lucia 40
7 | 6 meria 32
8 | 7 tommy 30
9 | 8 giulio 45
10 | 9 ada 33
--------------------------------------------------------------------------------
/common/src/test/data/relationships_edges.txt:
--------------------------------------------------------------------------------
1 | 1 4
2 | 1 6
3 | 3 4
4 | 3 5
5 | 4 3
6 | 5 2
7 | 5 6
8 | 6 1
9 | 6 4
--------------------------------------------------------------------------------
/common/src/test/data/us_cities_edges.txt:
--------------------------------------------------------------------------------
1 | 1 2 27
2 | 1 3 91
3 | 2 3 35
4 | 2 5 67
5 | 3 4 48
6 | 3 5 14
7 | 5 4 29
8 | 5 6 15
--------------------------------------------------------------------------------
/common/src/test/data/us_cities_vertices.txt:
--------------------------------------------------------------------------------
1 | 1 Washington
2 | 2 Baltimore
3 | 3 Detroit
4 | 4 Chicago
5 | 5 NewYork
6 | 6 Philadelphia
--------------------------------------------------------------------------------
/common/src/test/data/users_dense_edges.txt:
--------------------------------------------------------------------------------
1 | 1 2
2 | 1 4
3 | 2 3
4 | 2 4
5 | 2 5
6 | 3 4
7 | 5 1
8 | 5 3
9 | 5 6
10 | 6 1
11 | 6 3
--------------------------------------------------------------------------------
/common/src/test/data/users_disjoint_edges.txt:
--------------------------------------------------------------------------------
1 | 1 2
2 | 2 5
3 | 2 6
4 | 3 4
5 | 4 3
6 | 5 6
7 | 6 1
--------------------------------------------------------------------------------
/common/src/test/data/users_edges.txt:
--------------------------------------------------------------------------------
1 | 1 2
2 | 2 6
3 | 3 1
4 | 3 4
5 | 4 2
6 | 5 3
7 | 6 1
8 | 6 4
--------------------------------------------------------------------------------
/common/src/test/data/users_vertices.txt:
--------------------------------------------------------------------------------
1 | # ID USERNAME AGE
2 | 1 Alice 35
3 | 2 Bob 41
4 | 3 Carol 28
5 | 4 Dave 43
6 | 5 Eve 29
7 | 6 Frank 30
--------------------------------------------------------------------------------
/common/src/test/scala/TestGraphViewer.scala:
--------------------------------------------------------------------------------
1 | import com.lakala.datacenter.common.graphstream.SimpleGraphViewer
2 |
3 | /**
4 | * Created by peter on 2017/4/26.
5 | */
6 | object TestGraphViewer {
7 | def main(args: Array[String]): Unit = {
8 | SimpleGraphViewer.main(Array())
9 | }
10 | }
11 |
--------------------------------------------------------------------------------
/core/pom.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project xmlns="http://maven.apache.org/POM/4.0.0"
3 |          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
4 |          xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
5 |     <parent>
6 |         <groupId>com.lakala.datacenter</groupId>
7 |         <artifactId>graphx-analysis</artifactId>
8 |         <version>1.0.0-SNAPSHOT</version>
9 |     </parent>
10 |     <modelVersion>4.0.0</modelVersion>
11 |
12 |     <artifactId>graphx-analysis-core</artifactId>
13 |
14 |     <dependencies>
15 |         <dependency>
16 |             <groupId>com.lakala.datacenter</groupId>
17 |             <artifactId>graphx-analysis-common</artifactId>
18 |             <version>${project.version}</version>
19 |         </dependency>
20 |     </dependencies>
21 | </project>
--------------------------------------------------------------------------------
/core/src/main/java/com/lakala/datacenter/core/messaging/Sender.java:
--------------------------------------------------------------------------------
1 | package com.lakala.datacenter.core.messaging;
2 |
3 | import com.lakala.datacenter.core.config.ConfigurationLoader;
4 | import com.rabbitmq.client.Channel;
5 | import com.rabbitmq.client.Connection;
6 | import com.rabbitmq.client.ConnectionFactory;
7 | import com.rabbitmq.client.MessageProperties;
8 |
9 | import java.util.concurrent.TimeoutException;
10 |
11 | public class Sender {
12 | private static final String TASK_QUEUE_NAME = "processor";
13 |
14 | public static void sendMessage(String message)
15 | throws java.io.IOException,
16 | java.lang.InterruptedException, TimeoutException {
17 |
18 | ConnectionFactory factory = new ConnectionFactory();
19 | factory.setHost(ConfigurationLoader.getInstance().getRabbitmqNodename());
20 | Connection connection = factory.newConnection();
21 | Channel channel = connection.createChannel();
22 |
23 | channel.queueDeclare(TASK_QUEUE_NAME, true, false, false, null);
24 |
25 | channel.basicPublish("", TASK_QUEUE_NAME,
26 | MessageProperties.PERSISTENT_TEXT_PLAIN,
27 | message.getBytes());
28 | System.out.println(" [x] Sent '" + message + "'");
29 |
30 | channel.close();
31 | connection.close();
32 | }
33 | }
34 |
--------------------------------------------------------------------------------
/core/src/main/java/com/lakala/datacenter/core/models/PartitionDescription.java:
--------------------------------------------------------------------------------
1 | package com.lakala.datacenter.core.models;
2 |
3 | public class PartitionDescription {
4 | private Long partitionId;
5 | private String partitionLabel;
6 | private String groupRelationship;
7 | private String targetRelationship;
8 |
9 | public String getPartitionLabel() {
10 | return partitionLabel;
11 | }
12 |
13 | public void setPartitionLabel(String partitionLabel) {
14 | this.partitionLabel = partitionLabel;
15 | }
16 |
17 | public Long getPartitionId() {
18 | return partitionId;
19 | }
20 |
21 | public void setPartitionId(Long partitionId) {
22 | this.partitionId = partitionId;
23 | }
24 |
25 | public String getTargetRelationship() {
26 | return targetRelationship;
27 | }
28 |
29 | public void setTargetRelationship(String targetRelationship) {
30 | this.targetRelationship = targetRelationship;
31 | }
32 |
33 | public String getGroupRelationship() {
34 | return groupRelationship;
35 | }
36 |
37 | public void setGroupRelationship(String groupRelationship) {
38 | this.groupRelationship = groupRelationship;
39 | }
40 |
41 | public PartitionDescription(Long partitionId, String partitionLabel) {
42 | this.partitionId = partitionId;
43 | this.partitionLabel = partitionLabel;
44 | }
45 | }
46 |
--------------------------------------------------------------------------------
/core/src/main/java/com/lakala/datacenter/core/models/ProcessorMessage.java:
--------------------------------------------------------------------------------
1 | package com.lakala.datacenter.core.models;
2 |
3 | /**
4 | * The ProcessorMessage class is used to distribute messages between the graph processor and Neo4j.
5 | */
6 | public class ProcessorMessage {
7 | private String path;
8 | private String analysis;
9 | private ProcessorMode mode;
10 | private PartitionDescription partitionDescription;
11 |
12 | public ProcessorMessage(String path, String analysis, ProcessorMode mode) {
13 | this.path = path;
14 | this.analysis = analysis;
15 | this.mode = mode;
16 | }
17 |
18 | /**
19 | * Get the HDFS path.
20 | * @return The path to the HDFS file for this process.
21 | */
22 | public String getPath() {
23 | return path;
24 | }
25 |
26 | /**
27 | * Set the HDFS path.
28 | * @param path The path to the HDFS file for this process.
29 | */
30 | public void setPath(String path) {
31 | this.path = path;
32 | }
33 |
34 | /**
35 | * Get the analysis type.
36 | * @return The key for the analysis type.
37 | */
38 | public String getAnalysis() {
39 | return analysis;
40 | }
41 |
42 | /**
43 | * Set the analysis type.
44 | * @param analysis The key for the analysis type.
45 | */
46 | public void setAnalysis(String analysis) {
47 | this.analysis = analysis;
48 | }
49 |
50 | /**
51 | * Get the mode type.
52 | * @return The mode type for the analysis, either partitioned or unpartitioned.
53 | */
54 | public ProcessorMode getMode() {
55 | return mode;
56 | }
57 |
58 | /**
59 | * Set the mode type.
60 | * @param mode The mode type represents whether the analysis should be partitioned.
61 | */
62 | public void setMode(ProcessorMode mode) {
63 | this.mode = mode;
64 | }
65 |
66 | /**
67 | * Get the description for the partitioned analysis.
68 | * @return Returns a description for the queried partition.
69 | */
70 | public PartitionDescription getPartitionDescription() {
71 | return partitionDescription;
72 | }
73 |
74 | /**
75 | * Set the partition description for an analysis. Preserves information related to
76 | * the analysis being performed on the current partition.
77 | * @param partitionDescription A set of fields that describe the partition being analyzed.
78 | */
79 | public void setPartitionDescription(PartitionDescription partitionDescription) {
80 | this.partitionDescription = partitionDescription;
81 | }
82 | }
83 |
--------------------------------------------------------------------------------
/core/src/main/java/com/lakala/datacenter/core/models/ProcessorMode.java:
--------------------------------------------------------------------------------
1 | package com.lakala.datacenter.core.models;
2 |
3 | public enum ProcessorMode {
4 | Partitioned,
5 | Unpartitioned
6 | }
7 |
--------------------------------------------------------------------------------
/core/src/main/scala/com/lakala/datacenter/core/abstractions/PregelProgram.scala:
--------------------------------------------------------------------------------
1 | package com.lakala.datacenter.core.abstractions
2 |
3 | /**
4 | * Created by peter on 2017/4/26.
5 | */
6 | import org.apache.spark.graphx._
7 | import scala.reflect.ClassTag
8 |
9 | /**
10 | * The [[PregelProgram]] abstraction wraps Spark's Pregel API implementation from the [[GraphOps]]
11 |  * class into a model that makes graph algorithms easier to write.
12 | * @tparam VertexState is the generic type representing the state of a vertex
13 | */
14 | abstract class PregelProgram[VertexState: ClassTag, VD: ClassTag, ED: ClassTag] protected () extends Serializable {
15 |
16 | @transient val graph: Graph[VD, ED]
17 |
18 | /**
19 | * The vertex program receives a state update and acts to update its state
20 | * @param id is the [[VertexId]] that this program will perform a state operation for
21 | * @param state is the current state of this [[VertexId]]
22 | * @param message is the state received from another vertex in the graph
23 | * @return a [[VertexState]] resulting from a comparison between current state and incoming state
24 | */
25 | def vertexProgram(id : VertexId, state : VertexState, message : VertexState) : VertexState
26 |
27 | /**
28 | * The message broker sends and receives messages. It will initially receive one message for
29 | * each vertex in the graph.
30 | * @param triplet An edge triplet is an object containing a pair of connected vertex objects and edge object.
31 | * For example (v1)-[r]->(v2)
32 | * @return The message broker returns a key value list, each containing a VertexId and a new message
33 | */
34 | def messageBroker(triplet :EdgeTriplet[VertexState, ED]) : Iterator[(VertexId, VertexState)]
35 |
36 | /**
37 | * This method is used to reduce or combine the set of all state outcomes produced by a vertexProgram
38 | * for each vertex in each superstep iteration. Each vertex has a list of state updates received from
39 | * other vertices in the graph via the messageBroker method. This method is used to reduce the list
40 | * of state updates into a single state for the next superstep iteration.
41 | * @param a A first [[VertexState]] representing a partial state of a vertex.
42 | * @param b A second [[VertexState]] representing a different partial state of a vertex
43 | * @return a merged [[VertexState]] representation from the two [[VertexState]] parameters
44 | */
45 | def combiner(a: VertexState, b: VertexState) : VertexState
46 |
47 | }
--------------------------------------------------------------------------------
/core/src/main/scala/com/lakala/datacenter/core/grograms/EdgeBetweennessProgram.scala:
--------------------------------------------------------------------------------
1 | package com.lakala.datacenter.core.grograms
2 |
3 | /**
4 | * Created by peter on 2017/4/26.
5 | */
6 | import org.apache.spark.graphx.{EdgeTriplet, Graph, VertexId}
7 | import com.lakala.datacenter.core.abstractions.PregelProgram
8 |
9 | /**
10 | * The [[EdgeBetweennessProgram]] is an example graph algorithm implemented on the [[PregelProgram]]
11 | * abstraction.
12 | */
13 | class EdgeBetweennessProgram(@transient val graph : Graph[Seq[VertexId], Seq[VertexId]])
14 | extends PregelProgram[Seq[VertexId], Seq[VertexId], Seq[VertexId]] with Serializable {
15 |
16 | protected def this() = this(null)
17 |
18 | /**
19 |  * Merge the set of vertex ids received in a message into this vertex's current state
20 |  * @param id is the [[VertexId]] that this program will perform a state operation for
21 |  * @param state is the current state of this [[VertexId]]
22 |  * @param message is the state received from another vertex in the graph
23 |  * @return the distinct union of the current state and the incoming message
24 | */
25 | override def vertexProgram(id: VertexId, state: Seq[VertexId], message: Seq[VertexId]): Seq[VertexId] = {
26 | if(state == null) {
27 | message
28 | } else {
29 | (state ++ message).distinct
30 | }
31 | }
32 |
33 | /**
34 |  * Combine two partial vertex states into one
35 |  * @param a A first [[Seq]] of [[VertexId]]s representing a partial state of a vertex.
36 |  * @param b A second [[Seq]] of [[VertexId]]s representing a different partial state of a vertex
37 |  * @return the distinct union of the two partial states
38 | */
39 | override def combiner(a: Seq[VertexId], b: Seq[VertexId]): Seq[VertexId] = {
40 | (a ++ b).distinct
41 | }
42 |
43 | /**
44 |  * If the source vertex has not yet recorded the destination vertex id, send the destination id to
45 |  * the source vertex so it can update its state
46 | * @param triplet An edge triplet is an object containing a pair of connected vertex objects and edge object.
47 | * For example (v1)-[r]->(v2)
48 | * @return The message broker returns a key value list, each containing a VertexId and a new message
49 | */
50 | override def messageBroker(triplet: EdgeTriplet[Seq[VertexId], Seq[VertexId]]): Iterator[(VertexId, Seq[VertexId])] = {
51 | // If the srcAttr does not yet contain the dstId, notify the srcVertex to add it
52 |
53 | if(!triplet.srcAttr.contains(triplet.dstId)) {
54 | Iterator((triplet.srcId, Seq(triplet.dstId)))
55 | } else {
56 | Iterator()
57 | }
58 |
59 | }
60 |
61 | /**
62 | * This method wraps Spark's Pregel API entry point from the [[org.apache.spark.graphx.GraphOps]] class. This provides
63 | * a simple way to write a suite of graph algorithms by extending the [[PregelProgram]] abstract
64 | * class and implementing vertexProgram, messageBroker, and combiner methods.
65 | * @param initialMsg is the initial message received for all vertices in the graph
66 | */
67 | def run(initialMsg: Seq[VertexId]): Graph[Seq[VertexId], Seq[VertexId]] = {
68 | graph.pregel(initialMsg)(this.vertexProgram, this.messageBroker, this.combiner)
69 | }
70 | }
71 |
72 |
--------------------------------------------------------------------------------
/core/src/main/scala/com/lakala/datacenter/core/grograms/MaximumValueProgram.scala:
--------------------------------------------------------------------------------
1 | package com.lakala.datacenter.core.grograms
2 |
3 | import com.lakala.datacenter.core.abstractions.PregelProgram
4 | import org.apache.spark.graphx.{EdgeTriplet, Graph, VertexId}
5 |
6 | /**
7 | * The [[MaximumValueProgram]] is an example graph algorithm implemented on the [[PregelProgram]]
8 | * abstraction.
9 | */
10 | class MaximumValueProgram(@transient val graph : Graph[Int, Int])
11 | extends PregelProgram[Int, Int, Int] with Serializable {
12 |
13 | protected def this() = this(null)
14 |
15 | /**
16 | * Return the larger of the two vertex attribute values
17 | * @param id is the [[VertexId]] that this program will perform a state operation for
18 | * @param state is the current state of this [[VertexId]]
19 | * @param message is the state received from another vertex in the graph
20 | * @return an [[Int]] resulting from a comparison between current state and incoming state
21 | */
22 | override def vertexProgram(id: VertexId, state: Int, message: Int): Int = {
23 | if (message > state) {
24 | message
25 | } else {
26 | state
27 | }
28 | }
29 |
30 | /**
31 | * Return the larger of the two vertex state results
32 | * @param a A first [[Int]] representing a partial state of a vertex.
33 | * @param b A second [[Int]] representing a different partial state of a vertex
34 | * @return a merged [[Int]] representation from the two [[Int]] parameters
35 | */
36 | override def combiner(a: Int, b: Int): Int = {
37 | math.max(a, b)
38 | }
39 |
40 | /**
41 | * If the dstVertex's value is less than the srcVertex's value, send a message to the dstVertex to update
42 | * its state
43 | * @param triplet An edge triplet is an object containing a pair of connected vertex objects and edge object.
44 | * For example (v1)-[r]->(v2)
45 | * @return The message broker returns a key value list, each containing a VertexId and a new message
46 | */
47 | override def messageBroker(triplet: EdgeTriplet[Int, Int]): Iterator[(VertexId, Int)] = {
48 | // If the srcAttr is greater than the dstAttr then notify the dstVertex to update its state
49 | if (triplet.srcAttr > triplet.dstAttr) {
50 | Iterator((triplet.dstId, triplet.srcAttr))
51 | } else {
52 | Iterator.empty
53 | }
54 | }
55 |
56 | /**
57 | * This method wraps Spark's Pregel API entry point from the [[org.apache.spark.graphx.GraphOps]] class. This provides
58 | * a simple way to write a suite of graph algorithms by extending the [[PregelProgram]] abstract
59 | * class and implementing vertexProgram, messageBroker, and combiner methods.
60 | * @param initialMsg is the initial message received for all vertices in the graph
61 | */
62 | def run(initialMsg: Int): Graph[Int, Int] = {
63 | graph.pregel(initialMsg)(this.vertexProgram, this.messageBroker, this.combiner)
64 | }
65 | }
66 |
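A small driver sketch (not in the repository) that runs the program above; the object name and the local master setting are assumptions, and the vertex/edge values mirror common/src/test/data/maxvalue_vertices.txt and maxvalue_edges.txt.

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.graphx.{Edge, Graph, VertexId}
import org.apache.spark.rdd.RDD
import com.lakala.datacenter.core.grograms.MaximumValueProgram

object MaximumValueDemo {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setAppName("MaximumValueDemo").setMaster("local[2]"))

    // Values from maxvalue_vertices.txt, edges from maxvalue_edges.txt
    val vertices: RDD[(VertexId, Int)] =
      sc.parallelize(Seq((1L, 3), (2L, 6), (3L, 2), (4L, 1)))
    val edges: RDD[Edge[Int]] =
      sc.parallelize(Seq(Edge(1L, 2L, 0), Edge(2L, 1L, 0), Edge(2L, 4L, 0),
                         Edge(3L, 2L, 0), Edge(3L, 4L, 0), Edge(4L, 3L, 0)))
    val graph = Graph(vertices, edges)

    // Each vertex converges to the largest value that can reach it along the edges;
    // with this data every vertex ends up with 6.
    val result = new MaximumValueProgram(graph).run(Int.MinValue)
    result.vertices.collect().foreach(println)

    sc.stop()
  }
}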
--------------------------------------------------------------------------------
/core/src/main/scala/com/lakala/datacenter/core/utils/UtilsToos.scala:
--------------------------------------------------------------------------------
1 | package com.lakala.datacenter.core.utils
2 |
3 | import java.nio.charset.StandardCharsets
4 |
5 | import com.google.common.hash.Hashing
6 | import com.lakala.datacenter.common.utils.DateTimeUtils
7 |
8 | import scala.util.matching.Regex
9 |
10 | /**
11 | * Created by ASUS-PC on 2017/4/18.
12 | */
13 | object UtilsToos {
14 | /**
15 |  * Generates a unique hash id (an MD5-based Long) from a string
16 |  *
17 |  * @param str the string to hash
18 |  * @return the 64-bit hash of the string
19 | */
20 | def hashId(str: String) = {
21 | Hashing.md5().hashString(str, StandardCharsets.UTF_8).asLong()
22 | }
23 |
24 | /**
25 |  * Validates mobile and landline phone numbers
26 |  *
27 |  * @param num the number to validate
28 |  * @return true if the number passes validation
29 |  */
30 | def isMobileOrPhone(num: String): Boolean = {
31 |   val pattern = new Regex("^((17[0-9])|(14[0-9])|(13[0-9])|(15[^4,\\D])|(18[0,5-9]))\\d{8}$") // mobile numbers
32 |   val pattern2 = new Regex("(?:(\\(\\+?86\\))(0[0-9]{2,3}\\-?)?([2-9][0-9]{6,7})+(\\-[0-9]{1,4})?)|(?:(86-?)?(0[0-9]{2,3}\\-?)?([2-9][0-9]{6,7})+(\\-[0-9]{1,4})?)") // landline numbers with an area code
33 |   // val pattern2 = new Regex("^[0][1-9]{2,3}-[0-9]{5,10}$") // landline numbers with an area code
34 |   val pattern3 = new Regex("^[1-9]{1}[0-9]{5,8}$") // landline numbers without an area code
35 | num match {
36 | case pattern(_*) => {
37 | true
38 | }
39 | case pattern2(_*) => {
40 | true
41 | }
42 | case pattern3(_*) => {
43 | true
44 | }
45 | case _ => {
46 | false
47 | }
48 | }
49 | }
50 |
51 |
52 |
53 |
54 | }
55 |
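A quick sketch (not part of the file) of the two helpers above; the demo object name and the sample strings are illustrative only.

import com.lakala.datacenter.core.utils.UtilsToos

object UtilsToosDemo {
  def main(args: Array[String]): Unit = {
    // Deterministic Long id derived from the MD5 hash of the string
    println(UtilsToos.hashId("XNA20170617214709013851193476043"))

    // true for a well-formed mainland-China mobile number, false otherwise
    println(UtilsToos.isMobileOrPhone("13800138000")) // true
    println(UtilsToos.isMobileOrPhone("abc123"))      // false
  }
}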
--------------------------------------------------------------------------------
/core/src/test/java/com/lakala/datacenter/core/hdfs/FileUtilTest.java:
--------------------------------------------------------------------------------
1 | package com.lakala.datacenter.core.hdfs;
2 |
3 | import com.lakala.datacenter.core.config.ConfigurationLoader;
4 | import com.lakala.datacenter.core.models.ProcessorMessage;
5 | import com.lakala.datacenter.core.models.ProcessorMode;
6 | import com.lakala.datacenter.core.processor.GraphProcessor;
7 | import junit.framework.TestCase;
8 | import org.junit.Test;
9 |
10 | import java.util.ArrayList;
11 | import java.util.Arrays;
12 |
13 | public class FileUtilTest extends TestCase {
14 |
15 | @Test
16 | public void testWritePropertyGraphUpdate() throws Exception {
17 |
18 | ConfigurationLoader.testPropertyAccess=true;
19 |
20 | // Create sample PageRank result
21 | String nodeList =
22 | "0 .001\n" +
23 | "1 .002\n" +
24 | "3 .003";
25 |
26 | // Create test path
27 | String path = ConfigurationLoader.getInstance().getHadoopHdfsUri() + "/test/propertyNodeList.txt";
28 |
29 | // Test writing the PageRank result to HDFS path
30 | FileUtil.writePropertyGraphUpdate(new ProcessorMessage(path, GraphProcessor.PAGERANK, ProcessorMode.Partitioned),
31 | new ArrayList<>(Arrays.asList(
32 | "0 .001\n",
33 | "1 .002\n",
34 | "3 .003"
35 | )));
36 |
37 | // Validate node list
38 | assertEquals(FileUtil.readHdfsFile(path), "# Node Property Value List" + "\n" + nodeList);
39 | }
40 | }
--------------------------------------------------------------------------------
/core/src/test/java/com/lakala/datacenter/core/messaging/SenderTest.java:
--------------------------------------------------------------------------------
1 | package com.lakala.datacenter.core.messaging;
2 |
3 | import com.google.gson.Gson;
4 | import com.lakala.datacenter.core.config.ConfigurationLoader;
5 | import com.lakala.datacenter.core.models.ProcessorMessage;
6 | import com.lakala.datacenter.core.models.ProcessorMode;
7 | import com.lakala.datacenter.core.processor.GraphProcessor;
8 | import junit.framework.TestCase;
9 |
10 | public class SenderTest extends TestCase {
11 |
12 | private static final String EDGE_LIST_RELATIVE_FILE_PATH = "/neo4j/mazerunner/edgeList.txt";
13 |
14 | public void testSendMessage() throws Exception {
15 | ConfigurationLoader.testPropertyAccess=true;
16 | ProcessorMessage processorMessage = new ProcessorMessage("", "strongly_connected_components", ProcessorMode.Partitioned);
17 | processorMessage.setPath(ConfigurationLoader.getInstance().getHadoopHdfsUri() + GraphProcessor.PROPERTY_GRAPH_UPDATE_PATH);
18 | // Serialize the processor message
19 | Gson gson = new Gson();
20 | String message = gson.toJson(processorMessage);
21 |
22 | // Notify Neo4j that a property update list is available for processing
23 | Sender.sendMessage(message);
24 | }
25 |
26 |
27 | }
--------------------------------------------------------------------------------
/core/src/test/scala/com/lakala/datacenter/core/grograms/ShortestPathTests.scala:
--------------------------------------------------------------------------------
1 | /*
2 | package com.lakala.datacenter.core.grograms
3 |
4 | import com.lakala.datacenter.core.algorithms.Algorithms
5 | import com.lakala.datacenter.core.config.ConfigurationLoader
6 | import com.lakala.datacenter.core.processor.GraphProcessor
7 | import org.apache.spark.graphx._
8 | import org.apache.spark.graphx.lib.ShortestPaths
9 | import org.apache.spark.rdd.RDD
10 | import org.scalatest.FlatSpec
11 | import scala.collection.mutable
12 |
13 | class ShortestPathTests extends FlatSpec {
14 | /**
15 | * To collect the shortest path results for all nodes to a single destination node,
16 | * the following steps must be taken:
17 | *
18 | */
19 |
20 | ConfigurationLoader.testPropertyAccess = true
21 |
22 | // Create Spark context
23 | val sc = GraphProcessor.initializeSparkContext.sc
24 |
25 | val vertexIds = sc.parallelize(Seq(0L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L)).collect().toSeq
26 |
27 | def fixture =
28 | new {
29 |
30 | // Create an RDD for the vertices
31 | val vertices: RDD[(VertexId, ShortestPathState)] = sc.parallelize(Array(
32 | (0L, new ShortestPathState(0L, vertexIds)),
33 | (1L, new ShortestPathState(1L, vertexIds)),
34 | (2L, new ShortestPathState(2L, vertexIds)),
35 | (3L, new ShortestPathState(3L, vertexIds)),
36 | (4L, new ShortestPathState(4L, vertexIds)),
37 | (5L, new ShortestPathState(5L, vertexIds)),
38 | (6L, new ShortestPathState(6L, vertexIds)),
39 | (7L, new ShortestPathState(7L, vertexIds)),
40 | (8L, new ShortestPathState(8L, vertexIds)),
41 | (9L, new ShortestPathState(9L, vertexIds)),
42 | (10L, new ShortestPathState(10L, vertexIds)),
43 | (11L, new ShortestPathState(11L, vertexIds)),
44 | (12L, new ShortestPathState(12L, vertexIds))))
45 |
46 | // Create an RDD for edges
47 | val edges: RDD[Edge[Int]] = sc.parallelize(Array(
48 | Edge(0L, 1L, 0),
49 | Edge(1L, 4L, 0),
50 | Edge(1L, 2L, 0),
51 | Edge(2L, 3L, 0),
52 | Edge(5L, 6L, 0),
53 | Edge(6L, 7L, 0),
54 | Edge(7L, 8L, 0),
55 | Edge(8L, 9L, 0),
56 | Edge(9L, 10L, 0),
57 | Edge(10L, 11L, 0),
58 | Edge(11L, 12L, 0),
59 | Edge(12L, 3L, 0),
60 | Edge(7L, 3L, 0),
61 | Edge(4L, 3L, 0)))
62 |
63 | // Build the initial Graph
64 | val graph = Graph(vertices, edges, new ShortestPathState(-1L, null))
65 | }
66 |
67 | "A node's state" should "have a decision tree" in {
68 | val graph = fixture.graph
69 |
70 | val tree = new DecisionTree[VertexId](0L, mutable.HashMap[VertexId, DecisionTree[VertexId]]())
71 |
72 | graph.edges.collect().foreach(ed => tree.addLeaf(ed.srcId).addLeaf(ed.dstId))
73 |
74 | val vertexIds = graph.vertices.map(v => v._1).cache().collect()
75 |
76 | val sssp = ShortestPaths.run(graph, graph.vertices.map { vx => vx._1}.collect()).vertices.collect()
77 |
78 | val graphResults = sc.parallelize(vertexIds).map(row => {
79 | println("*** " + row)
80 | (row, vertexIds.map(vt => {
81 | (vt, tree.getNode(row).allShortestPathsTo(vt, sssp))
82 | }))
83 | }).collectAsync().get().toArray
84 |
85 | val result = Algorithms.betweennessCentrality(sc, graphResults)
86 |
87 | val resultStream = result
88 |
89 | for (x <- resultStream) {
90 | println(x)
91 | }
92 |
93 | }
94 |
95 | }
96 | */
97 |
--------------------------------------------------------------------------------
/neo4j/bin/start2.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | ## usage: sh bin/start2.sh -i /logs/device/* -d 2016-01-11
3 |
4 | SPARK_HOME=/home/hadoop/spark-1.6.3-bin-hadoop2.6
5 | HIVE_HOME=/usr/hdp/current/hive-client
6 | PROJECT_HOME="$(cd "`dirname "$0"`"/..; pwd)"
7 | HDP_VERSION=2.4.0.0-169
8 | APP_CACHE_DIR=/tmp/device
9 |
10 | stdate=${1:-`date -d '1 days ago' +"%Y-%m-%d"`}
11 | #inputdir=/logs/device/*
12 | #inputfile=/logs/device/*/2016-01-{1[1-9],2[0-1]}
13 | while getopts "d:i:" opt ; do
14 | case $opt in
15 | d)stdate=$OPTARG ;;
16 | i)inputdir=$OPTARG ;;
17 | ?)echo "==> please input arg: stdate(d), inputdir(i)" && exit 1 ;;
18 | esac
19 | done
20 |
21 | #echo "==> ready for geoip...."
22 | #hadoop fs -mkdir -p $APP_CACHE_DIR/geoip
23 | #hadoop fs -test -e $APP_CACHE_DIR/geoip/GeoLite2-City.mmdb
24 | #if [ $? -ne 0 ]; then
25 | # echo "GeoLite2-City.mmdb not exists!"
26 | # hadoop fs -put $PROJECT_HOME/../tcloud-log-analysis/src/main/bundleApp/coord-common/geoip/GeoLite2-City.mmdb $APP_CACHE_DIR/geoip/
27 | #fi
28 |
29 | ## https://issues.apache.org/jira/browse/ZEPPELIN-93
30 | ## https://github.com/caskdata/cdap/pull/4106
31 | spark-submit \
32 | --master spark://datacenter17:7077,datacenter18:7077 \
33 | --class com.lakala.datacenter.main.Driver \
34 | --driver-memory 2G \
35 | --executor-memory 4G \
36 | --num-executors 3 \
37 | --executor-cores 3 \
38 | --conf "spark.rpc.askTimeout=300s" \
39 | --verbose \
40 | --files $SPARK_HOME/conf/hive-site.xml \
41 | --driver-class-path $PROJECT_HOME/target/dependency/mysql-connector-java-5.1.36.jar \
42 | --jars $PROJECT_HOME/target/dependency/mysql-connector-java-5.1.36.jar,$SPARK_HOME/lib/datanucleus-api-jdo-3.2.6.jar,$SPARK_HOME/lib/datanucleus-core-3.2.10.jar,$PROJECT_HOME/target/dependency/guava-14.0.1.jar,$SPARK_HOME/lib/datanucleus-rdbms-3.2.9.jar \
43 | $PROJECT_HOME/target/graphx-analysis-apply.jar \
44 | -i /user/linyanshi/query_result.csv -c /user/linyanshi/part-00003 -o file:////home/hadoop/grogram/analysis/graphx-analysis/apply/bin/output
45 |
46 | ## --packages com.databricks:spark-csv_2.10:1.3.0 \
47 | ## 2>&1 > output.txt
48 |
--------------------------------------------------------------------------------
/neo4j/pom.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project xmlns="http://maven.apache.org/POM/4.0.0"
3 |          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
4 |          xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
5 |     <parent>
6 |         <groupId>com.lakala.datacenter</groupId>
7 |         <artifactId>graphx-analysis</artifactId>
8 |         <version>1.0.0-SNAPSHOT</version>
9 |     </parent>
10 |     <modelVersion>4.0.0</modelVersion>
11 |
12 |     <artifactId>graphx-analysis-neo4j</artifactId>
13 |     <packaging>jar</packaging>
14 |
15 |     <name>graphx-analysis-neo4j</name>
16 |     <url>http://maven.apache.org</url>
17 |
18 |     <properties>
19 |         <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
20 |     </properties>
21 |
22 |     <dependencies>
23 |         <dependency>
24 |             <groupId>com.lakala.datacenter</groupId>
25 |             <artifactId>graphx-analysis-core</artifactId>
26 |             <version>${project.version}</version>
27 |         </dependency>
28 |     </dependencies>
29 |
30 |     <build>
31 |         <finalName>graphx-analysis-neo4j</finalName>
32 |     </build>
33 |
34 |     <repositories>
35 |         <repository>
36 |             <id>spark-repo</id>
37 |             <url>http://dl.bintray.com/spark-packages/maven/</url>
38 |         </repository>
39 |     </repositories>
40 | </project>
--------------------------------------------------------------------------------
/neo4j/src/main/java/com/lakala/datacenter/enums/DataAttributeType.java:
--------------------------------------------------------------------------------
1 | package com.lakala.datacenter.enums;
2 |
3 | /**
4 | * Created by Administrator on 2017/6/16 0016.
5 | */
6 | interface DataInterface{}
7 |
8 | public enum DataAttributeType implements DataInterface{
9 | ORDERID(1, "orderid"), CONTRACTNO(2, "contractno"), TERMID(3, "termid"), LOANPAN(4, "loanpan"), RETURNPAN(5, "returnpan"),
10 | INSERTTIME(6, "inserttime"), RECOMMEND(7, "recommend"), USERID(8, "userid"), DEVICEID(9, "deviceid"),
11 | CERTNO(10, "certno"), EMAIL(11, "email"), COMPANY(12, "company"), MOBILE(13, "mobile"), COMPADDR(14, "compaddr"),
12 | COMPPHONE(15, "compphone"), EMERGENCYCONTACTMOBILE(16, "emergencycontactmobile"),
13 | CONTACTMOBILE(17, "contactmobile"), IPV4(18, "ipv4"), MSGPHONE(19, "msgphone"), TELECODE(20, "telecode");
14 | // fields
15 | private int sequence;
16 | private String name;
17 |
18 | // constructor
19 | private DataAttributeType(int sequence, String name) {
20 | this.sequence = sequence;
21 | this.name = name;
22 | }
23 |
24 | // look up the attribute name by its sequence number
25 | public static String getColorName(int sequence) {
26 | for (DataAttributeType c : DataAttributeType.values()) {
27 | if (c.getSequence() == sequence)
28 | return c.name;
29 | }
30 | return null;
31 | }
32 |
33 | // getters & setters
34 | public int getSequence() {
35 | return sequence;
36 | }
37 |
38 | public void setSequence(int sequence) {
39 | this.sequence = sequence;
40 | }
41 |
42 | public String getName() {
43 | return name;
44 | }
45 |
46 | public void setName(String name) {
47 | this.name = name;
48 | }
49 | }
50 |
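For reference, a tiny sketch (not in the repository) of the sequence-to-name lookup defined above; the demo object name is made up and the printed values follow directly from the enum constants.

import com.lakala.datacenter.enums.DataAttributeType

object DataAttributeTypeDemo {
  def main(args: Array[String]): Unit = {
    println(DataAttributeType.MOBILE.getSequence) // 13
    println(DataAttributeType.getColorName(13))   // "mobile"
    println(DataAttributeType.getColorName(99))   // null when no sequence matches
  }
}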
--------------------------------------------------------------------------------
/neo4j/src/main/java/com/lakala/datacenter/enums/GraphEnum.java:
--------------------------------------------------------------------------------
1 | package com.lakala.datacenter.enums;
2 |
3 |
4 | /**
5 | * Created by Administrator on 2017/7/11 0011.
6 | */
7 | public enum GraphEnum {
8 | TERMINAL("terminal", RelationshipTypes.terminal), BANKCARD("bankcard", RelationshipTypes.bankcard);
9 | private String relType;
10 | private RelationshipTypes relationshipTypes;
11 |
12 |
13 | private GraphEnum(String relType, RelationshipTypes relationshipTypes) {
14 | this.relType = relType;
15 | this.relationshipTypes = relationshipTypes;
16 | }
17 |
18 | public String getRelType() {
19 | return relType;
20 | }
21 |
22 | public RelationshipTypes getRelationshipTypes(String relType) {
23 | for (GraphEnum ge : GraphEnum.values()) {
24 | if (ge.relType.equals(relType)) return ge.relationshipTypes;
25 | continue;
26 | }
27 | return null;
28 | }
29 |
30 | public RelationshipTypes getRelationshipTypes() {
31 | return relationshipTypes;
32 | }
33 |
34 | }
35 |
--------------------------------------------------------------------------------
/neo4j/src/main/java/com/lakala/datacenter/enums/Labels.java:
--------------------------------------------------------------------------------
1 | package com.lakala.datacenter.enums;
2 |
3 | import org.neo4j.graphdb.Label;
4 |
5 | /**
6 | * Created by Administrator on 2017/5/31 0031.
7 | */
8 | public enum Labels implements Label {
9 | ApplyInfo, Terminal, BankCard, Mobile, Identification, Email, Company, CompanyAddress, CompanyTel, Device, IPV4
10 | }
11 |
--------------------------------------------------------------------------------
/neo4j/src/main/java/com/lakala/datacenter/enums/RelationshipTypes.java:
--------------------------------------------------------------------------------
1 | package com.lakala.datacenter.enums;
2 |
3 | import org.neo4j.graphdb.RelationshipType;
4 |
5 | /**
6 | * Created by Administrator on 2017/5/31 0031.
7 | */
8 | public enum RelationshipTypes implements RelationshipType {
9 | terminal, bankcard, loginmobile, ipv4, applymymobile, hometel, recommend, identification, email, company, companyaddress, companytel, emergencymobile,merchantmobile,channelmobile,relativemobile, relativecontact, device;
10 | }
11 |
--------------------------------------------------------------------------------
/neo4j/src/main/resources/css/style.css:
--------------------------------------------------------------------------------
1 | graph {
2 | fill-color: white;
3 | }
4 | node {
5 | size: 65;
6 | fill-color: #CCCCCC, #AAAAAA;
7 | fill-mode: gradient-radial;
8 | text-offset: 0, 0;
9 | stroke-mode: plain;
10 | stroke-color: #333333;
11 | }
12 | node:clicked {
13 | fill-color: #2277FF, #88AAFF;
14 | fill-mode: gradient-radial;
15 | size: 100;
16 | text-size:18;
17 | text-offset: 0, 0;
18 | }
19 | edge {
20 | text-alignment: along;
21 | }
22 |
--------------------------------------------------------------------------------
/neo4j/src/main/resources/dev/config.properties:
--------------------------------------------------------------------------------
1 | neoIP=bolt://192.168.0.33:7687
2 | user=neo4j
3 | password=123456
4 | #************redis config **********
5 | redisIp=192.168.0.192:6380,192.168.0.192:6381,192.168.0.192:6382,192.168.0.192:6383,192.168.0.192:6384,192.168.0.192:6385
6 | psubscribe=testsub11
--------------------------------------------------------------------------------
/neo4j/src/main/resources/dev/hive-site.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <configuration>
3 |     <property>
4 |         <name>hive.metastore.uris</name>
5 |         <value>thrift://192.168.0.212:9083</value>
6 |         <description>Thrift uri for the remote metastore. Used by metastore client to connect to remote metastore.</description>
7 |     </property>
8 | </configuration>
--------------------------------------------------------------------------------
/neo4j/src/main/resources/log4j.xml:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/neo4j/src/main/resources/product/config.properties:
--------------------------------------------------------------------------------
1 | neoIP=bolt://10.16.65.15:7688
2 | user=neo4j
3 | password=123456
4 | #************redis config **********
5 | redisIp=10.0.8.170:6800,10.0.8.170:6801,10.0.8.171:6800,10.0.8.171:6801,10.0.8.172:6800,10.0.8.172:6801
6 | psubscribe=dataPlatform.anti_fraud.order_monitor
--------------------------------------------------------------------------------
/neo4j/src/main/resources/test/config.properties:
--------------------------------------------------------------------------------
1 | neoIP=http://192.168.0.33:7474/db/data
2 | user=neo4j
3 | password=123456
4 | #************redis config **********
5 | redisIp=192.168.0.192:6380,192.168.0.192:6381,192.168.0.192:6382,192.168.0.192:6383,192.168.0.192:6384,192.168.0.192:6385
6 | psubscribe=testsub11
--------------------------------------------------------------------------------
/neo4j/src/main/scala/com/lakala/datacenter/abstractions/DataGenerator.scala:
--------------------------------------------------------------------------------
1 | package com.lakala.datacenter.abstractions
2 |
3 | import com.lakala.datacenter.utils.Config
4 |
5 | /**
6 | * Created by Administrator on 2017/5/31 0031.
7 | */
8 | trait DataGenerator {
9 | def generateUsers(config: Config): Unit
10 | }
11 |
--------------------------------------------------------------------------------
/neo4j/src/main/scala/com/lakala/datacenter/load/spark/ClusterGraphDatabase.scala:
--------------------------------------------------------------------------------
1 | /*
2 | package com.lakala.datacenter.load.spark
3 |
4 | import com.lakala.datacenter.enums.Labels
5 | import org.neo4j.graphdb.index.IndexHits
6 | import org.neo4j.graphdb.{Node, Relationship}
7 | import org.neo4j.helpers.collection.MapUtil
8 | import org.neo4j.index.impl.lucene.legacy.LuceneIndexImplementation
9 | import org.neo4j.rest.graphdb.index.RestIndex
10 | import org.neo4j.rest.graphdb.query.RestCypherQueryEngine
11 | import org.neo4j.rest.graphdb.{RestAPI, RestAPIFacade}
12 |
13 | /**
14 | * Created by Administrator on 2017/6/19 0019.
15 | */
16 | object ClusterGraphDatabase {
17 | private var restAPI: RestAPI = null
18 | private val serverBaseUrl = "http://192.168.0.33:7474/db/data"
19 | private val user = "neo4j"
20 | private val password = "123456"
21 |
22 | def main(args: Array[String]): Unit = {
23 | try
24 | setUp
25 | countExistingNodes
26 | tearDown
27 | }
28 |
29 | @throws[Throwable]
30 | def setUp(): Unit = {
31 | restAPI = new RestAPIFacade(serverBaseUrl, user, password)
32 | validateServerIsUp()
33 | val queryEngine = new RestCypherQueryEngine(restAPI)
34 | // graphdb = queryEngine.asInstanceOf[GraphDatabaseService]
35 | }
36 |
37 | @throws[Throwable]
38 | private def validateServerIsUp() = {
39 | try
40 | restAPI.getAllLabelNames
41 | catch {
42 | case e: Throwable =>
43 | println(" !!!!!!!!!!!!!!!! NOTE !!!!!!!!!!!!!!!!!!!!!!!! \n" + "this test assumes a Neo4j Server is running in a separate process \n" + "on localhost port 7474. You will need to manually start it before \n" + "running these demo tests.")
44 | throw e
45 | }
46 | }
47 |
48 | def tearDown(): Unit = {
49 | restAPI.close()
50 | }
51 |
52 |
53 | def countExistingNodes(): Unit = {
54 | //472
55 | val node2 = restAPI.getNodeById(293)
56 | println(node2.getLabels.iterator().next().name())
57 | val indexs = restAPI.createIndex(classOf[Node], "orderno", LuceneIndexImplementation.EXACT_CONFIG)
58 | val relIndex: RestIndex[Relationship] = restAPI.createIndex(classOf[Relationship], "terminal", LuceneIndexImplementation.EXACT_CONFIG)
59 | val terminalIndexs = restAPI.createIndex(classOf[Node], "content", LuceneIndexImplementation.EXACT_CONFIG)
60 |
61 | val hitIndex2: IndexHits[Node] = indexs.get("orderno", "XNA20170617214709013851193476043")
62 | val hitIndex: IndexHits[Node] = terminalIndexs.get("content", "CBC3A110160228103")
63 | println(hitIndex2.size())
64 | println(hitIndex2.getSingle)
65 | println("#################")
66 | println(hitIndex.size())
67 | println(hitIndex.getSingle)
68 | val applyNode = restAPI.getOrCreateNode(indexs, "orderno", "XNA20170617214709013851193476043", MapUtil.map("term_id", "CBC3A110160228103"))
69 | applyNode.addLabel(Labels.ApplyInfo)
70 | println(applyNode.getLabels.iterator().next().name())
71 | applyNode.setProperty("orderno", "XNA20170617214709013851193476043")
72 | // applyNode.setProperty("term_id", "CBC3A110160228103")
73 | applyNode.setProperty("modelname", Labels.ApplyInfo)
74 |
75 | val terminalNode = restAPI.getOrCreateNode(terminalIndexs, "content", "CBC3A110160228103", MapUtil.map())
76 | terminalNode.addLabel(Labels.Terminal)
77 | terminalNode.setProperty("modelname", Labels.Terminal)
78 | println(terminalNode.getLabels.iterator().next().name())
79 | val rel = restAPI.getOrCreateRelationship(relIndex, "", "", applyNode, terminalNode, "terminal", MapUtil.map())
80 |
81 | if (applyNode != null) {
82 | println("====================")
83 | println("apply node " + applyNode.getId() + " terminal node " + terminalNode.getId + " relationship " + rel.getId + " is created.")
84 | }
85 |
86 | }
87 | }
88 | */
89 |
--------------------------------------------------------------------------------
/neo4j/src/main/scala/com/lakala/datacenter/load/spark/LoadHiveData.scala:
--------------------------------------------------------------------------------
1 | package com.lakala.datacenter.load.spark
2 |
3 | import com.lakala.datacenter.core.utils.UtilsToos
4 | import org.apache.commons.lang3.StringUtils
5 | import org.apache.spark.sql.hive.HiveContext
6 | import org.apache.spark.{SparkConf, SparkContext}
7 |
8 | /**
9 |  * Created by Administrator on 2017/5/31 0031.
10 | */
11 | object LoadHiveData {
12 | def main(args: Array[String]): Unit = {
13 | val conf = new SparkConf().setAppName("LoadHiveData")
14 | val sc = new SparkContext(conf)
15 | val hc = new HiveContext(sc)
16 | hc.sql("use creditloan")
17 | val sql =
18 | s"""select a.order_id,a.contract_no,a.term_id,a.loan_pan,a.return_pan,a.insert_time,a.recommend,a.user_id,b.cert_no,b.email,b.company,b.mobile,b.comp_addr,b.comp_phone,b.emergency_contact_mobile,b.contact_mobile,c.device_id
19 | |from creditloan.s_c_loan_apply a
20 | | left join creditloan.s_c_apply_user b on a.user_id =b.id and (a.year="2017" and a.month="05" and a.day="31") and (b.year="2017" and b.month="05" and b.day="31")
21 | | left join creditloan.s_c_loan_deviceidauth c on a.order_id =c.order_no and (a.year="2017" and a.month="05" and a.day="31") and (c.year="2017" and c.month="05" and c.day="31") """.stripMargin
22 |
23 | val df = hc.sql(sql)
24 | val lineRDD = df.mapPartitions { rows =>
25 | rows.map { row =>
26 | val orderId = row.getAs[String]("order_id")
27 | val contractNo = if (StringUtils.isNotBlank(row.getAs[String]("contract_no"))) row.getAs[String]("contract_no") else ""
28 | val termId = if (StringUtils.isNotBlank(row.getAs[String]("term_id"))) row.getAs[String]("term_id") else ""
29 | val loanPan = if (StringUtils.isNotBlank(row.getAs[String]("loan_pan"))) row.getAs[String]("loan_pan") else ""
30 | val returnPan = if (StringUtils.isNotBlank(row.getAs[String]("return_pan"))) row.getAs[String]("return_pan") else ""
31 | val insertTime = if (StringUtils.isNotBlank(row.getAs[String]("insert_time"))) row.getAs[String]("insert_time") else ""
32 | val recommend = if (StringUtils.isNotBlank(row.getAs[String]("recommend")) && UtilsToos.isMobileOrPhone(row.getAs[String]("recommend"))) row.getAs[String]("recommend") else ""
33 | val userId = if (StringUtils.isNotBlank(row.getAs[String]("user_id"))) row.getAs[String]("user_id") else ""
34 | val certNo = if (StringUtils.isNotBlank(row.getAs[String]("cert_no"))) row.getAs[String]("cert_no") else ""
35 | val email = if (StringUtils.isNotBlank(row.getAs[String]("email"))) row.getAs[String]("email") else ""
36 | val company = if (StringUtils.isNotBlank(row.getAs[String]("company"))) row.getAs[String]("company") else ""
37 | val mobile = if (StringUtils.isNotBlank(row.getAs[String]("mobile")) && UtilsToos.isMobileOrPhone(row.getAs[String]("mobile"))) row.getAs[String]("mobile") else ""
38 | val compAddr = if (StringUtils.isNotBlank(row.getAs[String]("comp_addr"))) row.getAs[String]("comp_addr") else ""
39 | val compPhone = if (StringUtils.isNotBlank(row.getAs[String]("comp_phone"))) row.getAs[String]("comp_phone") else ""
40 | val emergencyContactMobile = if (StringUtils.isNotBlank(row.getAs[String]("emergency_contact_mobile"))) row.getAs[String]("emergency_contact_mobile") else ""
41 | val contactMobile = if (StringUtils.isNotBlank(row.getAs[String]("contact_mobile"))) row.getAs[String]("contact_mobile") else ""
42 | val deviceId = if (StringUtils.isNotBlank(row.getAs[String]("device_id"))) row.getAs[String]("device_id") else ""
43 | s"$orderId,$contractNo,$termId,$loanPan,$returnPan,$insertTime,$recommend,$userId,$certNo,$email,$company,$mobile,$compAddr,$compPhone,$emergencyContactMobile,$contactMobile,$deviceId"
44 | }
45 | }
46 |
47 | lineRDD.saveAsTextFile(args(1))
48 |
49 | }
50 | }
51 |
--------------------------------------------------------------------------------
/neo4j/src/main/scala/com/lakala/datacenter/load/spark/Neo4jConfig.scala:
--------------------------------------------------------------------------------
1 | package com.lakala.datacenter.load.spark
2 |
3 | import org.apache.spark.SparkConf
4 | import org.neo4j.driver.v1.{AuthTokens, Config, Driver, GraphDatabase}
5 |
6 | /**
7 | * @author lys
8 | * @since 02.03.16
9 | */
10 | case class Neo4jConfig(val url: String, val user: String = "neo4j", val password: Option[String] = None) {
11 |
12 | def boltConfig() = Config.build.withEncryptionLevel(Config.EncryptionLevel.NONE).toConfig
13 |
14 | def driver(config: Neo4jConfig): Driver = config.password match {
15 | case Some(pwd) => GraphDatabase.driver(config.url, AuthTokens.basic(config.user, pwd), boltConfig())
16 | case _ => GraphDatabase.driver(config.url, boltConfig())
17 | }
18 |
19 | def driver(): Driver = driver(this)
20 |
21 | def driver(url: String): Driver = GraphDatabase.driver(url, boltConfig())
22 |
23 | }
24 |
25 | object Neo4jConfig {
26 | val prefix = "spark.neo4j.bolt."
27 |
28 | def apply(sparkConf: SparkConf): Neo4jConfig = {
29 | val url = sparkConf.get(prefix + "url", "bolt://192.168.0.33:7687")
30 | val user = sparkConf.get(prefix + "user", "neo4j")
31 | val password: Option[String] = Option(sparkConf.get(prefix + "password", "123456"))
32 | Neo4jConfig(url, user, password)
33 | }
34 | }
35 |
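A short sketch (not part of the file) showing how the spark.neo4j.bolt.* settings feed this class; the URL and credentials below are just the same defaults used above and would normally come from the deployment environment.

import org.apache.spark.SparkConf
import com.lakala.datacenter.load.spark.Neo4jConfig

object Neo4jConfigDemo {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
      .set("spark.neo4j.bolt.url", "bolt://192.168.0.33:7687")
      .set("spark.neo4j.bolt.user", "neo4j")
      .set("spark.neo4j.bolt.password", "123456")

    // Neo4jConfig(conf) falls back to the same defaults if the keys are absent
    val driver = Neo4jConfig(conf).driver()
    val session = driver.session()
    try {
      println(session.run("MATCH (n) RETURN count(n) AS c").single().get("c").asLong())
    } finally {
      session.close()
      driver.close()
    }
  }
}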
--------------------------------------------------------------------------------
/neo4j/src/main/scala/com/lakala/datacenter/load/spark/Neo4jJavaIntegration.scala:
--------------------------------------------------------------------------------
1 | package com.lakala.datacenter.load.spark
2 |
3 | import java.util
4 |
5 | import org.apache.spark.SparkContext
6 | import org.apache.spark.api.java.JavaRDD
7 | import org.apache.spark.sql.SQLContext
8 |
9 | import scala.collection.JavaConverters._
10 |
11 | /**
12 | * @author lys
13 | * @since 19.03.16
14 | */
15 | object Neo4jJavaIntegration {
16 | def rowRDD(sc: SparkContext, query: String, parameters: java.util.Map[String, AnyRef]) =
17 | new Neo4jRowRDD(sc, query, if (parameters == null) Seq.empty else parameters.asScala.toSeq).toJavaRDD()
18 |
19 | def tupleRDD(sc: SparkContext, query: String, parameters: java.util.Map[String, AnyRef]): JavaRDD[util.Map[String, AnyRef]] = {
20 | val params = if (parameters == null) Seq.empty else parameters.asScala.toSeq
21 | Neo4jTupleRDD(sc, query, params)
22 | .map((t) => new util.LinkedHashMap[String, AnyRef](t.toMap.asJava).asInstanceOf[util.Map[String, AnyRef]])
23 | .toJavaRDD()
24 | }
25 |
26 | def dataFrame(sqlContext: SQLContext, query: String, parameters: java.util.Map[String, AnyRef], schemaInfo: util.Map[String, String]) = {
27 | Neo4jDataFrame(sqlContext, query, parameters.asScala.toSeq, schemaInfo.asScala.toSeq: _*)
28 | }
29 | }
30 |
--------------------------------------------------------------------------------
/neo4j/src/main/scala/com/lakala/datacenter/load/spark/Neo4jPartition.scala:
--------------------------------------------------------------------------------
1 | package com.lakala.datacenter.load.spark
2 |
3 | import org.apache.spark.Partition
4 |
5 | /**
6 | * @author lys
7 | * @since 02.03.16
8 | */
9 | // , val lower: Long = 0, val upper: Long = 0 -> paging for cypher queries with skip / limit
10 | class Neo4jPartition(idx: Long = 0, skip : Long = 0, limit : Long = Long.MaxValue) extends Partition {
11 | override def index: Int = idx.toInt
12 | val window : Map[String,Any] = Map("_limit" -> limit, "_skip" -> skip)
13 |
14 | override def toString: String = s"Neo4jRDD index $index skip $skip limit: $limit"
15 | }
16 |
--------------------------------------------------------------------------------
/neo4j/src/main/scala/com/lakala/datacenter/load/spark/Neo4jRowRDD.scala:
--------------------------------------------------------------------------------
1 | package com.lakala.datacenter.load.spark
2 |
3 | import org.apache.spark._
4 | import org.apache.spark.rdd.RDD
5 | import org.apache.spark.sql.Row
6 | import org.neo4j.driver.v1._
7 |
8 | import scala.collection.JavaConverters._
9 |
10 | class Neo4jRowRDD(@transient sc: SparkContext, val query: String, val parameters: Seq[(String, Any)])
11 | extends RDD[Row](sc, Nil) {
12 |
13 | private val config = Neo4jConfig(sc.getConf)
14 |
15 | override def compute(split: Partition, context: TaskContext): Iterator[Row] = {
16 | val driver = config.driver()
17 | val session = driver.session()
18 |
19 | val result: StatementResult = session.run(query, parameters.toMap.mapValues(_.asInstanceOf[AnyRef]).asJava)
20 |
21 | result.asScala.map((record) => {
22 | val keyCount = record.size()
23 |
24 | val res = if (keyCount == 0) Row.empty
25 | else if (keyCount == 1) Row(record.get(0).asObject())
26 | else {
27 | val builder = Seq.newBuilder[AnyRef]
28 | var i = 0
29 | while (i < keyCount) {
30 | builder += record.get(i).asObject()
31 | i = i + 1
32 | }
33 | Row.fromSeq(builder.result())
34 | }
35 | if (!result.hasNext) {
36 | session.close()
37 | driver.close()
38 | }
39 | res
40 | })
41 | }
42 |
43 | override protected def getPartitions: Array[Partition] = Array(new Neo4jPartition())
44 | }
45 |
46 | object Neo4jRowRDD {
47 | def apply(sc: SparkContext, query: String, parameters: Seq[(String, Any)] = Seq.empty) = new Neo4jRowRDD(sc, query, parameters)
48 | }
49 |
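A minimal driver sketch (not in the repository) for the RDD above; the Cypher query, its {limit} parameter, and the local master setting are illustrative, assuming an ApplyInfo label with an orderno property as used elsewhere in this module.

import org.apache.spark.{SparkConf, SparkContext}
import com.lakala.datacenter.load.spark.Neo4jRowRDD

object Neo4jRowRDDDemo {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
      .setAppName("Neo4jRowRDDDemo")
      .setMaster("local[2]")
      .set("spark.neo4j.bolt.url", "bolt://192.168.0.33:7687")
    val sc = new SparkContext(conf)

    // Each Cypher result record becomes one Row
    val rows = Neo4jRowRDD(sc, "MATCH (a:ApplyInfo) RETURN a.orderno LIMIT {limit}", Seq("limit" -> 10))
    rows.collect().foreach(println)

    sc.stop()
  }
}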
--------------------------------------------------------------------------------
/neo4j/src/main/scala/com/lakala/datacenter/load/spark/Neo4jTupleRDD.scala:
--------------------------------------------------------------------------------
1 | package com.lakala.datacenter.load.spark
2 |
3 | import org.apache.spark._
4 | import org.apache.spark.rdd.RDD
5 | import org.neo4j.driver.v1.Driver
6 |
7 | import scala.collection.JavaConverters._
8 |
9 | class Neo4jTupleRDD(@transient sc: SparkContext, val query: String, val parameters: Seq[(String, AnyRef)])
10 | extends RDD[Seq[(String, AnyRef)]](sc, Nil) {
11 |
12 | private val config = Neo4jConfig(sc.getConf)
13 |
14 | override def compute(split: Partition, context: TaskContext): Iterator[Seq[(String, AnyRef)]] = {
15 | val driver: Driver = config.driver()
16 | val session = driver.session()
17 |
18 | val result = session.run(query, parameters.toMap.asJava)
19 |
20 | result.asScala.map( (record) => {
21 | val res = record.asMap().asScala.toSeq
22 | if (!result.hasNext) {
23 | session.close()
24 | driver.close()
25 | }
26 | res
27 | })
28 | }
29 |
30 | override protected def getPartitions: Array[Partition] = Array(new Neo4jPartition())
31 | }
32 |
33 | object Neo4jTupleRDD {
34 | def apply(sc: SparkContext, query: String, parameters: Seq[(String,AnyRef)] = Seq.empty) = new Neo4jTupleRDD(sc, query, parameters)
35 | }
36 |
37 |
38 |
--------------------------------------------------------------------------------
/neo4j/src/main/scala/com/lakala/datacenter/main/Main.scala:
--------------------------------------------------------------------------------
1 | package com.lakala.datacenter.main
2 |
3 | import java.io.File
4 |
5 | import com.lakala.datacenter.enums.RelationshipTypes
6 | import com.lakala.datacenter.grogram.Neo4jDataGenerator
7 | import com.lakala.datacenter.utils.Config
8 | import org.joda.time.DateTime
9 | import org.neo4j.graphdb.factory.GraphDatabaseFactory
10 | import org.neo4j.graphdb.{Direction, GraphDatabaseService}
11 | import org.neo4j.io.fs.FileUtils
12 | import org.slf4j.LoggerFactory
13 |
14 | /**
15 | * Created by Administrator on 2017/5/31 0031.
16 | */
17 | object Main {
18 | private val logger = LoggerFactory.getLogger("Main")
19 | val COUNT = 100000 // batch size for bulk commits
20 | //F:\tmp\applydir F:\tmp\neo4j\tmp01
21 | val FRIENDS_PER_USER = 50
22 |
23 | def main(args: Array[String]): Unit = {
24 | val mainTime = DateTime.now()
25 | println("start generateGraphData time " + DateTime.now())
26 | // checkArgs(args) 13199050
27 | // val config = ArgsCommon.parseArgs(args)
28 | val config = new Config()
29 | config.input = args(0)
30 | config.output = args(1)
31 | generateGraphData(config)
32 | val endtime = DateTime.now()
33 | println("end generateGraphData time " + endtime + ", elapsed " + (endtime.getMillis - mainTime.getMillis) / 1000 + "s")
34 | }
35 |
36 | def generateGraphData(config: Config): Unit = {
37 | FileUtils.deleteRecursively(new File(config.output + "/" + config.neo4jDB))
38 | var graphdb = new GraphDatabaseFactory().newEmbeddedDatabase(new File(config.output + "/" + config.neo4jDB))
39 | val neo4jDataGenerator = new Neo4jDataGenerator(graphdb)
40 | // generate the graph data
41 | neo4jDataGenerator.generateUsers(config)
42 | registerShutdownHook(graphdb)
43 | }
44 |
45 | /**
46 | * START SNIPPET: shutdownHook
47 | * @param graph
48 | */
49 | def registerShutdownHook(graph: GraphDatabaseService): Unit = {
50 | Runtime.getRuntime.addShutdownHook(new Thread() {
51 | override def run(): Unit = {
52 | graph.shutdown()
53 | }
54 | })
55 | }
56 |
57 | def checkArgs(args: Array[String]): Unit = {
58 | if (args.length < 1) {
59 | println("Usage: class com.lakala.datacenter.grogress.ExportNDegreeData$ [options]\n" +
60 | "[=....]\n " +
61 | "-i | --Input \n applyInput file or path Required.\n " +
62 | "-o | --output \n output path Required\n " +
63 | "-m | --master \n spark master, local[N] or spark://host:port default=local\n " +
64 | "-h | --sparkhome \n SPARK_HOME Required to run on cluster\n " +
65 | "-n | --jobname \n job name\n " +
66 | "-s | --startDate \n use start date load data\n " +
67 | "-t | --endDate \n use end date load data\n " +
68 | "-p | --parallelism \n sets spark.default.parallelism and minSplits on the edge file. default=based on input partitions\n " +
69 | "-x | --minprogress \n Number of vertices that must change communites for the algorithm to consider progress. default=2000\n " +
70 | "-y | --progresscounter \n Number of times the algorithm can fail to make progress before exiting. default=1\n " +
71 | "-d | --edgedelimiter \n specify input file edge delimiter. default=\",\"\n " +
72 | "-j | --jars \n comma seperated list of jars\n " +
73 | "-e | --encrypy \n Set to true to all data convert encrypy need all data use google hash's MD5 generage Long ids. Defaults to false\n " +
74 | "-b | --blacType \n Set to true to exprot black result data, Defaults to false\n " +
75 | " =.... ")
76 | sys.exit(1)
77 | }
78 | }
79 |
80 | }
81 |
--------------------------------------------------------------------------------
/neo4j/src/main/scala/com/lakala/datacenter/main/MessageParam.scala:
--------------------------------------------------------------------------------
1 | package com.lakala.datacenter.main
2 |
3 | import kafka.consumer.KafkaStream
4 | import org.neo4j.driver.v1.Session
5 | import redis.clients.jedis.JedisCluster
6 |
7 | /**
8 | * Created by Administrator on 2017/8/7 0007.
9 | */
10 | case class MessageParam(m_stream: KafkaStream[_, _], m_threadNumber: Int, redis: JedisCluster,
11 | session: Session, sessionBak: Session, psubscribe: String) {
12 |
13 | }
14 |
--------------------------------------------------------------------------------
/neo4j/src/main/scala/com/lakala/datacenter/main/TrialConsumerKafka.scala:
--------------------------------------------------------------------------------
1 | package com.lakala.datacenter.main
2 |
3 | import java.util.Properties
4 | import java.util.concurrent.{ExecutorService, Executors, TimeUnit}
5 |
6 | import com.lakala.datacenter.constant.StreamingConstant
7 | import com.lakala.datacenter.utils.RedisUtils
8 | import com.lakala.datacenter.utils.UtilsTools.properties
9 | import kafka.consumer.{ConsumerConfig, ConsumerConnector, KafkaStream}
10 | import kafka.serializer.StringDecoder
11 | import kafka.utils.VerifiableProperties
12 | import org.apache.commons.lang3.StringUtils.trim
13 | import org.neo4j.driver.v1.{AuthTokens, Driver, GraphDatabase}
14 | import redis.clients.jedis.JedisCluster
15 |
16 | import scala.collection.Map
17 |
18 | /**
19 | * Created by Administrator on 2017/8/7 0007.
20 | *
21 | */
22 |
23 | object TrialConsumerKafka{
24 | def main(args: Array[String]): Unit = {
25 | val zooKeeper: String = args(0)
26 | val groupId: String = args(1)
27 | val topic: String = args(2)
28 | val threads: Int = args(3).toInt
29 | println("start trial consumer kafaka message .....")
30 | val example:TrialConsumerKafka= new TrialConsumerKafka(zooKeeper, groupId, topic)
31 | example.run(threads)
32 |
33 | try {
34 | Thread.sleep(10000)
35 | } catch {
36 | case ie: InterruptedException =>
37 | println("==============")
38 | }
39 |
40 | }
41 | }
42 |
43 | class TrialConsumerKafka {
44 | private var consumer: ConsumerConnector = null
45 | private var topic: String = null
46 | private var executor: ExecutorService = null
47 | private var driver: Driver = null
48 | private var redis: JedisCluster = RedisUtils.jedisCluster()
49 | val properies = properties(StreamingConstant.CONFIG)
50 | def this(a_zookeeper: String, a_groupId: String, a_topic: String) {
51 | this()
52 | this.topic = a_topic
53 | consumer = kafka.consumer.Consumer.create(createConsumerConfig(a_zookeeper, a_groupId))
54 | driver = GraphDatabase.driver(trim(properies.getProperty(StreamingConstant.NEOIP)), AuthTokens.basic(trim(properies.getProperty(StreamingConstant.USER)), trim(properies.getProperty(StreamingConstant.PASSWORD))))
55 | }
56 |
57 | def shutdown(): Unit = {
58 | if (consumer != null) consumer.shutdown()
59 | if (executor != null) {
60 | executor.shutdown()
61 | try {
62 | if (!executor.awaitTermination(5000, TimeUnit.MILLISECONDS)) System.out.println("Timed out waiting for consumer threads to shut down, exiting uncleanly")
63 | } catch {
64 | case e: InterruptedException => System.out.println("Interrupted during shutdown, exiting uncleanly")
65 | }
66 | }
67 | }
68 |
69 | def run(a_numThreads: Int): Unit = {
70 | val topicCountMap = Map(topic -> a_numThreads)
71 | // val topicCountMap = Map(topic -> 1)
72 | val keyDecoder = new StringDecoder(new VerifiableProperties)
73 | val valueDecoder = new StringDecoder(new VerifiableProperties)
74 | val consumerMap: Map[String, List[KafkaStream[String, String]]] = consumer.createMessageStreams(topicCountMap, keyDecoder, valueDecoder)
75 | val streams: List[KafkaStream[String, String]] = consumerMap.get(topic).get
76 |
77 | executor = Executors.newFixedThreadPool(a_numThreads)
78 | var threadNumber = 0
79 | streams.foreach { stream =>
80 | executor.submit(new HandleTask(MessageParam(stream, threadNumber, redis,
81 | driver.session, driver.session, properies.getProperty(StreamingConstant.PSUBSCRIBE))))
82 | threadNumber += 1
83 | }
84 | }
85 |
86 | private def createConsumerConfig(a_zookeeper: String, a_groupId: String): ConsumerConfig = {
87 | val props = new Properties()
88 | props.put("zookeeper.connect", a_zookeeper)
89 | props.put("group.id", a_groupId)
90 | props.put("zookeeper.session.timeout.ms", "60000")
91 | props.put("zookeeper.sync.time.ms", "200")
92 | props.put("auto.commit.interval.ms", "1000")
93 | props.put("auto.offset.reset", "smallest")
94 | props.put("rebalance.max.retries", "5")
95 | props.put("rebalance.backoff.ms", "12000")
96 | props.put("serializer.class", "kafka.serializer.StringEncoder")
97 | new ConsumerConfig(props)
98 | }
99 | }
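HandleTask is defined elsewhere in this module and is not shown here. As a hedged illustration of what such a per-stream worker usually looks like with the old high-level consumer API, a hypothetical ExampleHandleTask could be:

    import kafka.consumer.KafkaStream

    // Illustration only: not the project's HandleTask. It drains one KafkaStream
    // on its own thread and prints every decoded message.
    class ExampleHandleTask(stream: KafkaStream[String, String], threadNumber: Int) extends Runnable {
      override def run(): Unit = {
        val it = stream.iterator()
        while (it.hasNext()) {
          val msg = it.next() // MessageAndMetadata[String, String]
          println(s"thread $threadNumber partition ${msg.partition} offset ${msg.offset}: ${msg.message()}")
        }
        println(s"stream finished, shutting down thread $threadNumber")
      }
    }

run(a_numThreads) above submits one such task per stream, so a_numThreads should match the number of streams requested in topicCountMap.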
--------------------------------------------------------------------------------
/neo4j/src/main/scala/com/lakala/datacenter/realtimeBuildGraphx/MsgOffsetStreamListener.scala:
--------------------------------------------------------------------------------
1 | package com.lakala.datacenter.realtimeBuildGraphx
2 |
3 | import com.lakala.datacenter.constant.StreamingConstant
4 | import com.lakala.datacenter.utils.Config
5 | import kafka.utils.{ZKGroupTopicDirs, ZkUtils}
6 | import org.I0Itec.zkclient.ZkClient
7 | import org.I0Itec.zkclient.exception.ZkMarshallingError
8 | import org.I0Itec.zkclient.serialize.ZkSerializer
9 | import org.apache.spark.Logging
10 | import org.apache.spark.streaming.Time
11 | import org.apache.spark.streaming.kafka.OffsetRange
12 | import org.apache.spark.streaming.scheduler.{StreamingListener, StreamingListenerBatchCompleted, StreamingListenerReceiverError, StreamingListenerReceiverStopped}
13 |
14 | import scala.collection.mutable
15 |
16 | /**
17 | * Created by Administrator on 2017/6/9 0009.
18 | */
19 | class MsgOffsetStreamListener(config: Config, offsetRanges: mutable.Map[Time, Array[OffsetRange]]) extends StreamingListener with Logging {
20 |
21 | var zkClient = getZkClient(config.zkIPs)
22 | // val zkUtils = ZkUtils.apply(zkClient,true)
23 | val topicDirs = new ZKGroupTopicDirs(config.group, config.topic)
24 |
25 | override def onBatchCompleted(batchCompleted: StreamingListenerBatchCompleted): Unit = {
26 | // a ZKGroupTopicDirs object was created above to hold the consumer offset path
27 | // check the child nodes under that path (by default they are the ones we created when saving offsets for the individual partitions)
28 | // println(batchCompleted.batchInfo.numRecords)
29 | if (batchCompleted.batchInfo.numRecords > 0) {
30 | val currOffsetRange = offsetRanges.remove(batchCompleted.batchInfo.batchTime).getOrElse(Array[OffsetRange]())
31 | currOffsetRange.foreach { x =>
32 | val zkPath = s"${topicDirs.consumerOffsetDir}/${x.partition}"
33 | // save this partition's offset to ZooKeeper
34 | // ZkUtils.apply(zkClient,true).updatePersistentPath(zkPath, s"${x.fromOffset}")
35 | ZkUtils.updatePersistentPath(zkClient, zkPath, s"${x.fromOffset}")
36 | println(s"zkPath:${zkPath} offset:fromOffset ${x.fromOffset} untilOffset ${x.untilOffset}")
37 | // logInfo(s"zkPath:${zkPath} offset:fromOffset ${x.fromOffset} untilOffset ${x.untilOffset}")
38 | }
39 | }
40 | }
41 |
42 | override def onReceiverError(receiverError: StreamingListenerReceiverError): Unit = {
43 | val topicDirs = new ZKGroupTopicDirs(config.group, config.topic)
44 | logError(s"ERROR:${receiverError.receiverInfo.lastError}\n Message:${receiverError.receiverInfo.lastErrorMessage}")
45 | val currOffsetRange = offsetRanges.remove(Time.apply(receiverError.receiverInfo.lastErrorTime)).getOrElse(Array[OffsetRange]())
46 | currOffsetRange.foreach { x =>
47 | val zkPath = s"${topicDirs.consumerOffsetDir}/${x.partition}"
48 | // ZkUtils.apply(zkClient,true).updatePersistentPath(zkPath, s"${x.fromOffset}")
49 | ZkUtils.updatePersistentPath(zkClient, zkPath, s"${x.fromOffset}")
50 | println(s"zkPath:${zkPath} offset:fromOffset ${x.fromOffset} untilOffset ${x.untilOffset}")
51 | // logInfo(s"zkPath:${zkPath} offset:fromOffset ${x.fromOffset} untilOffset ${x.untilOffset}")
52 | }
53 | }
54 |
55 | def getZkClient(zkServers: String, sessionTimeout: Int = 60000, connectionTimeout: Int = 60000): ZkClient = {
56 | val zkClient = new ZkClient(zkServers, sessionTimeout, connectionTimeout, new ZkSerializer {
57 | override def serialize(data: Object): Array[Byte] = {
58 | try {
59 | return data.toString().getBytes(StreamingConstant.CODE)
60 | } catch {
61 | case e: ZkMarshallingError => return null
62 |
63 | }
64 | }
65 | override def deserialize(bytes: Array[Byte]): Object = {
66 | try {
67 | return new String(bytes, StreamingConstant.CODE)
68 | } catch {
69 | case e: ZkMarshallingError => return null
70 | }
71 | }
72 | })
73 | zkClient
74 | }
75 |
76 | }
77 |
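A wiring sketch for this listener, assuming a Spark 1.x direct Kafka stream (KafkaUtils.createDirectStream from the kafka-0.8 connector); the Config instance is assumed to be populated with zkIPs/group/topic by the project's argument parsing, and the broker list is a placeholder:

    import com.lakala.datacenter.realtimeBuildGraphx.MsgOffsetStreamListener
    import com.lakala.datacenter.utils.Config
    import kafka.serializer.StringDecoder
    import org.apache.spark.SparkConf
    import org.apache.spark.rdd.RDD
    import org.apache.spark.streaming.kafka.{HasOffsetRanges, KafkaUtils, OffsetRange}
    import org.apache.spark.streaming.{Seconds, StreamingContext, Time}

    import scala.collection.mutable

    object OffsetListenerWiringSketch {
      def main(args: Array[String]): Unit = {
        val ssc = new StreamingContext(
          new SparkConf().setAppName("offsetListenerSketch").setMaster("local[2]"), Seconds(10))

        val config = new Config() // zkIPs, group and topic assumed to be filled in elsewhere
        val offsetRanges = mutable.Map[Time, Array[OffsetRange]]()
        ssc.addStreamingListener(new MsgOffsetStreamListener(config, offsetRanges))

        val kafkaParams = Map("metadata.broker.list" -> "192.168.0.211:9092,192.168.0.212:9092")
        val stream = KafkaUtils.createDirectStream[String, String, StringDecoder, StringDecoder](
          ssc, kafkaParams, Set(config.topic))

        // remember each batch's offset ranges so the listener can persist them on batch completion
        stream.transform { (rdd: RDD[(String, String)], time: Time) =>
          offsetRanges += time -> rdd.asInstanceOf[HasOffsetRanges].offsetRanges
          rdd
        }.map(_._2).print()

        ssc.start()
        ssc.awaitTermination()
      }
    }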
--------------------------------------------------------------------------------
/neo4j/src/main/scala/com/lakala/datacenter/realtimeBuildGraphx/SendMsg.scala:
--------------------------------------------------------------------------------
1 | package com.lakala.datacenter.realtimeBuildGraphx
2 |
3 | /**
4 | * Created by Administrator on 2017/8/2 0002.
5 | */
6 | case class SendMsg(orderno: String, insert_time: String, cert_no: String) {
7 |
8 | }
9 |
--------------------------------------------------------------------------------
/neo4j/src/main/scala/com/lakala/datacenter/utils/RedisUtils.scala:
--------------------------------------------------------------------------------
1 | package com.lakala.datacenter.utils
2 |
3 |
4 | import java.util
5 |
6 | import com.lakala.datacenter.constant.StreamingConstant
7 | import com.lakala.datacenter.utils.UtilsTools.properties
8 | import redis.clients.jedis.{HostAndPort, JedisCluster}
9 |
10 | import scala.collection.JavaConversions
11 |
12 | /**
13 | * Created by Administrator on 2017/6/29 0029.
14 | */
15 | object RedisUtils {
16 | private var cluster: JedisCluster = _
17 | private val properies = properties(StreamingConstant.CONFIG)
18 |
19 | def jedisCluster(): JedisCluster = {
20 | if (cluster == null) {
21 | synchronized {
22 | if (cluster == null) {
23 | val cluseterNodesSet = for (ipAndPort <- properies.getProperty("redisIp").split(",")) yield
24 | new HostAndPort(ipAndPort.split(":")(0).trim, (ipAndPort.split(":")(1).trim).toInt)
25 | cluster = new JedisCluster(JavaConversions.setAsJavaSet[HostAndPort](cluseterNodesSet.toSet))
26 | }
27 | }
28 | }
29 | cluster
30 | }
31 | }
32 |
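A minimal usage sketch; the key name and payload are placeholders (the channel "testsub12" also appears in TestRedis further down), and a reachable Redis cluster configured via the redisIp property is assumed:

    import com.lakala.datacenter.utils.RedisUtils

    object RedisUtilsExample {
      def main(args: Array[String]): Unit = {
        val cluster = RedisUtils.jedisCluster() // lazily created once, then reused across calls
        cluster.set("demo:key", "demo-value")
        println(cluster.get("demo:key"))
        cluster.publish("testsub12", """{"orderno":"demo","insert_time":"2017-06-30 12:01:10"}""")
      }
    }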
--------------------------------------------------------------------------------
/neo4j/src/main/scala/com/lakala/datacenter/utils/UtilsTools.scala:
--------------------------------------------------------------------------------
1 | package com.lakala.datacenter.utils
2 |
3 | import java.io.Serializable
4 | import java.util.Properties
5 |
6 | import org.slf4j.LoggerFactory
7 |
8 | /**
9 | * Created by lenovo on 2016/8/10.
10 | */
11 | object UtilsTools {
12 | private val logger = LoggerFactory.getLogger(this.getClass)
13 |
14 | def properties(propertiesPath: String): Properties = {
15 | var _properties: Option[Properties] = None
16 | _properties match {
17 | case None => {
18 | logger.info("Loading configuration...")
19 | val inputStream = this.getClass.getClassLoader.getResourceAsStream(propertiesPath)
20 | val underlying = new Properties()
21 | underlying.load(inputStream)
22 | _properties = Some(underlying)
23 | underlying
24 | }
25 | case Some(underlying) => {
26 | underlying
27 | }
28 | }
29 | _properties.get
30 | }
31 |
32 |
33 | }
34 |
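Because the local `_properties` Option is recreated on every call, each invocation re-reads the file from the classpath; callers such as RedisUtils therefore cache the returned Properties in a val. A small usage sketch (the file name stands in for whatever StreamingConstant.CONFIG resolves to):

    import com.lakala.datacenter.utils.UtilsTools

    object UtilsToolsExample {
      def main(args: Array[String]): Unit = {
        val props = UtilsTools.properties("config.properties") // must be on the classpath
        println(props.getProperty("redisIp"))
      }
    }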
--------------------------------------------------------------------------------
/neo4j/src/test/java/ConsumerKafka.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Created by Administrator on 2017/8/7 0007.
3 | */
4 |
5 | import kafka.consumer.Consumer;
6 | import kafka.consumer.ConsumerConfig;
7 | import kafka.consumer.ConsumerIterator;
8 | import kafka.consumer.KafkaStream;
9 | import kafka.javaapi.consumer.ConsumerConnector;
10 | import kafka.message.MessageAndMetadata;
11 | import kafka.serializer.StringEncoder;
12 |
13 | import java.util.HashMap;
14 | import java.util.List;
15 | import java.util.Map;
16 | import java.util.Properties;
17 | import java.util.concurrent.ExecutorService;
18 | import java.util.concurrent.Executors;
19 |
20 | public class ConsumerKafka {
21 | private ConsumerConfig config;
22 | private String topic;
23 | private int partitionsNum;
24 | private MessageExecutor executor;
25 | private ConsumerConnector connector;
26 | private ExecutorService threadPool;
27 |
28 | public ConsumerKafka(String topic, int partitionsNum, MessageExecutor executor) throws Exception {
29 | Properties prop = new Properties();
30 | prop.put("auto.offset.reset", "smallest"); //必须要加,如果要读旧数据
31 | prop.put("zookeeper.connect", "192.168.0.208:2181,192.168.0.211:2181,192.168.0.212:2181");
32 | prop.put("serializer.class", StringEncoder.class.getName());
33 | prop.put("metadata.broker.list", "192.168.0.211:9092,192.168.0.212:9092");
34 | prop.put("group.id", "test-consumer-group");
35 | config = new ConsumerConfig(prop);
36 | this.topic = topic;
37 | this.partitionsNum = partitionsNum;
38 | this.executor = executor;
39 | }
40 |
41 | public void start() throws Exception {
42 | connector = Consumer.createJavaConsumerConnector(config);
43 | Map<String, Integer> topics = new HashMap<String, Integer>();
44 | topics.put(topic, partitionsNum);
45 | Map<String, List<KafkaStream<byte[], byte[]>>> streams = connector.createMessageStreams(topics);
46 | List<KafkaStream<byte[], byte[]>> partitions = streams.get(topic);
47 | threadPool = Executors.newFixedThreadPool(partitionsNum);
48 | for (KafkaStream<byte[], byte[]> partition : partitions) {
49 | threadPool.execute(new MessageRunner(partition));
50 | }
51 | }
52 |
53 |
54 | public void close() {
55 | try {
56 | threadPool.shutdownNow();
57 | } catch (Exception e) {
58 | //
59 | } finally {
60 | connector.shutdown();
61 | }
62 |
63 | }
64 |
65 | class MessageRunner implements Runnable {
66 | private KafkaStream<byte[], byte[]> partition;
67 |
68 | MessageRunner(KafkaStream<byte[], byte[]> partition) {
69 | this.partition = partition;
70 | }
71 |
72 | public void run() {
73 | ConsumerIterator<byte[], byte[]> it = partition.iterator();
74 | while (it.hasNext()) {
75 | MessageAndMetadata<byte[], byte[]> item = it.next();
76 | System.out.println("partiton:" + item.partition());
77 | System.out.println("offset:" + item.offset());
78 | executor.execute(new String(item.message()));//UTF-8
79 | }
80 | }
81 | }
82 |
83 | interface MessageExecutor {
84 |
85 | public void execute(String message);
86 | }
87 |
88 | /**
89 | * @param args
90 | */
91 | public static void main(String[] args) {
92 | ConsumerKafka consumer = null;
93 | try {
94 | MessageExecutor executor = new MessageExecutor() {
95 |
96 | public void execute(String message) {
97 | System.out.println(message);
98 | }
99 | };
100 | consumer = new ConsumerKafka("topic1", 3, executor);
101 | consumer.start();
102 | } catch (Exception e) {
103 | e.printStackTrace();
104 | } finally {
105 | if (consumer != null) {
106 | consumer.close();
107 | }
108 | }
109 |
110 | }
111 |
112 | }
113 |
--------------------------------------------------------------------------------
/neo4j/src/test/java/DataAttributeType.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Created by Administrator on 2017/6/16 0016.
3 | */
4 | interface DataInterface{}
5 |
6 | public enum DataAttributeType implements DataInterface {
7 | ORDERID(1, "orderid"), CONTRACTNO(2, "contractno"), TERMID(3, "termid"), LOANPAN(4, "loanpan"), RETURNPAN(5, "returnpan"),
8 | INSERTTIME(6, "inserttime"), RECOMMEND(7, "recommend"), USERID(8, "userid"), DEVICEID(9, "deviceid"),
9 | CERTNO(10, "certno"), EMAIL(11, "email"), COMPANY(12, "company"), MOBILE(13, "mobile"), COMPADDR(14, "compaddr"),
10 | COMPPHONE(15, "compphone"), EMERGENCYCONTACTMOBILE(16, "emergencycontactmobile"),
11 | CONTACTMOBILE(17, "contactmobile"), IPV4(18, "ipv4"), MSGPHONE(19, "msgphone"), TELECODE(20, "telecode");
12 | // fields
13 | private int sequence;
14 | private String name;
15 |
16 | // constructor
17 | private DataAttributeType(int sequence, String name) {
18 | this.sequence = sequence;
19 | this.name = name;
20 | }
21 |
22 | // custom lookup: return the attribute name for a given sequence
23 | public static String getColorName(int sequence) {
24 | for (DataAttributeType c : DataAttributeType.values()) {
25 | if (c.getSequence() == sequence)
26 | return c.name;
27 | }
28 | return null;
29 | }
30 |
31 | // getters & setters
32 | public int getSequence() {
33 | return sequence;
34 | }
35 |
36 | public void setSequence(int sequence) {
37 | this.sequence = sequence;
38 | }
39 |
40 | public String getName() {
41 | return name;
42 | }
43 |
44 | public void setName(String name) {
45 | this.name = name;
46 | }
47 | }
48 |
49 |
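Despite its copy-pasted name, getColorName simply returns the attribute name registered for a sequence number. A small usage sketch from Scala (both classes sit in the default package of the test sources):

    object DataAttributeTypeExample {
      def main(args: Array[String]): Unit = {
        println(DataAttributeType.MOBILE.getSequence) // 13
        println(DataAttributeType.getColorName(13))   // "mobile"
      }
    }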
--------------------------------------------------------------------------------
/neo4j/src/test/java/JavaKafkaSimpleConsumerAPITest.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Created by Administrator on 2017/6/21 0021.
3 | *//*
4 |
5 |
6 | import java.util.ArrayList;
7 | import java.util.List;
8 |
9 | */
10 | /**
11 | * Created by gerry on 12/21.
12 | *//*
13 |
14 | public class JavaKafkaSimpleConsumerAPITest {
15 | public static void main(String[] args) {
16 | JavaKafkaSimpleConsumerAPI example = new JavaKafkaSimpleConsumerAPI();
17 | long maxReads = 300;
18 | String topic = "logCollect_cleanData";
19 | int partitionID = 2;
20 |
21 | KafkaTopicPartitionInfo topicPartitionInfo = new KafkaTopicPartitionInfo(topic, partitionID);
22 | List<KafkaBrokerInfo> seeds = new ArrayList<KafkaBrokerInfo>();
23 | seeds.add(new KafkaBrokerInfo("192.168.0.211", 9092));
24 | seeds.add(new KafkaBrokerInfo("192.168.0.212", 9092));
25 |
26 | try {
27 | example.run(maxReads, topicPartitionInfo, seeds);
28 | } catch (Exception e) {
29 | e.printStackTrace();
30 | }
31 |
32 | // fetch the list of all partition IDs belonging to this topic
33 | System.out.println(example.fetchTopicPartitionIDs(seeds, topic, 100000, 64 * 1024, "client-id"));
34 | }
35 | }
36 | */
37 |
--------------------------------------------------------------------------------
/neo4j/src/test/java/KafkaBrokerInfo.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Kafka broker connection parameters
3 | * Created by Administrator on 2017/6/21 0021.
4 | */
5 |
6 | public class KafkaBrokerInfo {
7 | // host name
8 | public final String brokerHost;
9 | // port number
10 | public final int brokerPort;
11 |
12 | /**
13 | * Constructor
14 | *
15 | * @param brokerHost Kafka broker host name or IP address
16 | * @param brokerPort port number
17 | */
18 | public KafkaBrokerInfo(String brokerHost, int brokerPort) {
19 | this.brokerHost = brokerHost;
20 | this.brokerPort = brokerPort;
21 | }
22 |
23 | /**
24 | * 构造方法, 使用默认端口号9092进行构造
25 | *
26 | * @param brokerHost
27 | */
28 | public KafkaBrokerInfo(String brokerHost) {
29 | this(brokerHost, 9092);
30 | }
31 | }
--------------------------------------------------------------------------------
/neo4j/src/test/java/KafkaConsumer.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Created by Administrator on 2017/6/8 0008.
3 | */
4 |
5 | import kafka.consumer.ConsumerConfig;
6 | import kafka.consumer.ConsumerIterator;
7 | import kafka.consumer.KafkaStream;
8 | import kafka.javaapi.consumer.ConsumerConnector;
9 | import kafka.serializer.StringDecoder;
10 | import kafka.utils.VerifiableProperties;
11 |
12 | import java.util.HashMap;
13 | import java.util.List;
14 | import java.util.Map;
15 | import java.util.Properties;
16 |
17 | public class KafkaConsumer {
18 |
19 | private final ConsumerConnector consumer;
20 | // private String TOPIC ="topic_creditloan_orderinfo_wait_score";
21 | private String TOPIC ="logCollect_cleanData";
22 | private KafkaConsumer() {
23 | Properties props = new Properties();
24 | // ZooKeeper configuration
25 | props.put("zookeeper.connect", "192.168.0.208:2181,192.168.0.211:2181,192.168.0.212:2181");
26 |
27 | // group.id identifies a consumer group
28 | // props.put("group.id", "test-consumer-group125");
29 | props.put("group.id", "testcheatgraph");
30 |
31 | // ZooKeeper connection timeouts
32 | props.put("zookeeper.session.timeout.ms", "60000");
33 | props.put("zookeeper.sync.time.ms", "200");
34 | props.put("auto.commit.interval.ms", "1000");
35 | props.put("auto.offset.reset", "smallest");
36 | props.put("rebalance.max.retries", "5");
37 | props.put("rebalance.backoff.ms", "12000");
38 | // serializer class
39 | props.put("serializer.class", "kafka.serializer.StringEncoder");
40 |
41 | ConsumerConfig config = new ConsumerConfig(props);
42 |
43 | consumer = kafka.consumer.Consumer.createJavaConsumerConnector(config);
44 | }
45 |
46 | void consume() {
47 | Map<String, Integer> topicCountMap = new HashMap<String, Integer>();
48 | topicCountMap.put(TOPIC, new Integer(1));
49 |
50 | StringDecoder keyDecoder = new StringDecoder(new VerifiableProperties());
51 | StringDecoder valueDecoder = new StringDecoder(new VerifiableProperties());
52 |
53 | Map<String, List<KafkaStream<String, String>>> consumerMap =
54 | consumer.createMessageStreams(topicCountMap, keyDecoder, valueDecoder);
55 | KafkaStream<String, String> stream = consumerMap.get(TOPIC).get(0);
56 | ConsumerIterator<String, String> it = stream.iterator();
57 | while (it.hasNext())
58 | System.out.println(it.next().message());
59 | }
60 |
61 | public static void main(String[] args) {
62 | new KafkaConsumer().consume();
63 | }
64 | }
--------------------------------------------------------------------------------
/neo4j/src/test/java/KafkaTopicPartitionInfo.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Created by Administrator on 2017/6/21 0021.
3 | */
4 |
5 | public class KafkaTopicPartitionInfo {
6 | // topic name
7 | public final String topic;
8 | // partition id
9 | public final int partitionID;
10 |
11 | /**
12 | * Constructor
13 | *
14 | * @param topic       topic name
15 | * @param partitionID partition id
16 | */
17 | public KafkaTopicPartitionInfo(String topic, int partitionID) {
18 | this.topic = topic;
19 | this.partitionID = partitionID;
20 | }
21 |
22 | @Override
23 | public boolean equals(Object o) {
24 | if (this == o) return true;
25 | if (o == null || getClass() != o.getClass()) return false;
26 |
27 | KafkaTopicPartitionInfo that = (KafkaTopicPartitionInfo) o;
28 |
29 | if (partitionID != that.partitionID) return false;
30 | return topic != null ? topic.equals(that.topic) : that.topic == null;
31 |
32 | }
33 |
34 | @Override
35 | public int hashCode() {
36 | int result = topic != null ? topic.hashCode() : 0;
37 | result = 31 * result + partitionID;
38 | return result;
39 | }
40 | }
41 |
--------------------------------------------------------------------------------
/neo4j/src/test/java/OperatorKafka.java:
--------------------------------------------------------------------------------
1 | //import com.lakala.datacenter.constant.StreamingConstant;
2 | //import kafka.admin.AdminUtils;
3 | //import org.I0Itec.zkclient.ZkClient;
4 | //import org.I0Itec.zkclient.exception.ZkMarshallingError;
5 | //import org.I0Itec.zkclient.serialize.ZkSerializer;
6 | //
7 | //import java.io.UnsupportedEncodingException;
8 | //import java.util.Iterator;
9 | //import java.util.Map;
10 | //import java.util.Properties;
11 | //
12 | ///**
13 | // * Created by Administrator on 2017/8/2 0002.
14 | // */
15 | //public class OperatorKafka {
16 | // public static void main(String[] args) {
17 | // createTopic();
18 | // }
19 | //
20 | // public static void createTopic() {
21 | // ZkClient zkUtils = getZk();
22 | //// create the topic (here: 3 partitions, 1 replica)
23 | // AdminUtils.createTopic(zkUtils, "logCollect_cleanData", 3, 1, new Properties());
24 | // zkUtils.close();
25 | // }
26 | //
27 | // public static void deleteTopic() {
28 | // ZkClient zkUtils = getZk();
29 | //// delete the topic
30 | // AdminUtils.deleteTopic(zkUtils, "logCollect_cleanData");
31 | // zkUtils.close();
32 | // }
33 | //
34 | // public static void queryTopic() {
35 | // ZkClient zkUtils = getZk();
36 | // // fetch the topic's config properties
37 | // Properties props = AdminUtils.fetchTopicConfig(zkUtils, "logCollect_cleanData");
38 | //// iterate over the topic-level properties
39 | // Iterator it = props.entrySet().iterator();
40 | // while (it.hasNext()) {
41 | // Map.Entry entry = (Map.Entry) it.next();
42 | // Object key = entry.getKey();
43 | // Object value = entry.getValue();
44 | // System.out.println(key + " = " + value);
45 | // }
46 | // zkUtils.close();
47 | // }
48 | //
49 | //
50 | // public static void updateTopic() {
51 | // ZkClient zkUtils = getZk();
52 | // Properties props = AdminUtils.fetchTopicConfig(zkUtils, "logCollect_cleanData");
53 | //// add a topic-level property
54 | // props.put("min.cleanable.dirty.ratio", "0.3");
55 | //// remove a topic-level property
56 | // props.remove("max.message.bytes");
57 | //// update the topic's config
58 | // AdminUtils.changeTopicConfig(zkUtils, "logCollect_cleanData", props);
59 | // }
60 | //
61 | // public static ZkClient getZk() {
62 | // ZkClient zkUtils = new ZkClient("192.168.0.208:2181,192.168.0.211:2181,192.168.0.212:2181", 60000, 60000, new ZkSerializer() {
63 | // @Override
64 | // public byte[] serialize(Object data) throws ZkMarshallingError {
65 | // try {
66 | // return data.toString().getBytes(StreamingConstant.CODE());
67 | // } catch (UnsupportedEncodingException e) {
68 | // e.printStackTrace();
69 | // }
70 | // return new byte[0];
71 | // }
72 | //
73 | // @Override
74 | // public Object deserialize(byte[] bytes) throws ZkMarshallingError {
75 | // try {
76 | // return new String(bytes, StreamingConstant.CODE());
77 | // } catch (UnsupportedEncodingException e) {
78 | // e.printStackTrace();
79 | // }
80 | // return new byte[0];
81 | // }
82 | // });
83 | // return zkUtils;
84 | // }
85 | //}
86 |
--------------------------------------------------------------------------------
/neo4j/src/test/java/TestCypher.java:
--------------------------------------------------------------------------------
1 | import org.neo4j.driver.v1.*;
2 |
3 | import java.util.List;
4 |
5 | import static org.neo4j.driver.v1.Values.parameters;
6 |
7 | /**
8 | * Created by Administrator on 2017/8/2 0002.
9 | */
10 | public class TestCypher {
11 | Driver driver = GraphDatabase.driver("bolt://localhost", AuthTokens.basic("neo4j", "123456"));
12 |
13 | public int addEmployees(final String companyName) {
14 | try (Session session = driver.session()) {
15 | int employees = 0;
16 | List<Record> persons = session.readTransaction(new TransactionWork<List<Record>>() {
17 | @Override
18 | public List<Record> execute(Transaction tx) {
19 | return matchPersonNodes(tx);
20 | }
21 | });
22 | for (final Record person : persons) {
23 | employees += session.writeTransaction(new TransactionWork<Integer>() {
24 | @Override
25 | public Integer execute(Transaction tx) {
26 | tx.run("MATCH (emp:Person {name: $person_name}) " +
27 | "MERGE (com:Company {name: $company_name}) " +
28 | "MERGE (emp)-[:WORKS_FOR]->(com)",
29 | parameters("person_name", person.get("name").asString(), "company_name",
30 | companyName));
31 | return 1;
32 | }
33 | });
34 | }
35 | return employees;
36 | }
37 | }
38 |
39 | private static List<Record> matchPersonNodes(Transaction tx) {
40 | return tx.run("MATCH (a:Person) RETURN a.name AS name").list();
41 | }
42 | }
43 |
--------------------------------------------------------------------------------
/neo4j/src/test/scala/BroadcastAccumulatorStreaming.scala:
--------------------------------------------------------------------------------
1 | /**
2 | * Created by Administrator on 2017/8/10 0010.
3 | */
4 |
5 | import org.apache.spark.broadcast.Broadcast
6 | import org.apache.spark.streaming.{Duration, StreamingContext}
7 | import org.apache.spark.{Accumulator, SparkConf, SparkContext}
8 |
9 | /**
10 | * Created by lxh on 2016/6/30.
11 | */
12 | object BroadcastAccumulatorStreaming {
13 |
14 | /**
15 | * Declare a broadcast variable and an accumulator.
16 | */
17 | private var broadcastList: Broadcast[List[String]] = _
18 | private var accumulator: Accumulator[Int] = _
19 |
20 | def main(args: Array[String]) {
21 |
22 | val sparkConf = new SparkConf().setMaster("local[4]").setAppName("broadcasttest")
23 | val sc = new SparkContext(sparkConf)
24 |
25 | /**
26 | * The batch duration is in milliseconds.
27 | */
28 | val ssc = new StreamingContext(sc, Duration(2000))
29 | // broadcastList = ssc.sparkContext.broadcast(util.Arrays.asList("Hadoop","Spark"))
30 | broadcastList = ssc.sparkContext.broadcast(List("Hadoop", "Spark"))
31 | accumulator = ssc.sparkContext.accumulator(0, "broadcasttest")
32 |
33 | /**
34 | * Get the input data.
35 | */
36 | val lines = ssc.socketTextStream("localhost", 9999)
37 |
38 | /**
39 | * 1. flatMap splits each line into words.
40 | * 2. map turns each word into a tuple (word, 1).
41 | * 3. reduceByKey sums the values.
42 | * (4. sortByKey for ranking)
43 | * 4. Filter on whether the word is in the broadcast list.
44 | * 5. Print the result.
45 | */
46 | val words = lines.flatMap(line => line.split(" "))
47 |
48 | val wordpair = words.map(word => (word, 1))
49 |
50 | wordpair.filter(record => {
51 | broadcastList.value.contains(record._1)
52 | })
53 |
54 |
55 | val pair = wordpair.reduceByKey(_ + _)
56 |
57 | /**
58 | * This pair is a PairDStream.
59 | * Check whether the key is in the blacklist; if it is, increment the accumulator.
60 | */
61 | /* pair.foreachRDD(rdd => {
62 | rdd.filter(record => {
63 |
64 | if (broadcastList.value.contains(record._1)) {
65 | accumulator.add(1)
66 | return true
67 | } else {
68 | return false
69 | }
70 |
71 | })
72 |
73 | })*/
74 |
75 | val filtedpair = pair.filter(record => {
76 | if (broadcastList.value.contains(record._1)) {
77 | accumulator.add(record._2)
78 | true
79 | } else {
80 | false
81 | }
82 |
83 | }).print
84 |
85 | println("累加器的值" + accumulator.value)
86 |
87 | // pair.filter(record => {broadcastList.value.contains(record._1)})
88 |
89 | val keypair = pair.map(pair => (pair._2,pair._1))
90 |
91 | /**
92 | * If DStream itself lacks an operator, use transform to work on the underlying RDD.
93 | */
94 | keypair.transform(rdd => {
95 | rdd.sortByKey(false)//TODO
96 | })
97 | pair.print()
98 | ssc.start()
99 | ssc.awaitTermination()
100 |
101 | }
102 |
103 | }
104 |
--------------------------------------------------------------------------------
/neo4j/src/test/scala/ClientRedisTest.scala:
--------------------------------------------------------------------------------
1 | import com.lakala.datacenter.utils.RedisUtils
2 | import redis.clients.jedis.JedisPubSub
3 |
4 | /**
5 | * Created by Administrator on 2017/6/29 0029.
6 | */
7 | object ClientRedisTest {
8 | def main(args: Array[String]): Unit = {
9 | val jedis = RedisUtils.jedisCluster()
10 | println(jedis.subscribe(new ApplyPubSubListener(),args(0)))
11 | }
12 |
13 | class ApplyPubSubListener extends JedisPubSub {
14 |
15 | override def onMessage(channel: String, message: String): Unit = {
16 | System.out.println(channel + " onMessage=" + message)
17 | super.onMessage(channel, message)
18 | }
19 | // handler for when a subscription is initialized
20 | override def onSubscribe(channel: String, subscribedChannels: Int) {
21 | System.out.println(channel + " onSubscribe=" + subscribedChannels);
22 | }
23 |
24 | // handler for when a subscription is cancelled
25 | override def onUnsubscribe(channel: String, subscribedChannels: Int) {
26 | System.out.println(channel + "onUnsubscribe=" + subscribedChannels);
27 | }
28 |
29 | // handler for when a pattern subscription is initialized
30 | override def onPSubscribe(pattern: String, subscribedChannels: Int) {
31 | System.out.println(pattern + "onPSubscribe=" + subscribedChannels);
32 | }
33 |
34 | // handler for when a pattern subscription is cancelled
35 | override def onPUnsubscribe(pattern: String, subscribedChannels: Int) {
36 | System.out.println(pattern + "onPUnsubscribe=" + subscribedChannels);
37 | }
38 |
39 | // handler for messages received via a pattern subscription
40 | override def onPMessage(pattern: String, channel: String, message:String ) {
41 | System.out.println(pattern + "onPMessage=" + channel + "=" + message);
42 | }
43 | }
44 | }
45 |
--------------------------------------------------------------------------------
/neo4j/src/test/scala/CollectionUtil.scala:
--------------------------------------------------------------------------------
1 | import scala.collection.mutable.ArrayBuffer
2 | import scala.reflect.ClassTag
3 |
4 | /**
5 | * Created by Administrator on 2017/8/15 0015.
6 | */
7 | object CollectionUtil {
8 | /**
9 | * Adds reduceByKey-style operations to any collection of type Traversable[(K, V)].
10 | *
11 | * @param collection
12 | * @param kt
13 | * @param vt
14 | * @tparam K
15 | * @tparam V
16 | */
17 | implicit class CollectionHelper[K, V](collection: Traversable[(K, V)])(implicit kt: ClassTag[K], vt: ClassTag[V]) {
18 | def reduceByKey(f: (V, V) => V): Traversable[(K, V)] = collection.groupBy(_._1).map { case (_: K, values: Traversable[(K, V)]) => values.reduce((a, b) => (a._1, f(a._2, b._2))) }
19 |
20 | /**
21 | * reduceByKey的同时,返回被reduce掉的元素的集合
22 | *
23 | * @param f
24 | * @return
25 | */
26 | def reduceByKeyWithReduced(f: (V, V) => V)(implicit kt: ClassTag[K], vt: ClassTag[V]): (Traversable[(K, V)], Traversable[(K, V)]) = {
27 | val reduced: ArrayBuffer[(K, V)] = ArrayBuffer()
28 | val newSeq = collection.groupBy(_._1).map {
29 | case (_: K, values: Traversable[(K, V)]) => values.reduce((a, b) => {
30 | val newValue: V = f(a._2, b._2)
31 | val reducedValue: V = if (newValue == a._2) b._2 else a._2
32 | val reducedPair: (K, V) = (a._1, reducedValue)
33 | reduced += reducedPair
34 | (a._1, newValue)
35 | })
36 | }
37 | (newSeq, reduced.toTraversable)
38 | }
39 | }
40 | }
41 |
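A usage sketch of the implicit helper on an in-memory collection (the output shapes shown in the comments are indicative):

    object CollectionUtilExample {
      def main(args: Array[String]): Unit = {
        import CollectionUtil._

        val pairs = Seq("a" -> 1, "b" -> 2, "a" -> 3)
        println(pairs.reduceByKey(_ + _)) // e.g. Map(a -> 4, b -> 2)

        val (merged, reducedAway) = pairs.reduceByKeyWithReduced(_ + _)
        println(merged)      // one merged pair per key
        println(reducedAway) // the pairs folded away, e.g. ArrayBuffer((a,1))
      }
    }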
--------------------------------------------------------------------------------
/neo4j/src/test/scala/ConsumerGroupExample.scala:
--------------------------------------------------------------------------------
1 | /**
2 | * Created by Administrator on 2017/8/7 0007.
3 | */
4 |
5 |
6 | import com.lakala.datacenter.main.TrialConsumerKafka
7 |
8 | object ConsumerGroupExample {
9 | def main(args: Array[String]): Unit = {
10 | TrialConsumerKafka.main(Array("192.168.0.208:2181,192.168.0.211:2181,192.168.0.212:2181", "test-consumer-group",
11 | "logCollect_cleanData", "3"))
12 | }
13 | }
14 |
15 |
--------------------------------------------------------------------------------
/neo4j/src/test/scala/Main.scala:
--------------------------------------------------------------------------------
1 | /**
2 | * Created by Administrator on 2017/8/1 0001.
3 | */
4 | import org.neo4j.driver.v1.GraphDatabase
5 | import org.neo4j.driver.v1.AuthTokens
6 | import com.lakala.datacenter.cypher.NeoData._
7 |
8 | object Main {
9 |
10 | def main(args: Array[String]): Unit = {
11 |
12 | val driver = GraphDatabase.driver("bolt://localhost", AuthTokens.basic("neo4j", "123456"))
13 |
14 | val session = driver.session();
15 |
16 | val nodes = allNodes(session)
17 |
18 | println(nodes.mkString("\n"))
19 | }
20 | }
--------------------------------------------------------------------------------
/neo4j/src/test/scala/TestApiNeo4j.scala:
--------------------------------------------------------------------------------
1 | //import java.util
2 | //import java.util.Map
3 | //
4 | //import com.lakala.datacenter.constant.StreamingConstant
5 | //import com.lakala.datacenter.utils.UtilsTools.properties
6 | //import org.apache.commons.lang3.StringUtils.trim
7 | //import org.neo4j.rest.graphdb.RestAPIFacade
8 | //import org.neo4j.rest.graphdb.batch.CypherResult
9 | //import org.neo4j.rest.graphdb.query.RestCypherQueryEngine
10 | //import org.neo4j.rest.graphdb.util.QueryResult
11 | //
12 | ///**
13 | // * Created by Administrator on 2017/7/12 0012.
14 | // */
15 | //object TestApiNeo4j {
16 | // def main(args: Array[String]): Unit = {
17 | //// val properies = properties(StreamingConstant.CONFIG)
18 | //// val restAPI = new RestAPIFacade(trim(properies.getProperty(StreamingConstant.NEOIP)), trim(properies.getProperty(StreamingConstant.USER)), trim(properies.getProperty(StreamingConstant.PASSWORD)))
19 | //
20 | // import scala.collection.JavaConversions._
21 | // //
22 | // // //
23 | // // val orderno ="AX20160722090751068917"
24 | // // val centro = "500227198611307710"
25 | // // val applyNodeIndexs = restAPI.getNodesByLabelAndProperty("" + Labels.ApplyInfo, StreamingConstant.ORDERNO, orderno)
26 | // //
27 | // // val apply = applyNodeIndexs.toList
28 | // // if (apply.size == 0) {
29 | // // val applyNode = restAPI.createNode(MapUtil.map(StreamingConstant.ORDERNO, orderno.toUpperCase,StreamingConstant.MODELNAME, Labels.ApplyInfo))
30 | // // applyNode.addLabel(Labels.ApplyInfo)
31 | // // applyNode.setProperty(StreamingConstant.ORDERNO,orderno)
32 | // //
33 | // // val contentIndexs = restAPI.getNodesByLabelAndProperty("Identification", StreamingConstant.CONTENT, centro)
34 | // // val list = contentIndexs.toList
35 | // // println(list.size)
36 | // // var otherNode: RestNode = if (list.size == 0) {
37 | // // val otherNode2 = restAPI.createNode(MapUtil.map(StreamingConstant.MODELNAME, "Identification", StreamingConstant.CONTENT, centro))
38 | // // otherNode2.setProperty(StreamingConstant.CONTENT, centro)
39 | // // otherNode2.addLabel(Labels.Identification)
40 | // // otherNode2
41 | // // } else {
42 | // // applyNode.setProperty("cert_no", centro)
43 | // // list.get(0)
44 | // // }
45 | // //
46 | // // applyNode.createRelationshipTo(otherNode, RelationshipTypes.identification)
47 | // // println(otherNode.getId)
48 | // // println(applyNode.getId)
49 | // // }
50 | // val restAPI = new RestAPIFacade(trim("http://192.168.0.33:7474/db/data"), trim("neo4j"), trim("123456"))
51 | // val result = restAPI.query("MATCH (:Person {name:'Keanu'})-[:ACTED_IN]->(:Movie {title:'Matrix'}) RETURN count(*) as c" ,null)
52 | // val it = result.getData
53 | // it.flatten.toList.get(0)
54 | // println(it.flatten.toList.get(0))
55 | // }
56 | //}
57 |
--------------------------------------------------------------------------------
/neo4j/src/test/scala/TestCypher.scala:
--------------------------------------------------------------------------------
1 | import com.lakala.datacenter.common.utils.DateTimeUtils
2 | import com.lakala.datacenter.constant.StreamingConstant
3 | import org.apache.commons.lang3.StringUtils
4 | import org.joda.time.DateTime
5 | import org.neo4j.driver.v1._
6 |
7 | /**
8 | * Created by Administrator on 2017/8/2 0002.
9 | */
10 | object TestCypher2 {
11 | val driver: Driver = GraphDatabase.driver("bolt://localhost:7687", AuthTokens.basic("neo4j", "123456"))
12 |
13 | def main(args: Array[String]): Unit = {
14 | var map: java.util.HashMap[String, String] = new java.util.HashMap[String, String]()
15 | var paramMap: java.util.HashMap[String, String] = new java.util.HashMap[String, String]()
16 | map.put("orderno", "TNA20170623102711010234032084429")
17 | map.put("_DeviceId", "A000005966DFEA")
18 | map.put("mobile", "18961922790")
19 |
20 | runCypherApply(driver.session(), map)
21 | driver.close()
22 | }
23 |
24 | private def runCypherApply(session: Session, map: java.util.HashMap[String, String]): Unit = {
25 | val applyStatementTemplate = new StringBuffer("MERGE (apply:ApplyInfo {orderno:$orderno})")
26 | applyStatementTemplate.append(" ON MATCH SET apply.modelname='ApplyInfo',apply.insertTime=$insertTime,apply.user_id=$user_id")
27 | val otherStatementTemplate = new StringBuffer()
28 | val relStatementTemplate = new StringBuffer()
29 |
30 | var paramMap: java.util.HashMap[String, Object] = new java.util.HashMap[String, Object]()
31 | paramMap.put("orderno", map.getOrDefault(StreamingConstant.ORDERNO, ""))
32 | paramMap.put(StreamingConstant.INSERTTIME, DateTimeUtils.formatter.print(DateTime.now()))
33 | paramMap.put(StreamingConstant.USER_ID, map.getOrDefault(StreamingConstant.USERID, ""))
34 |
35 | for (key <- StreamingConstant.fieldMap.keySet) {
36 | val fieldRelation = StreamingConstant.fieldMap.get(key).get.split(",")
37 | if (StringUtils.isNoneEmpty(map.get(key))) {
38 | val modelname = "" + StreamingConstant.labelMap.get(key).get
39 | val rel = "" + StreamingConstant.relationShipMap.get(key).get
40 | otherStatementTemplate.append(" MERGE (" + key + ":" + modelname + "{modelname:'" + modelname + "',content:$" + fieldRelation(0) + "})")
41 | otherStatementTemplate.append(" MERGE (apply)-[:" + rel + "]->(" + key + ")")
42 | applyStatementTemplate.append(",apply." + fieldRelation(0) + "=$" + fieldRelation(0))
43 | paramMap.put(fieldRelation(0), map.get(key))
44 | }
45 | }
46 |
47 | val statementStr = applyStatementTemplate.append(otherStatementTemplate).toString
48 | println(statementStr)
49 | session.writeTransaction(new TransactionWork[Integer]() {
50 | override def execute(tx: Transaction): Integer = {
51 | tx.run(statementStr, paramMap)
52 | 1
53 | }
54 | })
55 | }
56 | }
57 |
--------------------------------------------------------------------------------
/neo4j/src/test/scala/TestKafka.scala:
--------------------------------------------------------------------------------
1 | import com.lakala.datacenter.constant.StreamingConstant
2 | import kafka.utils.ZkUtils
3 | import org.I0Itec.zkclient.ZkClient
4 | import org.I0Itec.zkclient.exception.ZkMarshallingError
5 | import org.I0Itec.zkclient.serialize.ZkSerializer
6 | /**
7 | * Created by Administrator on 2017/6/12 0012.
8 | */
9 | object TestKafka {
10 | def main(args: Array[String]): Unit = {
11 | val topic = "logCollect_cleanData"
12 | val zkConnect = "192.168.0.211:2181,192.168.0.212:2181"
13 | var zkClient: ZkClient = null
14 | try {
15 | zkClient = new ZkClient(zkConnect, 30000, 30000, new ZkSerializer {
16 | override def serialize(data: Object): Array[Byte] = {
17 | try {
18 | return data.toString().getBytes(StreamingConstant.CODE)
19 | } catch {
20 | case e: ZkMarshallingError => return null
21 |
22 | }
23 | }
24 |
25 | override def deserialize(bytes: Array[Byte]): Object = {
26 | try {
27 | return new String(bytes, StreamingConstant.CODE)
28 | } catch {
29 | case e: ZkMarshallingError => return null
30 | }
31 | }
32 | })
33 | zkClient.deleteRecursive(ZkUtils.getTopicPath(topic)) // in the end the topic is deleted by removing its corresponding path in ZooKeeper
34 | println("deletion succeeded!")
35 | }
36 | catch {
37 | case e: Throwable =>
38 | println("delection failed because of " + e.getMessage)
39 | // println(Utils.stackTrace(e))
40 | }
41 | finally {
42 | if (zkClient != null)
43 | zkClient.close()
44 | }
45 |
46 |
47 | // import org.I0Itec.zkclient.ZkClient
48 | // val arrys = new Array[String](6)
49 | // arrys(0) = "--replication-factor"
50 | // arrys(1) = "1"
51 | // arrys(2) = "--partitions"
52 | // arrys(3) = "3"
53 | // arrys(4) = "--topic"
54 | // arrys(5) = "logCollect_cleanData"
55 | // val client = new ZkClient("192.168.0.211:2181,192.168.0.212:2181", 30000, 30000, ZKStringSerializer)
56 | // client.setZkSerializer(ZKStringSerializer) //一定要加上ZkSerializer
57 | //
58 | //
59 | // val opts = new TopicCommand.TopicCommandOptions(arrys)
60 | // TopicCommand.createTopic(client, opts)
61 |
62 | // import kafka.admin.AdminUtils
63 | // val client = new ZkClient("192.168.0.211:2181,192.168.0.212:2181", 30000, 30000)
64 | // create a single-partition, single-replica topic
65 | // val props: Properties = new Properties
66 | // the Kafka broker list (host:port)
67 | // props.put("metadata.broker.list", "192.168.0.211:9092,192.168.0.212:9092")
68 | // serializer class for values
69 | // props.put("serializer.class", "kafka.serializer.StringEncoder")
70 | // serializer class for keys
71 | // props.put("key.serializer.class", "kafka.serializer.StringEncoder")
72 | //request.required.acks
73 | // props.put("request.required.acks", "-1")
74 | // AdminUtils.createTopic(client, "logCollect_cleanData", 3, 1, props)
75 | }
76 | }
77 |
--------------------------------------------------------------------------------
/neo4j/src/test/scala/TestRedis.scala:
--------------------------------------------------------------------------------
1 | import java.util
2 |
3 | import com.alibaba.fastjson.{JSON, JSONObject}
4 | import com.lakala.datacenter.common.utils.DateTimeUtils
5 | import com.lakala.datacenter.constant.StreamingConstant
6 | import com.lakala.datacenter.utils.RedisUtils
7 | import org.joda.time.DateTime
8 | import redis.clients.jedis.JedisPubSub
9 |
10 |
11 |
12 | /**
13 | * Created by Administrator on 2017/6/29 0029.
14 | */
15 | object TestRedis {
16 | def main(args: Array[String]): Unit = {
17 |
18 | //
19 | val jedis = RedisUtils.jedisCluster()
20 | try {
21 | val orderno = args(0)
22 | val insertTime=Map(StreamingConstant.INSERTTIME->"2017-06-30 12:01:10").getOrElse(StreamingConstant.INSERTTIME, DateTimeUtils.formatter.print(DateTime.now()))
23 | val s= "{\""+StreamingConstant.ORDERNO+"\":\""+orderno+"\",\""+StreamingConstant.INSERT_TIME+"\":\""+insertTime+"\"}"
24 | jedis.publish("testsub12", s)
25 | println(s)
26 | println(JSON.parseObject(s).getString(StreamingConstant.INSERT_TIME))
27 | } catch {
28 | case e: Exception => println("AAAAAAAAA"+e.getMessage)
29 | }
30 |
31 | }
32 |
33 |
34 | }
35 |
--------------------------------------------------------------------------------
/neo4j/src/test/scala/org/neo4j/spark/ExplortApplyDataTest.scala:
--------------------------------------------------------------------------------
1 | package org.neo4j.spark
2 |
3 | import org.apache.spark.{SparkConf, SparkContext}
4 |
5 | import scala.collection.mutable.ArrayBuffer
6 |
7 | /**
8 | * Created by Administrator on 2017/5/9 0009.
9 | *
10 | */
11 | object ExplortApplyDataTest {
12 | def main(args: Array[String]): Unit = {
13 | // ExplortApplyData2.main(Array("192.168.0.33","file:///F:/output/out","BankCard,Device,Mobile,Email"))
14 | val conf = new SparkConf().setMaster("local[1]").setAppName("test")
15 | val sc = new SparkContext(conf)
16 | printSql(sc)
17 | println(System.getProperty("java.io.tmpdir"))
18 | }
19 | def printSql(sc:SparkContext)={
20 | val map = Map("applymymobile" -> "Mobile","loanapply" -> "Mobile","emergencymobile" -> "Mobile", "device" -> "Device", "bankcard" -> "BankCard", "identification" -> "Identification", "email" -> "Email")
21 | val modelRdd = sc.parallelize(List("BankCard", "Device", "Mobile", "Email"))
22 |
23 | val broadcastVar2 = sc.broadcast(map)
24 | modelRdd.foreachPartition { models =>
25 | models.foreach { model =>
26 | runQueryApplyByApplyLevel1(broadcastVar2.value, model)
27 | }
28 | }
29 | }
30 | def runQueryApplyByApplyLevel1(map: Map[String, String],modelname: String):Unit = {
31 |
32 | val list = new ArrayBuffer[String]()
33 | for (k <- map.keySet) {
34 | for (k2 <- map.keySet) {
35 | if (k2.equals("applymymobile") || k2.equals("loanapply") || k2.equals("emergencymobile")) {
36 | list += s"match (n:$modelname {type:'1'})-[r1:${k}] -(p:ApplyInfo)-[r2:${k2}]-(m:${map.get(k2).get})-[r3:applymymobile]-(q:ApplyInfo) return n.content,p.orderno,m.content,q.orderno@@$k==$k2==applymymobile"
37 | list += s"match (n:$modelname {type:'1'})-[r1:${k}] -(p:ApplyInfo)-[r2:${k2}]-(m:${map.get(k2).get})-[r3:loanapply]-(q:ApplyInfo) return n.content,p.orderno,m.content,q.orderno@@$k==$k2==loanapply"
38 | list += s"match (n:$modelname {type:'1'})-[r1:${k}] -(p:ApplyInfo)-[r2:${k2}]-(m:${map.get(k2).get})-[r3:emergencymobile]-(q:ApplyInfo) return n.content,p.orderno,m.content,q.orderno@@$k==$k2=emergencymobile"
39 | } else {
40 | list += s"match (n:$modelname {type:'1'})-[r1:${k}] -(p:ApplyInfo)-[r2:${k2}]-(m:${map.get(k2).get})-[r3:${k2}]-(q:ApplyInfo) return n.content,p.orderno,m.content,q.orderno@@$k==$k2==$k2"
41 | }
42 | }
43 | }
44 | list.map { sql =>
45 | val arr = sql.split("@@")
46 | println(arr(0))
47 | }
48 | }
49 | }
50 |
--------------------------------------------------------------------------------
/neo4j/src/test/scala/org/neo4j/spark/MainTest.scala:
--------------------------------------------------------------------------------
1 | package org.neo4j.spark
2 |
3 | import com.lakala.datacenter.main.Main
4 |
5 | /**
6 | * Created by Administrator on 2017/6/2 0002.
7 | */
8 | object MainTest {
9 | def main(args: Array[String]): Unit = {
10 | //-i F:\tmp\applydir
11 | Main.main(args)
12 | }
13 | }
14 |
--------------------------------------------------------------------------------
/neo4j/src/test/scala/org/neo4j/spark/Neo4jContstanTest.scala:
--------------------------------------------------------------------------------
1 | package org.neo4j.spark
2 |
3 | /**
4 | * Created by Administrator on 2017/7/14 0014.
5 | */
6 | object Neo4jContstanTest {
7 | val SERVER_BOLTURI ="bolt://192.168.0.33:7687"
8 | val RESTNEO4JURL ="http://192.168.0.33:7474/db/data"
9 | }
10 |
--------------------------------------------------------------------------------
/neo4j/src/test/scala/org/neo4j/spark/Neo4jDataFrameScalaTest.scala:
--------------------------------------------------------------------------------
1 | //package org.neo4j.spark
2 | //
3 | //import java.io.File
4 | //
5 | //import com.lakala.datacenter.load.spark.{Neo4jDataFrame, Neo4jGraph}
6 | //import org.apache.commons.lang3.StringUtils.trim
7 | //import org.apache.spark.api.java.JavaSparkContext
8 | //import org.apache.spark.graphx.{Edge, Graph}
9 | //import org.apache.spark.rdd.RDD
10 | //import org.apache.spark.sql.types.{DataTypes, StructField, StructType}
11 | //import org.apache.spark.sql.{Row, SQLContext}
12 | //import org.apache.spark.{SparkConf, SparkContext}
13 | //import org.junit.Assert._
14 | //import org.junit._
15 | //import org.neo4j.harness.{ServerControls, TestServerBuilders}
16 | //import org.neo4j.rest.graphdb.RestAPIFacade
17 | //import org.neo4j.rest.graphdb.batch.CypherResult
18 | //
19 | //
20 | ///**
21 | // * @author lys
22 | // * @since 17.07.16
23 | // */
24 | //class Neo4jDataFrameScalaTest {
25 | // val FIXTURE: String = "CREATE (:A)-[:REL {foo:'bar'}]->(:B)"
26 | // private var conf: SparkConf = null
27 | // private var sc: JavaSparkContext = null
28 | // private var server: ServerControls = null
29 | // private val path:String ="F:\\tmp\\neo4j\\tmp02"
30 | // private var restAPI:RestAPIFacade = null
31 | // @Before
32 | // @throws[Exception]
33 | // def setUp {
34 | //// server = TestServerBuilders.newInProcessBuilder(new File(path)).withConfig("dbms.security.auth_enabled", "false").withFixture(FIXTURE).newServer
35 | // restAPI = new RestAPIFacade(trim(Neo4jContstanTest.RESTNEO4JURL), trim("neo4j"), trim("123456"))
36 | //
37 | // conf = new SparkConf().setAppName("neoTest").setMaster("local[*]").set("spark.driver.allowMultipleContexts", "true").set("spark.neo4j.bolt.url", Neo4jContstanTest.SERVER_BOLTURI)
38 | // sc = SparkContext.getOrCreate(conf)
39 | // }
40 | //
41 | // @After def tearDown {
42 | //// server.close
43 | // sc.close
44 | // }
45 | //
46 | // @Test def mergeEdgeList {
47 | // val rows = sc.makeRDD(Seq(Row("Keanu", "Matrix")))
48 | // val schema = StructType(Seq(StructField("name", DataTypes.StringType), StructField("title", DataTypes.StringType)))
49 | // val sqlContext = new SQLContext(sc)
50 | // val df = sqlContext.createDataFrame(rows, schema)
51 | // Neo4jDataFrame.mergeEdgeList(sc, df, ("Person", Seq("name")), ("ACTED_IN", Seq.empty), ("Movie", Seq("title")))
52 | // val edges: RDD[Edge[Long]] = sc.makeRDD(Seq(Edge(0, 1, 42L)))
53 | // val graph = Graph.fromEdges(edges, -1)
54 | // assertEquals(2, graph.vertices.count)
55 | // assertEquals(1, graph.edges.count)
56 | // Neo4jGraph.saveGraph(sc, graph, null, "test")
57 | //
58 | //// val it: ResourceIterator[Long] = server.graph().execute("MATCH (:Person {name:'Keanu'})-[:ACTED_IN]->(:Movie {title:'Matrix'}) RETURN count(*) as c").columnAs("c")
59 | // val result: CypherResult = restAPI.query("MATCH (:Person {name:'Keanu'})-[:ACTED_IN]->(:Movie {title:'Matrix'}) RETURN count(*) as c" ,null)
60 | // import scala.collection.JavaConversions._
61 | // assertEquals(1L, result.getData.flatten.toList.get(0).toString.toLong)
62 | // restAPI.close()
63 | // }
64 | //}
65 | //
66 |
--------------------------------------------------------------------------------
/neo4j/src/test/scala/org/neo4j/spark/Neo4jGraphScalaTest.scala:
--------------------------------------------------------------------------------
1 | package org.neo4j.spark
2 |
3 | import com.lakala.datacenter.load.spark.{Executor, Neo4jGraph}
4 | import org.apache.spark.api.java.JavaSparkContext
5 | import org.apache.spark.graphx.{Edge, Graph}
6 | import org.apache.spark.rdd.RDD
7 | import org.apache.spark.{SparkConf, SparkContext}
8 | import org.junit.Assert._
9 | import org.junit._
10 |
11 | import scala.collection.JavaConverters._
12 |
13 |
14 | /**
15 | * @author lys
16 | * @since 17.07.16
17 | */
18 | class Neo4jGraphScalaTest {
19 | val FIXTURE: String = "CREATE (:A)-[:REL {foo:'bar'}]->(:B)"
20 | private var conf: SparkConf = null
21 | private var sc: JavaSparkContext = null
22 | // private var server: ServerControls = null
23 |
24 | @Before
25 | @throws[Exception]
26 | def setUp {
27 | // server = TestServerBuilders.newInProcessBuilder.withConfig("dbms.security.auth_enabled", "false").withFixture(FIXTURE).newServer
28 | conf = new SparkConf().setAppName("neoTest").setMaster("local[*]").set("spark.driver.allowMultipleContexts", "true")
29 | .set("spark.neo4j.bolt.url", Neo4jContstanTest.SERVER_BOLTURI)
30 | sc = SparkContext.getOrCreate(conf)
31 | }
32 |
33 | @After def tearDown {
34 | // server.close()
35 | sc.close
36 | }
37 |
38 | @Test def runCypherQueryWithParams {
39 | val data = List(Map("id" -> 3, "name" -> "Test3").asJava, Map("id" -> 2, "name" -> "Test2").asJava).asJava
40 | Executor.execute(sc.sc, "UNWIND {data} as row MERGE (n:Test {id:row.id}) SET n.name = row.name", Map(("data", data)))
41 | }
42 |
43 | @Test def runMatrixQuery {
44 | val graph = Neo4jGraph.loadGraph(sc.sc, "A", Seq.empty, "B")
45 | assertEquals(2, graph.vertices.count)
46 | assertEquals(1, graph.edges.count)
47 | }
48 |
49 | @Test def saveGraph {
50 | val edges: RDD[Edge[Long]] = sc.makeRDD(Seq(Edge(0, 1, 42L)))
51 | val graph = Graph.fromEdges(edges, -1)
52 | assertEquals(2, graph.vertices.count)
53 | assertEquals(1, graph.edges.count)
54 | Neo4jGraph.saveGraph(sc, graph, null, "test")
55 | }
56 | }
57 |
--------------------------------------------------------------------------------
/neo4j/src/test/scala/org/neo4j/spark/Neo4jRestSparkTest.scala:
--------------------------------------------------------------------------------
1 | package org.neo4j.spark
2 |
3 | import com.lakala.datacenter.load.spark.Neo4j
4 | import org.apache.spark.{SparkConf, SparkContext}
5 |
6 | /**
7 | * Created by Administrator on 2017/5/11 0011.
8 | */
9 | object Neo4jRestSparkTest {
10 | def main(args: Array[String]): Unit = {
11 | val conf = new SparkConf().setAppName("neoTest").setMaster("local[2]")
12 | /*.set("spark.neo4j.bolt.url","jdbc:neo4j:bolt:192.168.0.33:7687")*//*.set("spark.driver.allowMultipleContexts", "true").set("spark.neo4j.bolt.url", server.boltURI.toString)*/
13 | val sc = new SparkContext(conf)
14 | runCypherRelQueryWithPartition(sc)
15 | }
16 |
17 | def runCypherRelQueryWithPartition(sc: SparkContext) {
18 | val neo4j: Neo4j = Neo4j(sc).cypher("match (n:Mobile {type:'1'})-[r1:loanapply] -(p:ApplyInfo)-[r2:loanapply]-(m:Mobile)-[r3:loanapply]-(q:ApplyInfo) return n.content as content1 ,type(r1) as value1,p.orderno as orderno1,type(r2) as value2,m.content as content2,type(r3) as value3,q.orderno as orderno2 ").partitions(7).batch(200)
19 | val knows: Long = neo4j.loadRowRdd.count()
20 | println(knows)
21 | }
22 | }
23 |
--------------------------------------------------------------------------------