├── .gitignore
├── README.md
├── pom.xml
├── src
│   └── main
│       ├── java
│       │   └── com
│       │       └── spark
│       │           ├── rdd
│       │           │   └── tutorial
│       │           │       └── util
│       │           │           └── MyIterator.java
│       │           └── rdd_tutorial
│       │               ├── Tutorial13
│       │               │   ├── CustomPartitionRdd.java
│       │               │   ├── HashPartitionerRdd.java
│       │               │   └── JavaCustomPart.java
│       │               ├── tutorial1
│       │               │   └── ParallelizeRdd.java
│       │               ├── tutorial10
│       │               │   ├── CollectAsMapRdd.java
│       │               │   ├── CountByKeyRdd.java
│       │               │   └── tutorail10.md
│       │               ├── tutorial11
│       │               │   └── tutorial11.md
│       │               ├── tutorial12
│       │               │   ├── MapPartitionsRdd.java
│       │               │   └── MapPartitionsWithIndexRdd.java
│       │               ├── tutorial2
│       │               │   ├── FilterRdd.java
│       │               │   ├── FlatMapRdd.java
│       │               │   └── MapRdd.java
│       │               ├── tutorial3
│       │               │   └── DisUnionAndSoOn.java
│       │               ├── tutorial4
│       │               │   ├── FlatMapToPairRdd.java
│       │               │   └── MapToPairRdd.java
│       │               ├── tutorial5
│       │               │   ├── CombineByKeyRdd.java
│       │               │   └── ScoreDetail.java
│       │               ├── tutorial6
│       │               │   ├── FoldByKey.java
│       │               │   ├── ReduceByKeyRdd.java
│       │               │   └── SortByKey.java
│       │               ├── tutorial7
│       │               │   ├── CogroupRdd.java
│       │               │   └── GroupByKeyRdd.java
│       │               ├── tutorial8
│       │               │   ├── JoinRDD.java
│       │               │   └── readme.txt
│       │               └── tutorial9
│       │                   ├── SparkAction1.java
│       │                   └── readme.md
│       └── resources
│           └── filter_sample.txt
└── target
    └── classes
        ├── com
        │   └── spark
        │       ├── rdd
        │       │   └── tutorial
        │       │       └── util
        │       │           └── MyIterator.class
        │       └── rdd_tutorial
        │           ├── Tutorial13
        │           │   ├── CustomPartitionRdd.class
        │           │   ├── HashPartitionerRdd$1.class
        │           │   ├── HashPartitionerRdd$2.class
        │           │   ├── HashPartitionerRdd.class
        │           │   └── JavaCustomPart.class
        │           ├── tutorial1
        │           │   └── ParallelizeRdd.class
        │           ├── tutorial10
        │           │   ├── CollectAsMapRdd.class
        │           │   └── CountByKeyRdd.class
        │           ├── tutorial12
        │           │   ├── MapPartitionsRdd$1.class
        │           │   ├── MapPartitionsRdd$2.class
        │           │   ├── MapPartitionsRdd$3.class
        │           │   ├── MapPartitionsRdd$4.class
        │           │   ├── MapPartitionsRdd.class
        │           │   ├── MapPartitionsWithIndexRdd$1.class
        │           │   ├── MapPartitionsWithIndexRdd$2.class
        │           │   ├── MapPartitionsWithIndexRdd$3.class
        │           │   ├── MapPartitionsWithIndexRdd$4.class
        │           │   ├── MapPartitionsWithIndexRdd$5.class
        │           │   └── MapPartitionsWithIndexRdd.class
        │           ├── tutorial2
        │           │   ├── FilterRdd$1.class
        │           │   ├── FilterRdd.class
        │           │   ├── FlatMapRdd$1.class
        │           │   ├── FlatMapRdd$2.class
        │           │   ├── FlatMapRdd.class
        │           │   ├── MapRdd$1.class
        │           │   ├── MapRdd$2.class
        │           │   └── MapRdd.class
        │           ├── tutorial3
        │           │   └── DisUnionAndSoOn.class
        │           ├── tutorial4
        │           │   ├── FlatMapToPairRdd$1.class
        │           │   ├── FlatMapToPairRdd$2.class
        │           │   ├── FlatMapToPairRdd.class
        │           │   ├── MapToPairRdd$1.class
        │           │   ├── MapToPairRdd$2.class
        │           │   └── MapToPairRdd.class
        │           ├── tutorial5
        │           │   ├── CombineByKeyRdd$1.class
        │           │   ├── CombineByKeyRdd$2.class
        │           │   ├── CombineByKeyRdd$3.class
        │           │   ├── CombineByKeyRdd$4.class
        │           │   ├── CombineByKeyRdd.class
        │           │   └── ScoreDetail.class
        │           ├── tutorial6
        │           │   ├── FoldByKey.class
        │           │   ├── ReduceByKeyRdd$1.class
        │           │   ├── ReduceByKeyRdd$2.class
        │           │   ├── ReduceByKeyRdd.class
        │           │   └── SortByKey.class
        │           ├── tutorial7
        │           │   ├── CogroupRdd.class
        │           │   ├── GroupByKeyRdd$1.class
        │           │   ├── GroupByKeyRdd$2.class
        │           │   └── GroupByKeyRdd.class
        │           ├── tutorial8
        │           │   ├── JoinRDD$1.class
        │           │   ├── JoinRDD$2.class
        │           │   ├── JoinRDD$3.class
        │           │   ├── JoinRDD$4.class
        │           │   └── JoinRDD.class
        │           └── tutorial9
        │               └── SparkAction1.class
        └── filter_sample.txt
/.gitignore:
--------------------------------------------------------------------------------
1 | .idea/workspace.xml
2 | spark_tutorial.iml
3 | spark_tutorial.iml
4 | .idea/*
5 | spark_tutorial.iml
6 | .idea
7 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | Spark RDD has quite a few operators, and it is not always easy to remember how to use a given one when you need it. Over time I have recorded examples of how these operators are used; below are usage examples for some common Spark RDD operators.
2 | The examples are written in Java and Scala. Due to limited time there is no Python version yet; I will add one later when I get the chance.
3 | [Spark RDD operators (1): parallelize, makeRDD, textFile](http://blog.csdn.net/t1dmzks/article/details/70189509)
4 |
5 |
6 | [Spark RDD operators (2): filter, map, flatMap](http://blog.csdn.net/t1dmzks/article/details/70198393)
7 |
8 |
9 | [Spark RDD operators (3): distinct, union, intersection, subtract, cartesian](http://blog.csdn.net/t1dmzks/article/details/70198430)
10 |
11 |
12 | [Spark RDD operators (4): creating pair RDDs with mapToPair, flatMapToPair](http://blog.csdn.net/t1dmzks/article/details/70234272)
13 |
14 |
15 | [Spark RDD operators (5): pair RDD aggregation with combineByKey](http://blog.csdn.net/t1dmzks/article/details/70249743)
16 |
17 |
18 | [Spark RDD operators (6): pair RDD aggregation with reduceByKey and foldByKey, sorting with sortByKey](http://blog.csdn.net/t1dmzks/article/details/70342732)
19 |
20 |
21 | [Spark RDD operators (7): pair RDD grouping with groupByKey, cogroup](http://blog.csdn.net/t1dmzks/article/details/70549752)
22 |
23 |
24 | [Spark RDD operators (8): pair RDD joins with subtractByKey, join, rightOuterJoin, leftOuterJoin](http://blog.csdn.net/t1dmzks/article/details/70557249)
25 |
26 |
27 | [Spark RDD operators (9): basic actions first, take, collect, count, countByValue, reduce, aggregate, fold, top](http://blog.csdn.net/t1dmzks/article/details/70667011)
28 |
29 |
30 | [Spark RDD operators (10): PairRDD actions countByKey, collectAsMap](http://blog.csdn.net/t1dmzks/article/details/70833185)
31 |
32 |
33 | [Spark RDD operators (11): RDD save actions saveAsTextFile, saveAsSequenceFile, saveAsObjectFile, saveAsHadoopFile, etc.](http://blog.csdn.net/t1dmzks/article/details/71037850)
34 |
35 |
36 |
37 | [Spark RDD operators (12): partition operations mapPartitions, mapPartitionsWithIndex](http://blog.csdn.net/t1dmzks/article/details/71336119)
38 |
39 |
40 | [Spark RDD operators (13): RDD partitioning with HashPartitioner, RangePartitioner, and custom partitioners](http://blog.csdn.net/t1dmzks/article/details/71374418)
41 |
--------------------------------------------------------------------------------
/pom.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project xmlns="http://maven.apache.org/POM/4.0.0"
3 |          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
4 |          xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
5 |     <modelVersion>4.0.0</modelVersion>
6 |
7 |     <groupId>com.zks</groupId>
8 |     <artifactId>spark_tutorial</artifactId>
9 |     <version>1.0-SNAPSHOT</version>
10 |
11 |     <properties>
12 |         <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
13 |         <spark.version>2.10</spark.version>
14 |         <spark_id_version>2.1.0</spark_id_version>
15 |         <scala.version>2.11.8</scala.version>
16 |         <hadoop.version>2.7.1</hadoop.version>
17 |     </properties>
18 |
19 |     <dependencies>
20 |         <dependency>
21 |             <groupId>org.apache.spark</groupId>
22 |             <artifactId>spark-core_${spark.version}</artifactId>
23 |             <version>${spark_id_version}</version>
24 |         </dependency>
25 |         <dependency>
26 |             <groupId>org.apache.spark</groupId>
27 |             <artifactId>spark-sql_${spark.version}</artifactId>
28 |             <version>${spark_id_version}</version>
29 |         </dependency>
30 |         <dependency>
31 |             <groupId>org.apache.spark</groupId>
32 |             <artifactId>spark-hive_${spark.version}</artifactId>
33 |             <version>${spark_id_version}</version>
34 |         </dependency>
35 |         <dependency>
36 |             <groupId>org.apache.spark</groupId>
37 |             <artifactId>spark-streaming_${spark.version}</artifactId>
38 |             <version>${spark_id_version}</version>
39 |         </dependency>
40 |         <dependency>
41 |             <groupId>org.apache.spark</groupId>
42 |             <artifactId>spark-streaming-kafka_${spark.version}</artifactId>
43 |             <version>1.3.0</version>
44 |         </dependency>
45 |         <dependency>
46 |             <groupId>org.apache.spark</groupId>
47 |             <artifactId>spark-mllib_${spark.version}</artifactId>
48 |             <version>${spark_id_version}</version>
49 |         </dependency>
50 |         <dependency>
51 |             <groupId>mysql</groupId>
52 |             <artifactId>mysql-connector-java</artifactId>
53 |             <version>5.1.39</version>
54 |         </dependency>
55 |         <dependency>
56 |             <groupId>junit</groupId>
57 |             <artifactId>junit</artifactId>
58 |             <version>4.12</version>
59 |         </dependency>
60 |         <dependency>
61 |             <groupId>org.slf4j</groupId>
62 |             <artifactId>slf4j-api</artifactId>
63 |             <version>1.6.6</version>
64 |         </dependency>
65 |         <dependency>
66 |             <groupId>org.slf4j</groupId>
67 |             <artifactId>slf4j-log4j12</artifactId>
68 |             <version>1.6.6</version>
69 |         </dependency>
70 |         <dependency>
71 |             <groupId>log4j</groupId>
72 |             <artifactId>log4j</artifactId>
73 |             <version>1.2.16</version>
74 |         </dependency>
75 |
76 |         <dependency>
77 |             <groupId>org.apache.hadoop</groupId>
78 |             <artifactId>hadoop-client</artifactId>
79 |             <version>${hadoop.version}</version>
80 |         </dependency>
81 |         <dependency>
82 |             <groupId>org.apache.hadoop</groupId>
83 |             <artifactId>hadoop-common</artifactId>
84 |             <version>${hadoop.version}</version>
85 |         </dependency>
86 |         <dependency>
87 |             <groupId>org.apache.hadoop</groupId>
88 |             <artifactId>hadoop-hdfs</artifactId>
89 |             <version>${hadoop.version}</version>
90 |         </dependency>
91 |     </dependencies>
92 |
93 |     <repositories>
94 |         <repository>
95 |             <id>central</id>
96 |             <name>Central Repository</name>
97 |             <url>http://maven.aliyun.com/nexus/content/repositories/central</url>
98 |             <layout>default</layout>
99 |             <snapshots>
100 |                 <enabled>false</enabled>
101 |             </snapshots>
102 |         </repository>
103 |     </repositories>
104 |
105 |     <build>
106 |         <sourceDirectory>src/main/java</sourceDirectory>
107 |         <testSourceDirectory>src/test/java</testSourceDirectory>
108 |         <plugins>
109 |             <plugin>
110 |                 <groupId>org.apache.maven.plugins</groupId>
111 |                 <artifactId>maven-compiler-plugin</artifactId>
112 |                 <version>3.3</version>
113 |                 <configuration>
114 |                     <source>1.8</source>
115 |                     <target>1.8</target>
116 |                     <encoding>UTF-8</encoding>
117 |                 </configuration>
118 |             </plugin>
119 |         </plugins>
120 |     </build>
121 | </project>
122 |
--------------------------------------------------------------------------------
/src/main/java/com/spark/rdd/tutorial/util/MyIterator.java:
--------------------------------------------------------------------------------
1 | package com.spark.rdd.tutorial.util;
2 |
3 | import java.util.Iterator;
4 |
5 | /**
6 |  * Adapts an Iterable into something that is both an Iterator and an Iterable,
7 |  * so it can be returned from Spark 2.x FlatMapFunction implementations,
8 |  * whose call() must return an Iterator.
9 |  */
10 | public class MyIterator<T> implements Iterator<T>, Iterable<T>
11 | {
12 |     private Iterator<T> myIterator;
13 |
14 |     public MyIterator(Iterable<T> iterable)
15 |     {
16 |         myIterator = iterable.iterator();
17 |     }
18 |
19 |     @Override
20 |     public boolean hasNext()
21 |     {
22 |         return myIterator.hasNext();
23 |     }
24 |
25 |     @Override
26 |     public T next()
27 |     {
28 |         return myIterator.next();
29 |     }
30 |
31 |     @Override
32 |     public void remove()
33 |     {
34 |         myIterator.remove();
35 |     }
36 |
37 |     @Override
38 |     public Iterator<T> iterator()
39 |     {
40 |         return myIterator;
41 |     }
42 | }
43 |
--------------------------------------------------------------------------------
/src/main/java/com/spark/rdd_tutorial/Tutorial13/CustomPartitionRdd.java:
--------------------------------------------------------------------------------
1 | package com.spark.rdd_tutorial.Tutorial13;
2 |
3 | import org.apache.spark.SparkConf;
4 | import org.apache.spark.api.java.JavaPairRDD;
5 | import org.apache.spark.api.java.JavaRDD;
6 | import org.apache.spark.api.java.JavaSparkContext;
7 | import scala.Tuple2;
8 |
9 | import java.util.Arrays;
10 |
11 | /**
12 |  * Custom partitioning
13 |  * Created by zhaikaishun on 2017/8/20.
14 |  */
15 | public class CustomPartitionRdd {
16 |     public static void main(String[] args) {
17 |         SparkConf sparkConf = new SparkConf().setAppName("CustomPartitionRdd").setMaster("local");
18 |         JavaSparkContext sc = new JavaSparkContext(sparkConf);
19 |         sc.setLogLevel("WARN");
20 |         JavaRDD<Tuple2<Integer, Integer>> tupRdd = sc.parallelize(Arrays.asList(new Tuple2<>(1, 1), new Tuple2<>(1, 2)
21 |                 , new Tuple2<>(2, 3), new Tuple2<>(2, 4)
22 |                 , new Tuple2<>(3, 5), new Tuple2<>(3, 6)
23 |                 , new Tuple2<>(4, 7), new Tuple2<>(4, 8)
24 |                 , new Tuple2<>(5, 9), new Tuple2<>(5, 10)
25 |         ), 3);
26 |         JavaPairRDD<Integer, Integer> pairRDD = JavaPairRDD.fromJavaRDD(tupRdd);
27 |
28 |         System.out.println("============CustomPartition==================");
29 |         JavaPairRDD<Integer, Integer> customPart = pairRDD.partitionBy(new JavaCustomPart(3));
30 |         HashPartitionerRdd.printPartRDD(customPart);
31 |     }
32 | }
33 |
--------------------------------------------------------------------------------
/src/main/java/com/spark/rdd_tutorial/Tutorial13/HashPartitionerRdd.java:
--------------------------------------------------------------------------------
1 | package com.spark.rdd_tutorial.Tutorial13;
2 |
3 | import org.apache.spark.HashPartitioner;
4 | import org.apache.spark.SparkConf;
5 | import org.apache.spark.api.java.JavaPairRDD;
6 | import org.apache.spark.api.java.JavaRDD;
7 | import org.apache.spark.api.java.JavaSparkContext;
8 | import org.apache.spark.api.java.function.Function2;
9 | import org.apache.spark.api.java.function.VoidFunction;
10 | import scala.Tuple2;
11 |
12 | import java.util.ArrayList;
13 | import java.util.Arrays;
14 | import java.util.Iterator;
15 |
16 | /**
17 |  * Created by zhaikaishun on 2017/8/20.
18 |  */
19 | public class HashPartitionerRdd {
20 |     public static void main(String[] args) {
21 |
22 |         SparkConf sparkConf = new SparkConf().setAppName("HashPartitionerRdd").setMaster("local");
23 |         JavaSparkContext sc = new JavaSparkContext(sparkConf);
24 |         sc.setLogLevel("WARN");
25 |         JavaRDD<Tuple2<Integer, Integer>> tupRdd = sc.parallelize(Arrays.asList(new Tuple2<>(1, 1), new Tuple2<>(1, 2)
26 |                 , new Tuple2<>(2, 3), new Tuple2<>(2, 4)
27 |                 , new Tuple2<>(3, 5), new Tuple2<>(3, 6)
28 |                 , new Tuple2<>(4, 7), new Tuple2<>(4, 8)
29 |                 , new Tuple2<>(5, 9), new Tuple2<>(5, 10)
30 |         ), 3);
31 |         JavaPairRDD<Integer, Integer> pairRDD = JavaPairRDD.fromJavaRDD(tupRdd);
32 |         JavaPairRDD<Integer, Integer> partitioned = pairRDD.partitionBy(new HashPartitioner(3));
33 |         System.out.println("============HashPartitioner==================");
34 |         printPartRDD(partitioned);
35 |     }
36 |
37 |     /**
38 |      * Prints each element of a JavaPairRDD together with the index of the partition it belongs to;
39 |      * this doubles as a review of mapPartitionsWithIndex.
40 |      * Using glom() would be simpler, but it is deliberately not used here.
41 |      * @param pairRDD
42 |      */
43 |     public static void printPartRDD(JavaPairRDD<Integer, Integer> pairRDD) {
44 |         JavaRDD<Tuple2<Integer, Tuple2<Integer, Integer>>> mapPartitionIndexRDD = pairRDD.mapPartitionsWithIndex(new Function2<Integer, Iterator<Tuple2<Integer, Integer>>, Iterator<Tuple2<Integer, Tuple2<Integer, Integer>>>>() {
45 |             @Override
46 |             public Iterator<Tuple2<Integer, Tuple2<Integer, Integer>>> call(Integer partIndex, Iterator<Tuple2<Integer, Integer>> tuple2Iterator) {
47 |                 ArrayList<Tuple2<Integer, Tuple2<Integer, Integer>>> tuple2s = new ArrayList<>();
48 |
49 |                 while (tuple2Iterator.hasNext()) {
50 |                     Tuple2<Integer, Integer> next = tuple2Iterator.next();
51 |                     tuple2s.add(new Tuple2<>(partIndex, next));
52 |                 }
53 |                 return tuple2s.iterator();
54 |             }
55 |         }, false);
56 |
57 |         mapPartitionIndexRDD.foreach(new VoidFunction<Tuple2<Integer, Tuple2<Integer, Integer>>>() {
58 |             @Override
59 |             public void call(Tuple2<Integer, Tuple2<Integer, Integer>> integerTuple2Tuple2) throws Exception {
60 |                 System.out.println(integerTuple2Tuple2);
61 |             }
62 |         });
63 |     }
64 | }
65 |
--------------------------------------------------------------------------------
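The javadoc on printPartRDD above notes that glom() would be simpler. A minimal sketch of that alternative, with the same element types and assuming the same imports as HashPartitionerRdd.java plus java.util.List (the helper name printPartRddWithGlom is ours, not part of the repo):

```java
// Hypothetical glom()-based alternative to printPartRDD.
// glom() turns each partition into a List, so partition contents can be
// printed with an explicit index instead of using mapPartitionsWithIndex.
public static void printPartRddWithGlom(JavaPairRDD<Integer, Integer> pairRDD) {
    List<List<Tuple2<Integer, Integer>>> partitions = pairRDD.glom().collect();
    for (int i = 0; i < partitions.size(); i++) {
        System.out.println("partition " + i + ": " + partitions.get(i));
    }
}
```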
/src/main/java/com/spark/rdd_tutorial/Tutorial13/JavaCustomPart.java:
--------------------------------------------------------------------------------
1 | package com.spark.rdd_tutorial.Tutorial13;
2 |
3 | import org.apache.spark.Partitioner;
4 |
5 | /**
6 |  * Custom partitioner
7 | * Created by zhaikaishun on 2017/8/20.
8 | */
9 | public class JavaCustomPart extends Partitioner {
10 | int i = 1;
11 | public JavaCustomPart(int i){
12 | this.i=i;
13 | }
14 | public JavaCustomPart(){}
15 | @Override
16 | public int numPartitions() {
17 | return i;
18 | }
19 |
20 | @Override
21 | public int getPartition(Object key) {
22 | int keyCode = Integer.parseInt(key.toString());
23 | if(keyCode>=4){
24 | return 0;
25 | }else if(keyCode>=2&&keyCode<4){
26 | return 1;
27 | }else {
28 | return 2;
29 | }
30 | }
31 | }
32 |
--------------------------------------------------------------------------------
/src/main/java/com/spark/rdd_tutorial/tutorial1/ParallelizeRdd.java:
--------------------------------------------------------------------------------
1 | package com.spark.rdd_tutorial.tutorial1;
2 |
3 | import org.apache.spark.SparkConf;
4 | import org.apache.spark.api.java.JavaRDD;
5 | import org.apache.spark.api.java.JavaSparkContext;
6 |
7 | import java.util.Arrays;
8 | import java.util.List;
9 |
10 | /**
11 |  * Created by zhaikaishun on 2017/8/20.
12 |  */
13 | public class ParallelizeRdd {
14 |     public static void main(String[] args) {
15 |         SparkConf sparkConf = new SparkConf().setAppName("ParallelizeRdd").setMaster("local");
16 |         JavaSparkContext jsc = new JavaSparkContext(sparkConf);
17 |         jsc.setLogLevel("WARN");
18 |         JavaRDD<String> javaStringRDD = jsc.parallelize(
19 |                 Arrays.asList("shenzhen", "is a beautiful city"));
20 |         List<String> collect = javaStringRDD.collect();
21 |         for (String str : collect) {
22 |             System.out.println(str);
23 |         }
24 |     }
25 | }
26 |
--------------------------------------------------------------------------------
/src/main/java/com/spark/rdd_tutorial/tutorial10/CollectAsMapRdd.java:
--------------------------------------------------------------------------------
1 | package com.spark.rdd_tutorial.tutorial10;
2 |
3 | import org.apache.spark.SparkConf;
4 | import org.apache.spark.api.java.JavaPairRDD;
5 | import org.apache.spark.api.java.JavaRDD;
6 | import org.apache.spark.api.java.JavaSparkContext;
7 | import scala.Tuple2;
8 |
9 | import java.util.Arrays;
10 | import java.util.Map;
11 |
12 | /**
13 |  * Created by Administrator on 2018/4/18.
14 |  */
15 | public class CollectAsMapRdd {
16 |     public static void main(String[] args) {
17 |         SparkConf sparkConf = new SparkConf().setAppName("CollectAsMapRdd").setMaster("local");
18 |         JavaSparkContext sc = new JavaSparkContext(sparkConf);
19 |         sc.setLogLevel("WARN");
20 |         JavaRDD<Tuple2<Integer, Integer>> tupleRDD =
21 |                 sc.parallelize(Arrays.asList(new Tuple2<>(1, 2),
22 |                         new Tuple2<>(2, 4),
23 |                         new Tuple2<>(2, 5),
24 |                         new Tuple2<>(3, 4),
25 |                         new Tuple2<>(3, 5),
26 |                         new Tuple2<>(3, 6)));
27 |         JavaPairRDD<Integer, Integer> mapRDD = JavaPairRDD.fromJavaRDD(tupleRDD);
28 |         // known issue: this can throw "[Ljava.lang.Object; cannot be cast to [Lscala.Tuple2;"
29 |         Map<Integer, Integer> collectMap = mapRDD.collectAsMap();
30 |
31 |         for (Integer key : collectMap.keySet()) {
32 |             System.out.println("key: " + key + " value: " + collectMap.get(key));
33 |         }
34 |     }
35 | }
36 |
--------------------------------------------------------------------------------
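The comment in CollectAsMapRdd.java above flags a ClassCastException seen with collectAsMap(). A common workaround is to collect() the tuples and build the map on the driver; a minimal sketch (the helper class name is ours, not part of the repo):

```java
import org.apache.spark.api.java.JavaPairRDD;
import scala.Tuple2;

import java.util.HashMap;
import java.util.Map;

public class CollectAsMapWorkaround {
    // Builds the map driver-side from collect(), sidestepping the
    // collectAsMap() cast error noted in CollectAsMapRdd.java.
    public static Map<Integer, Integer> toMap(JavaPairRDD<Integer, Integer> pairRDD) {
        Map<Integer, Integer> result = new HashMap<>();
        for (Tuple2<Integer, Integer> tp : pairRDD.collect()) {
            result.put(tp._1, tp._2);
        }
        return result;
    }
}
```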
/src/main/java/com/spark/rdd_tutorial/tutorial10/CountByKeyRdd.java:
--------------------------------------------------------------------------------
1 | package com.spark.rdd_tutorial.tutorial10;
2 |
3 | /**
4 | * Created by Administrator on 2018/4/18.
5 | */
6 | public class CountByKeyRdd {
7 | }
8 |
--------------------------------------------------------------------------------
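CountByKeyRdd.java above is an empty stub. A minimal countByKey sketch in the same style (the class name CountByKeyRddSketch and the sample data are ours, not part of the repo):

```java
package com.spark.rdd_tutorial.tutorial10;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import scala.Tuple2;

import java.util.Arrays;
import java.util.Map;

public class CountByKeyRddSketch {
    public static void main(String[] args) {
        SparkConf sparkConf = new SparkConf().setAppName("CountByKeyRdd").setMaster("local");
        JavaSparkContext sc = new JavaSparkContext(sparkConf);
        sc.setLogLevel("WARN");
        JavaRDD<Tuple2<Integer, Integer>> tupleRDD = sc.parallelize(Arrays.asList(
                new Tuple2<>(1, 2), new Tuple2<>(2, 4), new Tuple2<>(2, 5), new Tuple2<>(3, 6)));
        JavaPairRDD<Integer, Integer> pairRDD = JavaPairRDD.fromJavaRDD(tupleRDD);
        // countByKey is an action: it returns the number of elements per key to the driver
        Map<Integer, Long> counts = pairRDD.countByKey();
        for (Map.Entry<Integer, Long> e : counts.entrySet()) {
            System.out.println("key: " + e.getKey() + " count: " + e.getValue());
        }
        sc.stop();
    }
}
```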
/src/main/java/com/spark/rdd_tutorial/tutorial10/tutorail10.md:
--------------------------------------------------------------------------------
1 | Reference: Spark RDD operators (10): PairRDD actions countByKey, collectAsMap
2 | http://blog.csdn.net/t1dmzks/article/details/70833185
--------------------------------------------------------------------------------
/src/main/java/com/spark/rdd_tutorial/tutorial11/tutorial11.md:
--------------------------------------------------------------------------------
1 | Reference: Spark RDD operators (11): RDD save actions saveAsTextFile, saveAsSequenceFile, saveAsObjectFile, saveAsHadoopFile, etc.
2 |
3 | http://blog.csdn.net/t1dmzks/article/details/71037850
--------------------------------------------------------------------------------
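tutorial11 above only links to the blog post on save actions. A minimal sketch of the two simplest ones (the class name and output paths are ours; note that saveAsTextFile fails if the target directory already exists):

```java
package com.spark.rdd_tutorial.tutorial11;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;

import java.util.Arrays;

public class SaveActionsSketch {
    public static void main(String[] args) {
        SparkConf sparkConf = new SparkConf().setAppName("SaveActions").setMaster("local");
        JavaSparkContext sc = new JavaSparkContext(sparkConf);
        sc.setLogLevel("WARN");
        JavaRDD<String> rdd = sc.parallelize(Arrays.asList("aa", "bb", "cc"));
        // writes one part-NNNNN file per partition under the target directory
        rdd.saveAsTextFile("file:///tmp/save_text_demo");
        // saveAsObjectFile stores the elements using Java serialization
        rdd.saveAsObjectFile("file:///tmp/save_object_demo");
        sc.stop();
    }
}
```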
/src/main/java/com/spark/rdd_tutorial/tutorial12/MapPartitionsRdd.java:
--------------------------------------------------------------------------------
1 | package com.spark.rdd_tutorial.tutorial12;
2 |
3 | import org.apache.spark.SparkConf;
4 | import org.apache.spark.api.java.JavaRDD;
5 | import org.apache.spark.api.java.JavaSparkContext;
6 | import org.apache.spark.api.java.function.FlatMapFunction;
7 | import org.apache.spark.api.java.function.VoidFunction;
8 | import scala.Tuple2;
9 |
10 | import java.util.ArrayList;
11 | import java.util.Arrays;
12 | import java.util.Iterator;
13 |
14 | /**
15 |  * Created by zhaikaishun on 2017/8/20.
16 |  */
17 | public class MapPartitionsRdd {
18 |     public static void main(String[] args) {
19 |         SparkConf sparkConf = new SparkConf().setAppName("MapPartitionsRdd").setMaster("local");
20 |         JavaSparkContext sc = new JavaSparkContext(sparkConf);
21 |         sc.setLogLevel("WARN");
22 |
23 |         JavaRDD<Integer> rdd = sc.parallelize(
24 |                 Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10));
25 |         /*========================== square each element =======================================*/
26 |         JavaRDD<Integer> mapPartitionRDD = rdd.mapPartitions(new FlatMapFunction<Iterator<Integer>, Integer>() {
27 |             @Override
28 |             public Iterator<Integer> call(Iterator<Integer> it) throws Exception {
29 |                 ArrayList<Integer> results = new ArrayList<>();
30 |                 while (it.hasNext()) {
31 |                     int i = it.next();
32 |                     results.add(i * i);
33 |                 }
34 |                 return results.iterator();
35 |             }
36 |         });
37 |         System.out.println("square each element");
38 |         mapPartitionRDD.foreach(new VoidFunction<Integer>() {
39 |             @Override
40 |             public void call(Integer integer) throws Exception {
41 |                 System.out.println(integer);
42 |             }
43 |         });
44 |
45 |         /* ================ turn each number i into a pair (i, i*i) ======================================== */
46 |         JavaRDD<Tuple2<Integer, Integer>> tuple2JavaRDD = rdd.mapPartitions(new FlatMapFunction<Iterator<Integer>, Tuple2<Integer, Integer>>() {
47 |             @Override
48 |             public Iterator<Tuple2<Integer, Integer>> call(Iterator<Integer> it) throws Exception {
49 |                 ArrayList<Tuple2<Integer, Integer>> tuple2s = new ArrayList<>();
50 |                 while (it.hasNext()) {
51 |                     Integer next = it.next();
52 |                     tuple2s.add(new Tuple2<>(next, next * next));
53 |                 }
54 |                 return tuple2s.iterator();
55 |             }
56 |         });
57 |         System.out.println("turn each number i into a pair (i, i*i)");
58 |         tuple2JavaRDD.foreach(new VoidFunction<Tuple2<Integer, Integer>>() {
59 |             @Override
60 |             public void call(Tuple2<Integer, Integer> tp2) throws Exception {
61 |                 System.out.println(tp2);
62 |             }
63 |         });
64 |     }
65 | }
66 |
--------------------------------------------------------------------------------
/src/main/java/com/spark/rdd_tutorial/tutorial12/MapPartitionsWithIndexRdd.java:
--------------------------------------------------------------------------------
1 | package com.spark.rdd_tutorial.tutorial12;
2 |
3 | import org.apache.spark.SparkConf;
4 | import org.apache.spark.api.java.JavaPairRDD;
5 | import org.apache.spark.api.java.JavaRDD;
6 | import org.apache.spark.api.java.JavaSparkContext;
7 | import org.apache.spark.api.java.function.Function2;
8 | import org.apache.spark.api.java.function.VoidFunction;
9 | import scala.Tuple2;
10 |
11 | import java.util.ArrayList;
12 | import java.util.Arrays;
13 | import java.util.Iterator;
14 | import java.util.List;
15 |
16 | /**
17 |  * Created by zhaikaishun on 2017/8/20.
18 |  */
19 | public class MapPartitionsWithIndexRdd {
20 |     public static void main(String[] args) {
21 |         SparkConf sparkConf = new SparkConf().setAppName("MapPartitionsWithIndexRdd").setMaster("local");
22 |         JavaSparkContext sc = new JavaSparkContext(sparkConf);
23 |         sc.setLogLevel("WARN");
24 |         JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10), 3);
25 |         JavaRDD<Tuple2<Integer, Integer>> tuple2JavaRDD = rdd.mapPartitionsWithIndex(new Function2<Integer, Iterator<Integer>, Iterator<Tuple2<Integer, Integer>>>() {
26 |             @Override
27 |             public Iterator<Tuple2<Integer, Integer>> call(Integer partIndex, Iterator<Integer> it) throws Exception {
28 |                 ArrayList<Tuple2<Integer, Integer>> tuple2s = new ArrayList<>();
29 |                 while (it.hasNext()) {
30 |                     int next = it.next();
31 |                     tuple2s.add(new Tuple2<>(partIndex, next));
32 |                 }
33 |                 return tuple2s.iterator();
34 |             }
35 |         }, false);
36 |
37 |         tuple2JavaRDD.foreach(new VoidFunction<Tuple2<Integer, Integer>>() {
38 |             @Override
39 |             public void call(Tuple2<Integer, Integer> tp2) throws Exception {
40 |                 System.out.println(tp2);
41 |             }
42 |         });
43 |         /* mapPartitionsWithIndex: list the elements in each partition of a pair RDD */
44 |         JavaRDD<Tuple2<Integer, Integer>> rdd1 = sc.parallelize(Arrays.asList(new Tuple2<>(1, 1), new Tuple2<>(1, 2)
45 |                 , new Tuple2<>(2, 3), new Tuple2<>(2, 4)
46 |                 , new Tuple2<>(3, 5), new Tuple2<>(3, 6)
47 |                 , new Tuple2<>(4, 7), new Tuple2<>(4, 8)
48 |                 , new Tuple2<>(5, 9), new Tuple2<>(5, 10)
49 |         ), 3);
50 |         JavaPairRDD<Integer, Integer> pairRDD = JavaPairRDD.fromJavaRDD(rdd1);
51 |
52 |         JavaRDD<Tuple2<Integer, Tuple2<Integer, Integer>>> mapPartitionIndexRDD = pairRDD.mapPartitionsWithIndex(new Function2<Integer, Iterator<Tuple2<Integer, Integer>>, Iterator<Tuple2<Integer, Tuple2<Integer, Integer>>>>() {
53 |             @Override
54 |             public Iterator<Tuple2<Integer, Tuple2<Integer, Integer>>> call(Integer partIndex, Iterator<Tuple2<Integer, Integer>> tuple2Iterator) {
55 |                 ArrayList<Tuple2<Integer, Tuple2<Integer, Integer>>> tuple2s = new ArrayList<>();
56 |
57 |                 while (tuple2Iterator.hasNext()) {
58 |                     Tuple2<Integer, Integer> next = tuple2Iterator.next();
59 |                     tuple2s.add(new Tuple2<>(partIndex, next));
60 |                 }
61 |                 return tuple2s.iterator();
62 |             }
63 |         }, false);
64 |         System.out.println("mapPartitionsWithIndex: elements in each partition of the pair RDD");
65 |         mapPartitionIndexRDD.foreach(new VoidFunction<Tuple2<Integer, Tuple2<Integer, Integer>>>() {
66 |             @Override
67 |             public void call(Tuple2<Integer, Tuple2<Integer, Integer>> integerTuple2Tuple2) throws Exception {
68 |                 System.out.println(integerTuple2Tuple2);
69 |             }
70 |         });
71 |
72 |         /* alternatively, glom() can be used to print the contents of each partition */
73 |         System.out.println("printing each partition via glom");
74 |         JavaRDD<List<Tuple2<Integer, Integer>>> glom = pairRDD.glom();
75 |         glom.foreach(new VoidFunction<List<Tuple2<Integer, Integer>>>() {
76 |             @Override
77 |             public void call(List<Tuple2<Integer, Integer>> tuple2s) throws Exception {
78 |                 System.out.println(tuple2s);
79 |             }
80 |         });
81 |
82 |     }
83 | }
84 |
--------------------------------------------------------------------------------
/src/main/java/com/spark/rdd_tutorial/tutorial2/FilterRdd.java:
--------------------------------------------------------------------------------
1 | package com.spark.rdd_tutorial.tutorial2;
2 |
3 | import org.apache.spark.SparkConf;
4 | import org.apache.spark.api.java.JavaRDD;
5 | import org.apache.spark.api.java.JavaSparkContext;
6 | import org.apache.spark.api.java.function.Function;
7 |
8 | import java.util.List;
9 |
10 | /**
11 |  * Created by zhaikaishun on 2017/8/20.
12 |  */
13 | public class FilterRdd {
14 |     public static void main(String[] args) {
15 |         SparkConf sparkConf = new SparkConf().setAppName("FilterRdd").setMaster("local");
16 |         JavaSparkContext jsc = new JavaSparkContext(sparkConf);
17 |         jsc.setLogLevel("WARN");
18 |         JavaRDD<String> lines = jsc.textFile("D:\\git\\spark_tutorial\\src\\main\\resources\\filter_sample.txt");
19 |         JavaRDD<String> zksRDD = lines.filter(new Function<String, Boolean>() {
20 |             @Override
21 |             public Boolean call(String s) throws Exception {
22 |                 return s.contains("zks");
23 |             }
24 |         });
25 |         // print the matching lines
26 |         List<String> zksCollect = zksRDD.collect();
27 |         for (String str : zksCollect) {
28 |             System.out.println(str);
29 |         }
30 |     }
31 | }
32 |
--------------------------------------------------------------------------------
/src/main/java/com/spark/rdd_tutorial/tutorial2/FlatMapRdd.java:
--------------------------------------------------------------------------------
1 | package com.spark.rdd_tutorial.tutorial2;
2 |
3 | import com.spark.rdd.tutorial.util.MyIterator;
4 | import org.apache.spark.SparkConf;
5 | import org.apache.spark.api.java.JavaRDD;
6 | import org.apache.spark.api.java.JavaSparkContext;
7 | import org.apache.spark.api.java.function.FlatMapFunction;
8 | import org.apache.spark.api.java.function.VoidFunction;
9 |
10 | import java.util.Arrays;
11 | import java.util.Iterator;
12 |
13 | /**
14 |  * Created by zhaikaishun on 2017/8/20.
15 |  * In Spark 2.0+, FlatMapFunction.call must return an Iterator (not an Iterable).
16 |  */
17 | public class FlatMapRdd {
18 |     public static void main(String[] args) {
19 |         SparkConf sparkConf = new SparkConf().setAppName("FlatMapRdd").setMaster("local");
20 |         JavaSparkContext jsc = new JavaSparkContext(sparkConf);
21 |         jsc.setLogLevel("WARN");
22 |         JavaRDD<String> lines = jsc.textFile("D:\\git\\spark_tutorial\\src\\main\\resources\\filter_sample.txt");
23 |         JavaRDD<String> flatMapRDD = lines.flatMap(new FlatMapFunction<String, String>() {
24 |             @Override
25 |             public Iterator<String> call(String s) throws Exception {
26 |                 String[] split = s.split("\\s+");
27 |                 return new MyIterator<>(Arrays.asList(split));
28 |             }
29 |         });
30 |         // print each element
31 |         flatMapRDD.foreach(new VoidFunction<String>() {
32 |             @Override
33 |             public void call(String s) throws Exception {
34 |                 System.out.println(s);
35 |             }
36 |         });
37 |     }
38 | }
39 |
--------------------------------------------------------------------------------
/src/main/java/com/spark/rdd_tutorial/tutorial2/MapRdd.java:
--------------------------------------------------------------------------------
1 | package com.spark.rdd_tutorial.tutorial2;
2 |
3 | import org.apache.spark.SparkConf;
4 | import org.apache.spark.api.java.JavaRDD;
5 | import org.apache.spark.api.java.JavaSparkContext;
6 | import org.apache.spark.api.java.function.Function;
7 | import org.apache.spark.api.java.function.VoidFunction;
8 |
9 | import java.util.Arrays;
10 |
11 | /**
12 | * Created by zhaikaishun on 2017/8/20.
13 | * 貌似 map 在java 中没多大用处
14 | */
15 | public class MapRdd {
16 | public static void main(String[] args) {
17 | SparkConf sparkConf = new SparkConf().setAppName("MapRdd").setMaster("local");
18 | JavaSparkContext jsc = new JavaSparkContext(sparkConf);
19 | jsc.setLogLevel("WARN");
20 | JavaRDD lines = jsc.textFile("D:\\git\\spark_tutorial\\src\\main\\resources\\filter_sample.txt");
21 | JavaRDD> mapRDD = lines.map(new Function>() {
22 | @Override
23 | public Iterable call(String s) throws Exception {
24 | String[] split = s.split("\\s+");
25 | return Arrays.asList(split);
26 | }
27 | });
28 | //循环打印
29 | mapRDD.foreach(new VoidFunction>() {
30 | @Override
31 | public void call(Iterable strings) throws Exception {
32 | System.out.println(strings);
33 | }
34 | });
35 | }
36 | }
37 |
--------------------------------------------------------------------------------
/src/main/java/com/spark/rdd_tutorial/tutorial3/DisUnionAndSoOn.java:
--------------------------------------------------------------------------------
1 | package com.spark.rdd_tutorial.tutorial3;
2 |
3 | import org.apache.spark.SparkConf;
4 | import org.apache.spark.api.java.JavaPairRDD;
5 | import org.apache.spark.api.java.JavaRDD;
6 | import org.apache.spark.api.java.JavaSparkContext;
7 | import scala.Tuple2;
8 |
9 | import java.util.Arrays;
10 | import java.util.List;
11 |
12 | /**
13 |  * Created by zhaikaishun on 2017/8/20.
14 |  */
15 | public class DisUnionAndSoOn {
16 |     public static void main(String[] args) {
17 |         SparkConf sparkConf = new SparkConf().setAppName("tutorial3").setMaster("local");
18 |         JavaSparkContext sc = new JavaSparkContext(sparkConf);
19 |         sc.setLogLevel("WARN");
20 |
21 |         // distinct
22 |         JavaRDD<String> RDD1 = sc.parallelize(Arrays.asList("aa", "aa", "bb", "cc", "dd"));
23 |         JavaRDD<String> distinctRDD = RDD1.distinct();
24 |         List<String> collect = distinctRDD.collect();
25 |         System.out.println("*************** Distinct ***********");
26 |         for (String str : collect) {
27 |             System.out.println(str);
28 |         }
29 |
30 |         // union
31 |         JavaRDD<String> RDD2 = sc.parallelize(Arrays.asList("aa", "dd", "ff"));
32 |         JavaRDD<String> unionRDD = RDD1.union(RDD2);
33 |         collect = unionRDD.collect();
34 |         System.out.println("\n" + "*************** Union ***********");
35 |         for (String str : collect) {
36 |             System.out.println(str);
37 |         }
38 |
39 |         // intersection
40 |         JavaRDD<String> intersectionRDD = RDD1.intersection(RDD2);
41 |         collect = intersectionRDD.collect();
42 |         System.out.println("\n" + "*************** Intersection ***********");
43 |         for (String str : collect) {
44 |             System.out.println(str);
45 |         }
46 |
47 |         // subtract
48 |         JavaRDD<String> subtractRDD = RDD1.subtract(RDD2);
49 |         collect = subtractRDD.collect();
50 |         System.out.println("\n" + "*************** Subtract ***********");
51 |         for (String str : collect) {
52 |             System.out.println(str);
53 |         }
54 |
55 |         // cartesian
56 |         JavaPairRDD<String, String> cartesian = RDD1.cartesian(RDD2);
57 |         List<Tuple2<String, String>> collect1 = cartesian.collect();
58 |         System.out.println("\n" + "*************** Cartesian ***********");
59 |         for (Tuple2<String, String> tp : collect1) {
60 |             System.out.println("(" + tp._1 + " " + tp._2 + ")");
61 |         }
62 |     }
63 | }
64 |
--------------------------------------------------------------------------------
/src/main/java/com/spark/rdd_tutorial/tutorial4/FlatMapToPairRdd.java:
--------------------------------------------------------------------------------
1 | package com.spark.rdd_tutorial.tutorial4;
2 |
3 | import org.apache.spark.SparkConf;
4 | import org.apache.spark.api.java.JavaPairRDD;
5 | import org.apache.spark.api.java.JavaRDD;
6 | import org.apache.spark.api.java.JavaSparkContext;
7 | import org.apache.spark.api.java.function.PairFlatMapFunction;
8 | import org.apache.spark.api.java.function.VoidFunction;
9 | import scala.Tuple2;
10 |
11 | import java.util.ArrayList;
12 | import java.util.Iterator;
13 |
14 | /**
15 |  * Created by zhaikaishun on 2017/8/20.
16 |  */
17 | public class FlatMapToPairRdd {
18 |     public static void main(String[] args) {
19 |         SparkConf sparkConf = new SparkConf().setAppName("FlatMapToPairRdd").setMaster("local");
20 |         JavaSparkContext sc = new JavaSparkContext(sparkConf);
21 |         sc.setLogLevel("WARN");
22 |
23 |         JavaRDD<String> lines = sc.textFile("D:\\git\\spark_tutorial\\src\\main\\resources\\filter_sample.txt");
24 |
25 |         JavaPairRDD<String, Integer> wordPairRDD = lines.flatMapToPair(new PairFlatMapFunction<String, String, Integer>() {
26 |             @Override
27 |             public Iterator<Tuple2<String, Integer>> call(String s) throws Exception {
28 |                 ArrayList<Tuple2<String, Integer>> tpLists = new ArrayList<>();
29 |                 String[] split = s.split("\\s+");
30 |                 for (int i = 0; i < split.length; i++) {
31 |                     Tuple2<String, Integer> tp = new Tuple2<>(split[i], 1);
32 |                     tpLists.add(tp);
33 |                 }
34 |                 return tpLists.iterator();
35 |             }
36 |         });
37 |
38 |         wordPairRDD.foreach(new VoidFunction<Tuple2<String, Integer>>() {
39 |             @Override
40 |             public void call(Tuple2<String, Integer> tp) throws Exception {
41 |                 System.out.println("key: " + tp._1 + " value: " + tp._2);
42 |             }
43 |         });
44 |     }
45 | }
46 |
--------------------------------------------------------------------------------
/src/main/java/com/spark/rdd_tutorial/tutorial4/MapToPairRdd.java:
--------------------------------------------------------------------------------
1 | package com.spark.rdd_tutorial.tutorial4;
2 |
3 | import org.apache.spark.SparkConf;
4 | import org.apache.spark.api.java.JavaPairRDD;
5 | import org.apache.spark.api.java.JavaRDD;
6 | import org.apache.spark.api.java.JavaSparkContext;
7 | import org.apache.spark.api.java.function.PairFunction;
8 | import org.apache.spark.api.java.function.VoidFunction;
9 | import scala.Tuple2;
10 |
11 | /**
12 | * Created by zhaikaishun on 2017/8/20.
13 | */
14 | public class MapToPairRdd {
15 | public static void main(String[] args) {
16 | SparkConf sparkConf = new SparkConf().setAppName("MapToPairRdd").setMaster("local");
17 | JavaSparkContext sc = new JavaSparkContext(sparkConf);
18 | sc.setLogLevel("WARN");
19 |
20 | JavaRDD lines = sc.textFile("D:\\git\\spark_tutorial\\src\\main\\resources\\filter_sample.txt");
21 | //输入的是一个string的字符串,输出的是一个(String, Integer) 的map
22 | JavaPairRDD pairRDD = lines.mapToPair(new PairFunction() {
23 | @Override
24 | public Tuple2 call(String s) throws Exception {
25 | return new Tuple2(s.split("\\s+")[0], 1);
26 | }
27 | });
28 |
29 | //输出
30 | pairRDD.foreach(new VoidFunction>() {
31 | @Override
32 | public void call(Tuple2 tp) throws Exception {
33 | System.out.println("key: "+tp._1+" value: "+tp._2);
34 | }
35 | });
36 | }
37 | }
38 |
--------------------------------------------------------------------------------
/src/main/java/com/spark/rdd_tutorial/tutorial5/CombineByKeyRdd.java:
--------------------------------------------------------------------------------
1 | package com.spark.rdd_tutorial.tutorial5;
2 |
3 | import org.apache.spark.SparkConf;
4 | import org.apache.spark.api.java.JavaPairRDD;
5 | import org.apache.spark.api.java.JavaRDD;
6 | import org.apache.spark.api.java.JavaSparkContext;
7 | import org.apache.spark.api.java.function.Function;
8 | import org.apache.spark.api.java.function.Function2;
9 | import org.apache.spark.api.java.function.PairFunction;
10 | import scala.Tuple2;
11 |
12 | import java.util.ArrayList;
13 | import java.util.Map;
14 |
15 | /**
16 | * Created by zhaikaishun on 2017/8/20.
17 | */
18 | public class CombineByKeyRdd {
19 | public static void main(String[] args) {
20 | SparkConf sparkConf = new SparkConf().setAppName("CombineByKeyRdd").setMaster("local");
21 | JavaSparkContext sc = new JavaSparkContext(sparkConf);
22 | sc.setLogLevel("WARN");
23 | ArrayList scoreDetails = new ArrayList<>();
24 | scoreDetails.add(new ScoreDetail("xiaoming", "Math", 98));
25 | scoreDetails.add(new ScoreDetail("xiaoming", "English", 88));
26 | scoreDetails.add(new ScoreDetail("wangwu", "Math", 75));
27 | scoreDetails.add(new ScoreDetail("wangwu", "Englist", 78));
28 | scoreDetails.add(new ScoreDetail("lihua", "Math", 90));
29 | scoreDetails.add(new ScoreDetail("lihua", "English", 80));
30 | scoreDetails.add(new ScoreDetail("zhangsan", "Math", 91));
31 | scoreDetails.add(new ScoreDetail("zhangsan", "English", 80));
32 |
33 | JavaRDD scoreDetailsRDD = sc.parallelize(scoreDetails);
34 |
35 | JavaPairRDD pairRDD = scoreDetailsRDD.mapToPair(new PairFunction() {
36 | @Override
37 | public Tuple2 call(ScoreDetail scoreDetail) throws Exception {
38 |
39 | return new Tuple2<>(scoreDetail.studentName, scoreDetail);
40 | }
41 | });
42 | // new Function();
43 |
44 | Function> createCombine = new Function>() {
45 | @Override
46 | public Tuple2 call(ScoreDetail scoreDetail) throws Exception {
47 | return new Tuple2<>(scoreDetail.score, 1);
48 | }
49 | };
50 |
51 | // Function2传入两个值,返回一个值
52 | Function2, ScoreDetail, Tuple2> mergeValue = new Function2, ScoreDetail, Tuple2>() {
53 | @Override
54 | public Tuple2 call(Tuple2 tp, ScoreDetail scoreDetail) throws Exception {
55 | return new Tuple2<>(tp._1 + scoreDetail.score, tp._2 + 1);
56 | }
57 | };
58 | Function2, Tuple2, Tuple2> mergeCombiners = new Function2, Tuple2, Tuple2>() {
59 | @Override
60 | public Tuple2 call(Tuple2 tp1, Tuple2 tp2) throws Exception {
61 | return new Tuple2<>(tp1._1 + tp2._1, tp1._2 + tp2._2);
62 | }
63 | };
64 | JavaPairRDD> combineByRDD = pairRDD.combineByKey(createCombine,mergeValue,mergeCombiners);
65 |
66 | //打印平均数
67 | Map> stringTuple2Map = combineByRDD.collectAsMap();
68 | for ( String et:stringTuple2Map.keySet()) {
69 | System.out.println(et+" "+stringTuple2Map.get(et)._1/stringTuple2Map.get(et)._2);
70 | }
71 | }
72 | }
73 |
--------------------------------------------------------------------------------
/src/main/java/com/spark/rdd_tutorial/tutorial5/ScoreDetail.java:
--------------------------------------------------------------------------------
1 | package com.spark.rdd_tutorial.tutorial5;
2 |
3 | import java.io.Serializable;
4 |
5 | /**
6 | * Created by zhaikaishun on 2017/8/20.
7 | */
8 | public class ScoreDetail implements Serializable {
9 | //case class ScoreDetail(studentName: String, subject: String, score: Float)
10 | public String studentName;
11 | public String subject;
12 | public float score;
13 |
14 | public ScoreDetail(String studentName, String subject, float score) {
15 | this.studentName = studentName;
16 | this.subject = subject;
17 | this.score = score;
18 | }
19 | }
20 |
--------------------------------------------------------------------------------
/src/main/java/com/spark/rdd_tutorial/tutorial6/FoldByKey.java:
--------------------------------------------------------------------------------
1 | package com.spark.rdd_tutorial.tutorial6;
2 |
3 | /**
4 | * Created by zhaikaishun on 2017/8/20.
5 | */
6 | public class FoldByKey {
7 |     // Similar to reduceByKey, so it is not written out here; see the sketch after this file.
8 | }
9 |
--------------------------------------------------------------------------------
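FoldByKey.java above is left as a stub. A minimal foldByKey sketch in the style of ReduceByKeyRdd.java (the class name FoldByKeySketch and the sample data are ours, not part of the repo):

```java
package com.spark.rdd_tutorial.tutorial6;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function2;
import scala.Tuple2;

import java.util.Arrays;

public class FoldByKeySketch {
    public static void main(String[] args) {
        SparkConf sparkConf = new SparkConf().setAppName("FoldByKey").setMaster("local");
        JavaSparkContext sc = new JavaSparkContext(sparkConf);
        sc.setLogLevel("WARN");
        JavaRDD<Tuple2<String, Integer>> tupleRDD = sc.parallelize(Arrays.asList(
                new Tuple2<>("aa", 1), new Tuple2<>("aa", 1), new Tuple2<>("bb", 1)));
        JavaPairRDD<String, Integer> pairRDD = JavaPairRDD.fromJavaRDD(tupleRDD);
        // foldByKey is reduceByKey with a per-partition zero value (0 here)
        JavaPairRDD<String, Integer> folded = pairRDD.foldByKey(0, new Function2<Integer, Integer, Integer>() {
            @Override
            public Integer call(Integer i1, Integer i2) throws Exception {
                return i1 + i2;
            }
        });
        System.out.println(folded.collectAsMap()); // e.g. {aa=2, bb=1}
        sc.stop();
    }
}
```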
/src/main/java/com/spark/rdd_tutorial/tutorial6/ReduceByKeyRdd.java:
--------------------------------------------------------------------------------
1 | package com.spark.rdd_tutorial.tutorial6;
2 |
3 | import org.apache.spark.SparkConf;
4 | import org.apache.spark.api.java.JavaPairRDD;
5 | import org.apache.spark.api.java.JavaRDD;
6 | import org.apache.spark.api.java.JavaSparkContext;
7 | import org.apache.spark.api.java.function.Function2;
8 | import org.apache.spark.api.java.function.PairFlatMapFunction;
9 | import scala.Tuple2;
10 |
11 | import java.util.ArrayList;
12 | import java.util.Iterator;
13 | import java.util.Map;
14 |
15 | /**
16 |  * Created by zhaikaishun on 2017/8/20.
17 |  */
18 | public class ReduceByKeyRdd {
19 |     public static void main(String[] args) {
20 |         SparkConf sparkConf = new SparkConf().setAppName("ReduceByKey").setMaster("local");
21 |         JavaSparkContext sc = new JavaSparkContext(sparkConf);
22 |         sc.setLogLevel("WARN");
23 |         JavaRDD<String> lines = sc.textFile("D:\\git\\spark_tutorial\\src\\main\\resources\\filter_sample.txt");
24 |
25 |         JavaPairRDD<String, Integer> wordPairRDD = lines.flatMapToPair(new PairFlatMapFunction<String, String, Integer>() {
26 |             @Override
27 |             public Iterator<Tuple2<String, Integer>> call(String s) throws Exception {
28 |                 ArrayList<Tuple2<String, Integer>> tpLists = new ArrayList<>();
29 |                 String[] split = s.split("\\s+");
30 |                 for (int i = 0; i < split.length; i++) {
31 |                     Tuple2<String, Integer> tp = new Tuple2<>(split[i], 1);
32 |                     tpLists.add(tp);
33 |                 }
34 |                 return tpLists.iterator();
35 |             }
36 |         });
37 |
38 |         JavaPairRDD<String, Integer> wordCountRDD = wordPairRDD.reduceByKey(new Function2<Integer, Integer, Integer>() {
39 |             @Override
40 |             public Integer call(Integer i1, Integer i2) throws Exception {
41 |                 return i1 + i2;
42 |             }
43 |         });
44 |         Map<String, Integer> collectAsMap = wordCountRDD.collectAsMap();
45 |         for (String key : collectAsMap.keySet()) {
46 |             System.out.println("(" + key + "," + collectAsMap.get(key) + ")");
47 |         }
48 |     }
49 | }
50 |
--------------------------------------------------------------------------------
/src/main/java/com/spark/rdd_tutorial/tutorial6/SortByKey.java:
--------------------------------------------------------------------------------
1 | package com.spark.rdd_tutorial.tutorial6;
2 |
3 | /**
4 | * Created by zhaikaishun on 2017/8/20.
5 | */
6 | public class SortByKey {
7 |     // Simple enough that it is not written out here; see the sketch after this file.
8 | }
9 |
--------------------------------------------------------------------------------
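SortByKey.java above is also left as a stub. A minimal sortByKey sketch (the class name SortByKeySketch and the sample data are ours, not part of the repo):

```java
package com.spark.rdd_tutorial.tutorial6;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import scala.Tuple2;

import java.util.Arrays;

public class SortByKeySketch {
    public static void main(String[] args) {
        SparkConf sparkConf = new SparkConf().setAppName("SortByKey").setMaster("local");
        JavaSparkContext sc = new JavaSparkContext(sparkConf);
        sc.setLogLevel("WARN");
        JavaRDD<Tuple2<Integer, String>> tupleRDD = sc.parallelize(Arrays.asList(
                new Tuple2<>(3, "cc"), new Tuple2<>(1, "aa"), new Tuple2<>(2, "bb")));
        JavaPairRDD<Integer, String> pairRDD = JavaPairRDD.fromJavaRDD(tupleRDD);
        // ascending by default; sortByKey(false) sorts descending
        System.out.println(pairRDD.sortByKey().collect()); // [(1,aa), (2,bb), (3,cc)]
        sc.stop();
    }
}
```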
/src/main/java/com/spark/rdd_tutorial/tutorial7/CogroupRdd.java:
--------------------------------------------------------------------------------
1 | package com.spark.rdd_tutorial.tutorial7;
2 |
3 | import org.apache.spark.SparkConf;
4 | import org.apache.spark.api.java.JavaPairRDD;
5 | import org.apache.spark.api.java.JavaRDD;
6 | import org.apache.spark.api.java.JavaSparkContext;
7 | import scala.Tuple2;
8 | import scala.Tuple3;
9 |
10 | import java.util.Arrays;
11 | import java.util.Map;
12 |
13 | /**
14 |  * Created by zhaikaishun on 2017/8/20.
15 |  */
16 | public class CogroupRdd {
17 |     public static void main(String[] args) {
18 |         SparkConf sparkConf = new SparkConf().setAppName("CogroupRdd").setMaster("local");
19 |         JavaSparkContext sc = new JavaSparkContext(sparkConf);
20 |         sc.setLogLevel("WARN");
21 |
22 |         JavaRDD<Tuple2<String, Integer>> scoreDetails1 = sc.parallelize(Arrays.asList(new Tuple2<>("xiaoming", 75)
23 |                 , new Tuple2<>("xiaoming", 90)
24 |                 , new Tuple2<>("lihua", 95)
25 |                 , new Tuple2<>("lihua", 96)));
26 |         JavaRDD<Tuple2<String, Integer>> scoreDetails2 = sc.parallelize(Arrays.asList(new Tuple2<>("xiaoming", 75)
27 |                 , new Tuple2<>("lihua", 60)
28 |                 , new Tuple2<>("lihua", 62)));
29 |         JavaRDD<Tuple2<String, Integer>> scoreDetails3 = sc.parallelize(Arrays.asList(new Tuple2<>("xiaoming", 75)
30 |                 , new Tuple2<>("xiaoming", 45)
31 |                 , new Tuple2<>("lihua", 24)
32 |                 , new Tuple2<>("lihua", 57)));
33 |
34 |         JavaPairRDD<String, Integer> scoreMapRDD1 = JavaPairRDD.fromJavaRDD(scoreDetails1);
35 |         JavaPairRDD<String, Integer> scoreMapRDD2 = JavaPairRDD.fromJavaRDD(scoreDetails2);
36 |         JavaPairRDD<String, Integer> scoreMapRDD3 = JavaPairRDD.fromJavaRDD(scoreDetails3);
37 |
38 |         JavaPairRDD<String, Tuple3<Iterable<Integer>, Iterable<Integer>, Iterable<Integer>>> cogroupRDD = scoreMapRDD1.cogroup(scoreMapRDD2, scoreMapRDD3);
39 |         Map<String, Tuple3<Iterable<Integer>, Iterable<Integer>, Iterable<Integer>>> tuple3 = cogroupRDD.collectAsMap();
40 |         for (String key : tuple3.keySet()) {
41 |             System.out.println("(" + key + ", " + tuple3.get(key) + ")");
42 |         }
43 |     }
44 | }
45 |
--------------------------------------------------------------------------------
/src/main/java/com/spark/rdd_tutorial/tutorial7/GroupByKeyRdd.java:
--------------------------------------------------------------------------------
1 | package com.spark.rdd_tutorial.tutorial7;
2 |
3 | import org.apache.spark.SparkConf;
4 | import org.apache.spark.api.java.JavaPairRDD;
5 | import org.apache.spark.api.java.JavaRDD;
6 | import org.apache.spark.api.java.JavaSparkContext;
7 | import org.apache.spark.api.java.function.VoidFunction;
8 | import scala.Tuple2;
9 |
10 | import java.util.Arrays;
11 | import java.util.List;
12 | import java.util.Map;
13 |
14 | /**
15 |  * Created by zhaikaishun on 2017/8/20.
16 |  */
17 | public class GroupByKeyRdd {
18 |     public static void main(String[] args) {
19 |         SparkConf sparkConf = new SparkConf().setAppName("GroupByKeyRdd").setMaster("local");
20 |         JavaSparkContext sc = new JavaSparkContext(sparkConf);
21 |         sc.setLogLevel("WARN");
22 |
23 |         JavaRDD<Tuple2<String, Integer>> scoreDetails = sc.parallelize(Arrays.asList(new Tuple2<>("xiaoming", 75)
24 |                 , new Tuple2<>("xiaoming", 90)
25 |                 , new Tuple2<>("zs1", 95)
26 |                 , new Tuple2<>("lihua", 188)
27 |                 , new Tuple2<>("xiaoming", 188)
28 |                 , new Tuple2<>("zs1", 188)
29 |                 , new Tuple2<>("lihua", 188)), 3);
30 |         // convert the JavaRDD<Tuple2<String, Integer>> into a JavaPairRDD<String, Integer>
31 |         JavaPairRDD<String, Integer> scoreMapRDD = JavaPairRDD.fromJavaRDD(scoreDetails);
32 |         JavaRDD<List<Tuple2<String, Integer>>> glom = scoreMapRDD.glom();
33 |         glom.foreach(new VoidFunction<List<Tuple2<String, Integer>>>() {
34 |             @Override
35 |             public void call(List<Tuple2<String, Integer>> tuple2s) throws Exception {
36 |                 System.out.println(tuple2s);
37 |                 System.out.println("######");
38 |             }
39 |         });
40 |         System.out.println("++++++++++++++");
41 |         scoreMapRDD.groupByKey().glom().foreach(new VoidFunction<List<Tuple2<String, Iterable<Integer>>>>() {
42 |             @Override
43 |             public void call(List<Tuple2<String, Iterable<Integer>>> tuple2s) throws Exception {
44 |                 System.out.println(tuple2s);
45 |                 System.out.println("*******");
46 |             }
47 |         });
48 |
49 |         // Map<String, Iterable<Integer>> resultMap = scoreMapRDD.groupByKey().collectAsMap();
50 |         // for (String key : resultMap.keySet()) {
51 |         //     System.out.println("(" + key + ", " + resultMap.get(key) + ")");
52 |         // }
53 |     }
54 | }
55 |
--------------------------------------------------------------------------------
/src/main/java/com/spark/rdd_tutorial/tutorial8/JoinRDD.java:
--------------------------------------------------------------------------------
1 | package com.spark.rdd_tutorial.tutorial8;
2 |
3 | import org.apache.spark.SparkConf;
4 | import org.apache.spark.api.java.JavaPairRDD;
5 | import org.apache.spark.api.java.JavaRDD;
6 | import org.apache.spark.api.java.JavaSparkContext;
7 | import org.apache.spark.api.java.Optional;
8 | import org.apache.spark.api.java.function.VoidFunction;
9 | import scala.Tuple2;
10 |
11 | import java.util.Arrays;
12 |
13 | public class JoinRDD {
14 |     public static void main(String[] args) {
15 |         SparkConf sparkConf = new SparkConf().setAppName("JoinRDD").setMaster("local");
16 |         JavaSparkContext sc = new JavaSparkContext(sparkConf);
17 |         sc.setLogLevel("WARN");
18 |
19 |         JavaRDD<Tuple2<Integer, Integer>> rddPre = sc.parallelize(Arrays.asList(new Tuple2<>(1, 2)
20 |                 , new Tuple2<>(3, 4)
21 |                 , new Tuple2<>(3, 6)));
22 |         JavaRDD<Tuple2<Integer, Integer>> otherPre = sc.parallelize(Arrays.asList(new Tuple2<>(3, 10), new Tuple2<>(4, 8)));
23 |
24 |         // convert the JavaRDDs into JavaPairRDDs
25 |         JavaPairRDD<Integer, Integer> rdd = JavaPairRDD.fromJavaRDD(rddPre);
26 |         JavaPairRDD<Integer, Integer> other = JavaPairRDD.fromJavaRDD(otherPre);
27 |         // subtractByKey: keep only the pairs whose key does not appear in `other`
28 |         JavaPairRDD<Integer, Integer> subRDD = rdd.subtractByKey(other);
29 |         System.out.println("-------------subtractByKeyRDD-------------");
30 |         subRDD.foreach(new VoidFunction<Tuple2<Integer, Integer>>() {
31 |             @Override
32 |             public void call(Tuple2<Integer, Integer> tp) throws Exception {
33 |                 System.out.println("key: " + tp._1 + ", value: " + tp._2);
34 |             }
35 |         });
36 |
37 |         // join
38 |         JavaPairRDD<Integer, Tuple2<Integer, Integer>> joinRDD = rdd.join(other);
39 |         System.out.println("-------------joinRDD-------------");
40 |         joinRDD.foreach(new VoidFunction<Tuple2<Integer, Tuple2<Integer, Integer>>>() {
41 |             @Override
42 |             public void call(Tuple2<Integer, Tuple2<Integer, Integer>> tptp) throws Exception {
43 |                 System.out.println("key: " + tptp._1 + ", value: " + tptp._2._1 + "," + tptp._2._2);
44 |             }
45 |         });
46 |
47 |         System.out.println("-------------fullOutJoinRDD-------------");
48 |         JavaPairRDD<Integer, Tuple2<Optional<Integer>, Optional<Integer>>> fullOutJoinRDD = rdd.fullOuterJoin(other);
49 |         fullOutJoinRDD.foreach(new VoidFunction<Tuple2<Integer, Tuple2<Optional<Integer>, Optional<Integer>>>>() {
50 |             @Override
51 |             public void call(Tuple2<Integer, Tuple2<Optional<Integer>, Optional<Integer>>> tptp2) throws Exception {
52 |                 System.out.println("key: " + tptp2._1 + " value: " + tptp2._2._1 + ", " + tptp2._2._2);
53 |             }
54 |         });
55 |
56 |         System.out.println("-------------leftOutJoinRDD-------------");
57 |         JavaPairRDD<Integer, Tuple2<Integer, Optional<Integer>>> leftOutJoinRDD = rdd.leftOuterJoin(other);
58 |         leftOutJoinRDD.foreach(new VoidFunction<Tuple2<Integer, Tuple2<Integer, Optional<Integer>>>>() {
59 |             @Override
60 |             public void call(Tuple2<Integer, Tuple2<Integer, Optional<Integer>>> tptp2) throws Exception {
61 |                 System.out.println("key: " + tptp2._1 + " value: " + tptp2._2._1 + ", " + tptp2._2._2);
62 |             }
63 |         });
64 |
65 |         System.out.println("-------------rightOutJoinRDD-------------");
66 |         JavaPairRDD<Integer, Tuple2<Optional<Integer>, Integer>> rightOutJoinRDD = rdd.rightOuterJoin(other);
67 |         rightOutJoinRDD.foreach(new VoidFunction<Tuple2<Integer, Tuple2<Optional<Integer>, Integer>>>() {
68 |             @Override
69 |             public void call(Tuple2<Integer, Tuple2<Optional<Integer>, Integer>> tptp2) throws Exception {
70 |                 System.out.println("key: " + tptp2._1 + " value: " + tptp2._2._1 + ", " + tptp2._2._2);
71 |             }
72 |         });
73 |     }
74 | }
75 |
--------------------------------------------------------------------------------
/src/main/java/com/spark/rdd_tutorial/tutorial8/readme.txt:
--------------------------------------------------------------------------------
1 | Fairly simple; see:
2 | Spark RDD operators (8): pair RDD joins with subtractByKey, join, rightOuterJoin, leftOuterJoin
3 | http://blog.csdn.net/t1dmzks/article/details/70557249
--------------------------------------------------------------------------------
/src/main/java/com/spark/rdd_tutorial/tutorial9/SparkAction1.java:
--------------------------------------------------------------------------------
1 | package com.spark.rdd_tutorial.tutorial9;
2 |
3 | /**
4 | * Created by zhaikaishun on 2017/8/20.
5 | */
6 | public class SparkAction1 {
7 |     // See http://blog.csdn.net/t1dmzks/article/details/70667011 and the sketch after this file.
8 |
9 | }
10 |
--------------------------------------------------------------------------------
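SparkAction1.java above only points at the blog post. A minimal sketch of the basic actions it covers (the class name BasicActionsSketch and the sample data are ours, not part of the repo):

```java
package com.spark.rdd_tutorial.tutorial9;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function2;

import java.util.Arrays;

public class BasicActionsSketch {
    public static void main(String[] args) {
        SparkConf sparkConf = new SparkConf().setAppName("BasicActions").setMaster("local");
        JavaSparkContext sc = new JavaSparkContext(sparkConf);
        sc.setLogLevel("WARN");
        JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(5, 3, 1, 4, 2));

        System.out.println("first: " + rdd.first());            // 5
        System.out.println("take(2): " + rdd.take(2));          // [5, 3]
        System.out.println("count: " + rdd.count());            // 5
        System.out.println("top(2): " + rdd.top(2));            // [5, 4], descending
        System.out.println("countByValue: " + rdd.countByValue());

        // reduce: combine all elements pairwise
        Integer sum = rdd.reduce(new Function2<Integer, Integer, Integer>() {
            @Override
            public Integer call(Integer a, Integer b) throws Exception {
                return a + b;
            }
        });
        System.out.println("reduce (sum): " + sum);              // 15

        // fold: like reduce, but with a zero value
        Integer folded = rdd.fold(0, new Function2<Integer, Integer, Integer>() {
            @Override
            public Integer call(Integer a, Integer b) throws Exception {
                return a + b;
            }
        });
        System.out.println("fold (sum): " + folded);             // 15
        sc.stop();
    }
}
```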
/src/main/java/com/spark/rdd_tutorial/tutorial9/readme.md:
--------------------------------------------------------------------------------
1 | ## Spark RDD operators (9): basic actions first, take, collect, count, countByValue, reduce, aggregate, fold, top
2 | See my blog post: http://blog.csdn.net/t1dmzks/article/details/70667011
--------------------------------------------------------------------------------
/src/main/resources/filter_sample.txt:
--------------------------------------------------------------------------------
1 | aa bb cc aa aa aa dd dd ee ee ee ee
2 | ff aa bb zks
3 | ee kks
4 | ee zz zks
--------------------------------------------------------------------------------
/target/classes/com/spark/rdd/tutorial/util/MyIterator.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaikaishun/spark_tutorial/b4cba69f85332e4ea106b4462b82cd19ea5960a9/target/classes/com/spark/rdd/tutorial/util/MyIterator.class
--------------------------------------------------------------------------------
/target/classes/com/spark/rdd_tutorial/Tutorial13/CustomPartitionRdd.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaikaishun/spark_tutorial/b4cba69f85332e4ea106b4462b82cd19ea5960a9/target/classes/com/spark/rdd_tutorial/Tutorial13/CustomPartitionRdd.class
--------------------------------------------------------------------------------
/target/classes/com/spark/rdd_tutorial/Tutorial13/HashPartitionerRdd$1.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaikaishun/spark_tutorial/b4cba69f85332e4ea106b4462b82cd19ea5960a9/target/classes/com/spark/rdd_tutorial/Tutorial13/HashPartitionerRdd$1.class
--------------------------------------------------------------------------------
/target/classes/com/spark/rdd_tutorial/Tutorial13/HashPartitionerRdd$2.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaikaishun/spark_tutorial/b4cba69f85332e4ea106b4462b82cd19ea5960a9/target/classes/com/spark/rdd_tutorial/Tutorial13/HashPartitionerRdd$2.class
--------------------------------------------------------------------------------
/target/classes/com/spark/rdd_tutorial/Tutorial13/HashPartitionerRdd.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaikaishun/spark_tutorial/b4cba69f85332e4ea106b4462b82cd19ea5960a9/target/classes/com/spark/rdd_tutorial/Tutorial13/HashPartitionerRdd.class
--------------------------------------------------------------------------------
/target/classes/com/spark/rdd_tutorial/Tutorial13/JavaCustomPart.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaikaishun/spark_tutorial/b4cba69f85332e4ea106b4462b82cd19ea5960a9/target/classes/com/spark/rdd_tutorial/Tutorial13/JavaCustomPart.class
--------------------------------------------------------------------------------
/target/classes/com/spark/rdd_tutorial/tutorial1/ParallelizeRdd.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaikaishun/spark_tutorial/b4cba69f85332e4ea106b4462b82cd19ea5960a9/target/classes/com/spark/rdd_tutorial/tutorial1/ParallelizeRdd.class
--------------------------------------------------------------------------------
/target/classes/com/spark/rdd_tutorial/tutorial10/CollectAsMapRdd.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaikaishun/spark_tutorial/b4cba69f85332e4ea106b4462b82cd19ea5960a9/target/classes/com/spark/rdd_tutorial/tutorial10/CollectAsMapRdd.class
--------------------------------------------------------------------------------
/target/classes/com/spark/rdd_tutorial/tutorial10/CountByKeyRdd.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaikaishun/spark_tutorial/b4cba69f85332e4ea106b4462b82cd19ea5960a9/target/classes/com/spark/rdd_tutorial/tutorial10/CountByKeyRdd.class
--------------------------------------------------------------------------------
/target/classes/com/spark/rdd_tutorial/tutorial12/MapPartitionsRdd$1.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaikaishun/spark_tutorial/b4cba69f85332e4ea106b4462b82cd19ea5960a9/target/classes/com/spark/rdd_tutorial/tutorial12/MapPartitionsRdd$1.class
--------------------------------------------------------------------------------
/target/classes/com/spark/rdd_tutorial/tutorial12/MapPartitionsRdd$2.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaikaishun/spark_tutorial/b4cba69f85332e4ea106b4462b82cd19ea5960a9/target/classes/com/spark/rdd_tutorial/tutorial12/MapPartitionsRdd$2.class
--------------------------------------------------------------------------------
/target/classes/com/spark/rdd_tutorial/tutorial12/MapPartitionsRdd$3.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaikaishun/spark_tutorial/b4cba69f85332e4ea106b4462b82cd19ea5960a9/target/classes/com/spark/rdd_tutorial/tutorial12/MapPartitionsRdd$3.class
--------------------------------------------------------------------------------
/target/classes/com/spark/rdd_tutorial/tutorial12/MapPartitionsRdd$4.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaikaishun/spark_tutorial/b4cba69f85332e4ea106b4462b82cd19ea5960a9/target/classes/com/spark/rdd_tutorial/tutorial12/MapPartitionsRdd$4.class
--------------------------------------------------------------------------------
/target/classes/com/spark/rdd_tutorial/tutorial12/MapPartitionsRdd.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaikaishun/spark_tutorial/b4cba69f85332e4ea106b4462b82cd19ea5960a9/target/classes/com/spark/rdd_tutorial/tutorial12/MapPartitionsRdd.class
--------------------------------------------------------------------------------
/target/classes/com/spark/rdd_tutorial/tutorial12/MapPartitionsWithIndexRdd$1.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaikaishun/spark_tutorial/b4cba69f85332e4ea106b4462b82cd19ea5960a9/target/classes/com/spark/rdd_tutorial/tutorial12/MapPartitionsWithIndexRdd$1.class
--------------------------------------------------------------------------------
/target/classes/com/spark/rdd_tutorial/tutorial12/MapPartitionsWithIndexRdd$2.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaikaishun/spark_tutorial/b4cba69f85332e4ea106b4462b82cd19ea5960a9/target/classes/com/spark/rdd_tutorial/tutorial12/MapPartitionsWithIndexRdd$2.class
--------------------------------------------------------------------------------
/target/classes/com/spark/rdd_tutorial/tutorial12/MapPartitionsWithIndexRdd$3.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaikaishun/spark_tutorial/b4cba69f85332e4ea106b4462b82cd19ea5960a9/target/classes/com/spark/rdd_tutorial/tutorial12/MapPartitionsWithIndexRdd$3.class
--------------------------------------------------------------------------------
/target/classes/com/spark/rdd_tutorial/tutorial12/MapPartitionsWithIndexRdd$4.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaikaishun/spark_tutorial/b4cba69f85332e4ea106b4462b82cd19ea5960a9/target/classes/com/spark/rdd_tutorial/tutorial12/MapPartitionsWithIndexRdd$4.class
--------------------------------------------------------------------------------
/target/classes/com/spark/rdd_tutorial/tutorial12/MapPartitionsWithIndexRdd$5.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaikaishun/spark_tutorial/b4cba69f85332e4ea106b4462b82cd19ea5960a9/target/classes/com/spark/rdd_tutorial/tutorial12/MapPartitionsWithIndexRdd$5.class
--------------------------------------------------------------------------------
/target/classes/com/spark/rdd_tutorial/tutorial12/MapPartitionsWithIndexRdd.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaikaishun/spark_tutorial/b4cba69f85332e4ea106b4462b82cd19ea5960a9/target/classes/com/spark/rdd_tutorial/tutorial12/MapPartitionsWithIndexRdd.class
--------------------------------------------------------------------------------
/target/classes/com/spark/rdd_tutorial/tutorial2/FilterRdd$1.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaikaishun/spark_tutorial/b4cba69f85332e4ea106b4462b82cd19ea5960a9/target/classes/com/spark/rdd_tutorial/tutorial2/FilterRdd$1.class
--------------------------------------------------------------------------------
/target/classes/com/spark/rdd_tutorial/tutorial2/FilterRdd.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaikaishun/spark_tutorial/b4cba69f85332e4ea106b4462b82cd19ea5960a9/target/classes/com/spark/rdd_tutorial/tutorial2/FilterRdd.class
--------------------------------------------------------------------------------
/target/classes/com/spark/rdd_tutorial/tutorial2/FlatMapRdd$1.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaikaishun/spark_tutorial/b4cba69f85332e4ea106b4462b82cd19ea5960a9/target/classes/com/spark/rdd_tutorial/tutorial2/FlatMapRdd$1.class
--------------------------------------------------------------------------------
/target/classes/com/spark/rdd_tutorial/tutorial2/FlatMapRdd$2.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaikaishun/spark_tutorial/b4cba69f85332e4ea106b4462b82cd19ea5960a9/target/classes/com/spark/rdd_tutorial/tutorial2/FlatMapRdd$2.class
--------------------------------------------------------------------------------
/target/classes/com/spark/rdd_tutorial/tutorial2/FlatMapRdd.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaikaishun/spark_tutorial/b4cba69f85332e4ea106b4462b82cd19ea5960a9/target/classes/com/spark/rdd_tutorial/tutorial2/FlatMapRdd.class
--------------------------------------------------------------------------------
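Note: a matching sketch for flatMap, which emits zero or more output elements per input element. Spark 2.x is assumed, where the FlatMapFunction returns an Iterator (in 1.x it returned an Iterable):

    import java.util.Arrays;
    import org.apache.spark.SparkConf;
    import org.apache.spark.api.java.JavaRDD;
    import org.apache.spark.api.java.JavaSparkContext;

    // hypothetical sketch, not the repository's FlatMapRdd
    public class FlatMapSketch {
        public static void main(String[] args) {
            JavaSparkContext sc = new JavaSparkContext(
                    new SparkConf().setAppName("flatMap").setMaster("local[2]"));
            JavaRDD<String> lines = sc.textFile("src/main/resources/filter_sample.txt");
            // split each line on spaces and flatten: one output element per word
            JavaRDD<String> words = lines.flatMap(line -> Arrays.asList(line.split(" ")).iterator());
            System.out.println(words.count()); // 21 words in the sample file
            sc.stop();
        }
    }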
/target/classes/com/spark/rdd_tutorial/tutorial2/MapRdd$1.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaikaishun/spark_tutorial/b4cba69f85332e4ea106b4462b82cd19ea5960a9/target/classes/com/spark/rdd_tutorial/tutorial2/MapRdd$1.class
--------------------------------------------------------------------------------
/target/classes/com/spark/rdd_tutorial/tutorial2/MapRdd$2.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaikaishun/spark_tutorial/b4cba69f85332e4ea106b4462b82cd19ea5960a9/target/classes/com/spark/rdd_tutorial/tutorial2/MapRdd$2.class
--------------------------------------------------------------------------------
/target/classes/com/spark/rdd_tutorial/tutorial2/MapRdd.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaikaishun/spark_tutorial/b4cba69f85332e4ea106b4462b82cd19ea5960a9/target/classes/com/spark/rdd_tutorial/tutorial2/MapRdd.class
--------------------------------------------------------------------------------
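Note: map, by contrast, produces exactly one output element per input element. A minimal sketch under the same assumptions:

    import org.apache.spark.SparkConf;
    import org.apache.spark.api.java.JavaRDD;
    import org.apache.spark.api.java.JavaSparkContext;

    // hypothetical sketch, not the repository's MapRdd
    public class MapSketch {
        public static void main(String[] args) {
            JavaSparkContext sc = new JavaSparkContext(
                    new SparkConf().setAppName("map").setMaster("local[2]"));
            JavaRDD<String> lines = sc.textFile("src/main/resources/filter_sample.txt");
            // one output per input: each line mapped to its character length
            JavaRDD<Integer> lengths = lines.map(String::length);
            lengths.collect().forEach(System.out::println);
            sc.stop();
        }
    }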
/target/classes/com/spark/rdd_tutorial/tutorial3/DisUnionAndSoOn.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaikaishun/spark_tutorial/b4cba69f85332e4ea106b4462b82cd19ea5960a9/target/classes/com/spark/rdd_tutorial/tutorial3/DisUnionAndSoOn.class
--------------------------------------------------------------------------------
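Note: judging by its name, DisUnionAndSoOn covers distinct, union, and the related set-style transformations. A hedged sketch of those operators (the element values are invented for illustration):

    import java.util.Arrays;
    import org.apache.spark.SparkConf;
    import org.apache.spark.api.java.JavaRDD;
    import org.apache.spark.api.java.JavaSparkContext;

    // hypothetical sketch, not the repository's DisUnionAndSoOn
    public class SetOpsSketch {
        public static void main(String[] args) {
            JavaSparkContext sc = new JavaSparkContext(
                    new SparkConf().setAppName("setOps").setMaster("local[2]"));
            JavaRDD<Integer> a = sc.parallelize(Arrays.asList(1, 2, 3, 3));
            JavaRDD<Integer> b = sc.parallelize(Arrays.asList(3, 4, 5));
            System.out.println(a.distinct().collect());      // [1, 2, 3] (order may vary)
            System.out.println(a.union(b).collect());        // [1, 2, 3, 3, 3, 4, 5] -- duplicates kept
            System.out.println(a.intersection(b).collect()); // [3]
            System.out.println(a.subtract(b).collect());     // [1, 2]
            sc.stop();
        }
    }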
/target/classes/com/spark/rdd_tutorial/tutorial4/FlatMapToPairRdd$1.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaikaishun/spark_tutorial/b4cba69f85332e4ea106b4462b82cd19ea5960a9/target/classes/com/spark/rdd_tutorial/tutorial4/FlatMapToPairRdd$1.class
--------------------------------------------------------------------------------
/target/classes/com/spark/rdd_tutorial/tutorial4/FlatMapToPairRdd$2.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaikaishun/spark_tutorial/b4cba69f85332e4ea106b4462b82cd19ea5960a9/target/classes/com/spark/rdd_tutorial/tutorial4/FlatMapToPairRdd$2.class
--------------------------------------------------------------------------------
/target/classes/com/spark/rdd_tutorial/tutorial4/FlatMapToPairRdd.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaikaishun/spark_tutorial/b4cba69f85332e4ea106b4462b82cd19ea5960a9/target/classes/com/spark/rdd_tutorial/tutorial4/FlatMapToPairRdd.class
--------------------------------------------------------------------------------
/target/classes/com/spark/rdd_tutorial/tutorial4/MapToPairRdd$1.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaikaishun/spark_tutorial/b4cba69f85332e4ea106b4462b82cd19ea5960a9/target/classes/com/spark/rdd_tutorial/tutorial4/MapToPairRdd$1.class
--------------------------------------------------------------------------------
/target/classes/com/spark/rdd_tutorial/tutorial4/MapToPairRdd$2.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaikaishun/spark_tutorial/b4cba69f85332e4ea106b4462b82cd19ea5960a9/target/classes/com/spark/rdd_tutorial/tutorial4/MapToPairRdd$2.class
--------------------------------------------------------------------------------
/target/classes/com/spark/rdd_tutorial/tutorial4/MapToPairRdd.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaikaishun/spark_tutorial/b4cba69f85332e4ea106b4462b82cd19ea5960a9/target/classes/com/spark/rdd_tutorial/tutorial4/MapToPairRdd.class
--------------------------------------------------------------------------------
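Note: the tutorial4 binaries above pair map/flatMap with key-value output; the $1/$2 entries are the compiled anonymous PairFunction/PairFlatMapFunction implementations. A sketch of both operators (Spark 2.x assumed; class and variable names are hypothetical):

    import java.util.ArrayList;
    import java.util.List;
    import org.apache.spark.SparkConf;
    import org.apache.spark.api.java.JavaPairRDD;
    import org.apache.spark.api.java.JavaRDD;
    import org.apache.spark.api.java.JavaSparkContext;
    import scala.Tuple2;

    // hypothetical sketch, not the repository's MapToPairRdd / FlatMapToPairRdd
    public class PairSketch {
        public static void main(String[] args) {
            JavaSparkContext sc = new JavaSparkContext(
                    new SparkConf().setAppName("pairs").setMaster("local[2]"));
            JavaRDD<String> lines = sc.textFile("src/main/resources/filter_sample.txt");
            // mapToPair: one key-value pair per line (line -> its length)
            JavaPairRDD<String, Integer> linePairs =
                    lines.mapToPair(line -> new Tuple2<>(line, line.length()));
            System.out.println(linePairs.first());
            // flatMapToPair: many pairs per line (one (word, 1) pair per word)
            JavaPairRDD<String, Integer> wordPairs = lines.flatMapToPair(line -> {
                List<Tuple2<String, Integer>> out = new ArrayList<>();
                for (String w : line.split(" ")) {
                    out.add(new Tuple2<>(w, 1));
                }
                return out.iterator();
            });
            System.out.println(wordPairs.count()); // 21 (word, 1) pairs
            sc.stop();
        }
    }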
/target/classes/com/spark/rdd_tutorial/tutorial5/CombineByKeyRdd$1.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaikaishun/spark_tutorial/b4cba69f85332e4ea106b4462b82cd19ea5960a9/target/classes/com/spark/rdd_tutorial/tutorial5/CombineByKeyRdd$1.class
--------------------------------------------------------------------------------
/target/classes/com/spark/rdd_tutorial/tutorial5/CombineByKeyRdd$2.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaikaishun/spark_tutorial/b4cba69f85332e4ea106b4462b82cd19ea5960a9/target/classes/com/spark/rdd_tutorial/tutorial5/CombineByKeyRdd$2.class
--------------------------------------------------------------------------------
/target/classes/com/spark/rdd_tutorial/tutorial5/CombineByKeyRdd$3.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaikaishun/spark_tutorial/b4cba69f85332e4ea106b4462b82cd19ea5960a9/target/classes/com/spark/rdd_tutorial/tutorial5/CombineByKeyRdd$3.class
--------------------------------------------------------------------------------
/target/classes/com/spark/rdd_tutorial/tutorial5/CombineByKeyRdd$4.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaikaishun/spark_tutorial/b4cba69f85332e4ea106b4462b82cd19ea5960a9/target/classes/com/spark/rdd_tutorial/tutorial5/CombineByKeyRdd$4.class
--------------------------------------------------------------------------------
/target/classes/com/spark/rdd_tutorial/tutorial5/CombineByKeyRdd.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaikaishun/spark_tutorial/b4cba69f85332e4ea106b4462b82cd19ea5960a9/target/classes/com/spark/rdd_tutorial/tutorial5/CombineByKeyRdd.class
--------------------------------------------------------------------------------
/target/classes/com/spark/rdd_tutorial/tutorial5/ScoreDetail.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaikaishun/spark_tutorial/b4cba69f85332e4ea106b4462b82cd19ea5960a9/target/classes/com/spark/rdd_tutorial/tutorial5/ScoreDetail.class
--------------------------------------------------------------------------------
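Note: CombineByKeyRdd and its four numbered inner classes are the tutorial5 combineByKey example, with ScoreDetail as its data class. The sketch below computes a per-student score average, the classic combineByKey exercise; it substitutes plain (name, score) pairs for the repository's ScoreDetail POJO, so treat the data shape and names as assumptions:

    import java.util.Arrays;
    import org.apache.spark.SparkConf;
    import org.apache.spark.api.java.JavaPairRDD;
    import org.apache.spark.api.java.JavaSparkContext;
    import scala.Tuple2;

    // hypothetical sketch, not the repository's CombineByKeyRdd
    public class CombineByKeySketch {
        public static void main(String[] args) {
            JavaSparkContext sc = new JavaSparkContext(
                    new SparkConf().setAppName("combineByKey").setMaster("local[2]"));
            JavaPairRDD<String, Integer> scores = sc.parallelizePairs(Arrays.asList(
                    new Tuple2<>("xiaoming", 75), new Tuple2<>("xiaoming", 90),
                    new Tuple2<>("lihua", 95), new Tuple2<>("lihua", 85)));
            // build a (sum, count) accumulator per key, then turn it into an average
            JavaPairRDD<String, Tuple2<Integer, Integer>> sumCount = scores.combineByKey(
                    (Integer v) -> new Tuple2<Integer, Integer>(v, 1),     // createCombiner
                    (Tuple2<Integer, Integer> acc, Integer v) ->
                            new Tuple2<>(acc._1 + v, acc._2 + 1),          // mergeValue
                    (Tuple2<Integer, Integer> x, Tuple2<Integer, Integer> y) ->
                            new Tuple2<>(x._1 + y._1, x._2 + y._2));       // mergeCombiners
            sumCount.mapValues(t -> (double) t._1 / t._2)
                    .collect().forEach(System.out::println); // (xiaoming,82.5) (lihua,90.0), order may vary
            sc.stop();
        }
    }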
/target/classes/com/spark/rdd_tutorial/tutorial6/FoldByKey.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaikaishun/spark_tutorial/b4cba69f85332e4ea106b4462b82cd19ea5960a9/target/classes/com/spark/rdd_tutorial/tutorial6/FoldByKey.class
--------------------------------------------------------------------------------
/target/classes/com/spark/rdd_tutorial/tutorial6/ReduceByKeyRdd$1.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaikaishun/spark_tutorial/b4cba69f85332e4ea106b4462b82cd19ea5960a9/target/classes/com/spark/rdd_tutorial/tutorial6/ReduceByKeyRdd$1.class
--------------------------------------------------------------------------------
/target/classes/com/spark/rdd_tutorial/tutorial6/ReduceByKeyRdd$2.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaikaishun/spark_tutorial/b4cba69f85332e4ea106b4462b82cd19ea5960a9/target/classes/com/spark/rdd_tutorial/tutorial6/ReduceByKeyRdd$2.class
--------------------------------------------------------------------------------
/target/classes/com/spark/rdd_tutorial/tutorial6/ReduceByKeyRdd.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaikaishun/spark_tutorial/b4cba69f85332e4ea106b4462b82cd19ea5960a9/target/classes/com/spark/rdd_tutorial/tutorial6/ReduceByKeyRdd.class
--------------------------------------------------------------------------------
/target/classes/com/spark/rdd_tutorial/tutorial6/SortByKey.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaikaishun/spark_tutorial/b4cba69f85332e4ea106b4462b82cd19ea5960a9/target/classes/com/spark/rdd_tutorial/tutorial6/SortByKey.class
--------------------------------------------------------------------------------
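Note: the three tutorial6 classes above cover foldByKey, reduceByKey, and sortByKey. One compact sketch shows all three on the same pair RDD (hypothetical names; results from collect() may come back in any order except for sortByKey):

    import java.util.Arrays;
    import org.apache.spark.SparkConf;
    import org.apache.spark.api.java.JavaPairRDD;
    import org.apache.spark.api.java.JavaSparkContext;
    import scala.Tuple2;

    // hypothetical sketch, not the repository's tutorial6 sources
    public class ByKeySketch {
        public static void main(String[] args) {
            JavaSparkContext sc = new JavaSparkContext(
                    new SparkConf().setAppName("byKey").setMaster("local[2]"));
            JavaPairRDD<String, Integer> pairs = sc.parallelizePairs(Arrays.asList(
                    new Tuple2<>("aa", 1), new Tuple2<>("bb", 2),
                    new Tuple2<>("aa", 3), new Tuple2<>("bb", 4)));
            // reduceByKey: merge the values of each key with a binary function
            System.out.println(pairs.reduceByKey(Integer::sum).collect());  // (aa,4), (bb,6)
            // foldByKey: like reduceByKey, but with an explicit zero value
            System.out.println(pairs.foldByKey(0, Integer::sum).collect()); // same result
            // sortByKey: total order on keys; false = descending, so "bb" pairs first
            System.out.println(pairs.sortByKey(false).collect());
            sc.stop();
        }
    }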
/target/classes/com/spark/rdd_tutorial/tutorial7/CogroupRdd.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaikaishun/spark_tutorial/b4cba69f85332e4ea106b4462b82cd19ea5960a9/target/classes/com/spark/rdd_tutorial/tutorial7/CogroupRdd.class
--------------------------------------------------------------------------------
/target/classes/com/spark/rdd_tutorial/tutorial7/GroupByKeyRdd$1.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaikaishun/spark_tutorial/b4cba69f85332e4ea106b4462b82cd19ea5960a9/target/classes/com/spark/rdd_tutorial/tutorial7/GroupByKeyRdd$1.class
--------------------------------------------------------------------------------
/target/classes/com/spark/rdd_tutorial/tutorial7/GroupByKeyRdd$2.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaikaishun/spark_tutorial/b4cba69f85332e4ea106b4462b82cd19ea5960a9/target/classes/com/spark/rdd_tutorial/tutorial7/GroupByKeyRdd$2.class
--------------------------------------------------------------------------------
/target/classes/com/spark/rdd_tutorial/tutorial7/GroupByKeyRdd.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaikaishun/spark_tutorial/b4cba69f85332e4ea106b4462b82cd19ea5960a9/target/classes/com/spark/rdd_tutorial/tutorial7/GroupByKeyRdd.class
--------------------------------------------------------------------------------
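Note: tutorial7 pairs groupByKey with cogroup. A sketch of both, with made-up data (hypothetical names; Spark 2.x assumed):

    import java.util.Arrays;
    import org.apache.spark.SparkConf;
    import org.apache.spark.api.java.JavaPairRDD;
    import org.apache.spark.api.java.JavaSparkContext;
    import scala.Tuple2;

    // hypothetical sketch, not the repository's GroupByKeyRdd / CogroupRdd
    public class GroupCogroupSketch {
        public static void main(String[] args) {
            JavaSparkContext sc = new JavaSparkContext(
                    new SparkConf().setAppName("groupCogroup").setMaster("local[2]"));
            JavaPairRDD<String, Integer> scores = sc.parallelizePairs(Arrays.asList(
                    new Tuple2<>("aa", 1), new Tuple2<>("aa", 2), new Tuple2<>("bb", 3)));
            JavaPairRDD<String, String> tags = sc.parallelizePairs(Arrays.asList(
                    new Tuple2<>("aa", "x"), new Tuple2<>("cc", "y")));
            // groupByKey: all values for a key gathered into one Iterable
            JavaPairRDD<String, Iterable<Integer>> grouped = scores.groupByKey();
            System.out.println(grouped.collect()); // e.g. [(aa,[1, 2]), (bb,[3])]
            // cogroup: group two RDDs by key at once; a key present on either side
            // appears, with an empty Iterable on the side that lacks it
            JavaPairRDD<String, Tuple2<Iterable<Integer>, Iterable<String>>> cg =
                    scores.cogroup(tags);
            System.out.println(cg.collect());
            sc.stop();
        }
    }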
/target/classes/com/spark/rdd_tutorial/tutorial8/JoinRDD$1.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaikaishun/spark_tutorial/b4cba69f85332e4ea106b4462b82cd19ea5960a9/target/classes/com/spark/rdd_tutorial/tutorial8/JoinRDD$1.class
--------------------------------------------------------------------------------
/target/classes/com/spark/rdd_tutorial/tutorial8/JoinRDD$2.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaikaishun/spark_tutorial/b4cba69f85332e4ea106b4462b82cd19ea5960a9/target/classes/com/spark/rdd_tutorial/tutorial8/JoinRDD$2.class
--------------------------------------------------------------------------------
/target/classes/com/spark/rdd_tutorial/tutorial8/JoinRDD$3.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaikaishun/spark_tutorial/b4cba69f85332e4ea106b4462b82cd19ea5960a9/target/classes/com/spark/rdd_tutorial/tutorial8/JoinRDD$3.class
--------------------------------------------------------------------------------
/target/classes/com/spark/rdd_tutorial/tutorial8/JoinRDD$4.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaikaishun/spark_tutorial/b4cba69f85332e4ea106b4462b82cd19ea5960a9/target/classes/com/spark/rdd_tutorial/tutorial8/JoinRDD$4.class
--------------------------------------------------------------------------------
/target/classes/com/spark/rdd_tutorial/tutorial8/JoinRDD.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaikaishun/spark_tutorial/b4cba69f85332e4ea106b4462b82cd19ea5960a9/target/classes/com/spark/rdd_tutorial/tutorial8/JoinRDD.class
--------------------------------------------------------------------------------
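Note: the four numbered JoinRDD inner classes suggest the tutorial8 source exercises several join variants. A minimal inner-join sketch (hypothetical names and data):

    import java.util.Arrays;
    import org.apache.spark.SparkConf;
    import org.apache.spark.api.java.JavaPairRDD;
    import org.apache.spark.api.java.JavaSparkContext;
    import scala.Tuple2;

    // hypothetical sketch, not the repository's JoinRDD
    public class JoinSketch {
        public static void main(String[] args) {
            JavaSparkContext sc = new JavaSparkContext(
                    new SparkConf().setAppName("join").setMaster("local[2]"));
            JavaPairRDD<String, Integer> ages = sc.parallelizePairs(Arrays.asList(
                    new Tuple2<>("aa", 20), new Tuple2<>("bb", 30)));
            JavaPairRDD<String, String> cities = sc.parallelizePairs(Arrays.asList(
                    new Tuple2<>("aa", "Beijing"), new Tuple2<>("cc", "Shanghai")));
            // inner join: only keys present on both sides survive
            JavaPairRDD<String, Tuple2<Integer, String>> joined = ages.join(cities);
            System.out.println(joined.collect()); // [(aa,(20,Beijing))]
            sc.stop();
        }
    }

leftOuterJoin, rightOuterJoin, and fullOuterJoin follow the same shape, wrapping the possibly-missing side in org.apache.spark.api.java.Optional (in Spark 2.x).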
/target/classes/com/spark/rdd_tutorial/tutorial9/SparkAction1.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaikaishun/spark_tutorial/b4cba69f85332e4ea106b4462b82cd19ea5960a9/target/classes/com/spark/rdd_tutorial/tutorial9/SparkAction1.class
--------------------------------------------------------------------------------
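Note: SparkAction1 is the tutorial9 example of actions, the operations that trigger execution and return results to the driver rather than building a new RDD. A sketch of the common ones (hypothetical names and data):

    import java.util.Arrays;
    import org.apache.spark.SparkConf;
    import org.apache.spark.api.java.JavaRDD;
    import org.apache.spark.api.java.JavaSparkContext;

    // hypothetical sketch, not the repository's SparkAction1
    public class ActionSketch {
        public static void main(String[] args) {
            JavaSparkContext sc = new JavaSparkContext(
                    new SparkConf().setAppName("actions").setMaster("local[2]"));
            JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(3, 1, 4, 1, 5));
            System.out.println(rdd.count());              // 5
            System.out.println(rdd.first());              // 3
            System.out.println(rdd.take(3));              // [3, 1, 4]
            System.out.println(rdd.reduce(Integer::sum)); // 14
            System.out.println(rdd.collect());            // [3, 1, 4, 1, 5]
            sc.stop();
        }
    }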
/target/classes/filter_sample.txt:
--------------------------------------------------------------------------------
1 | aa bb cc aa aa aa dd dd ee ee ee ee
2 | ff aa bb zks
3 | ee kks
4 | ee zz zks
--------------------------------------------------------------------------------
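Note: filter_sample.txt above is the shared input file for the tutorials. As a closing worked example, the classic word count over this exact file ties several of the operators together (a sketch assuming Spark 2.x; the expected counts in the comment were tallied by hand from the four lines):

    import java.util.Arrays;
    import org.apache.spark.SparkConf;
    import org.apache.spark.api.java.JavaPairRDD;
    import org.apache.spark.api.java.JavaRDD;
    import org.apache.spark.api.java.JavaSparkContext;
    import scala.Tuple2;

    // hypothetical sketch combining flatMap, mapToPair, and reduceByKey
    public class WordCountSketch {
        public static void main(String[] args) {
            JavaSparkContext sc = new JavaSparkContext(
                    new SparkConf().setAppName("wordCount").setMaster("local[2]"));
            JavaRDD<String> lines = sc.textFile("src/main/resources/filter_sample.txt");
            JavaPairRDD<String, Integer> counts = lines
                    .flatMap(line -> Arrays.asList(line.split(" ")).iterator()) // line -> words
                    .mapToPair(w -> new Tuple2<>(w, 1))                         // word -> (word, 1)
                    .reduceByKey(Integer::sum);                                 // sum per word
            counts.collect().forEach(System.out::println);
            // expected, in some order:
            // (aa,5) (bb,2) (cc,1) (dd,2) (ee,6) (ff,1) (kks,1) (zks,2) (zz,1)
            sc.stop();
        }
    }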