        Dataset<Row> teenagers2 = spark.sql("select name from parquetPeople where age > 13 and age <= 19");

        teenagers2.show();
    }
}

--------------------------------------------------------------------------------
/spark-sql/src/main/java/com/spark/entity/People.java:
--------------------------------------------------------------------------------
package com.spark.entity;

import java.io.Serializable;

/**
 * Created with IDEA
 * User: vector
 * Date: 2018/4/20
 * Time: 11:14
 * Description: serializable POJO used by the Spark SQL examples
 */
public class People implements Serializable {
    private String name;
    private int age;

    public String getName() {
        return name;
    }

    public void setName(String name) {
        this.name = name;
    }

    public int getAge() {
        return age;
    }

    public void setAge(int age) {
        this.age = age;
    }

    @Override
    public String toString() {
        return "People{" +
                "name='" + name + '\'' +
                ", age=" + age +
                '}';
    }
}

--------------------------------------------------------------------------------
/spark-sql/src/main/resources/people.json:
--------------------------------------------------------------------------------
1 | {"name":"Michael"}
2 | {"name":"Andy", "age":30}
3 | {"name":"Justin", "age":19}
--------------------------------------------------------------------------------
/word-count/pom.xml:
--------------------------------------------------------------------------------
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <artifactId>word-count</artifactId>
    <groupId>com.spark.sample</groupId>
    <version>1.0-SNAPSHOT</version>
    <modelVersion>4.0.0</modelVersion>

    <properties>
        <java.version>1.8</java.version>
        <spark.version>2.4.3</spark.version>
    </properties>

    <dependencies>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-core_2.11</artifactId>
            <version>${spark.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-streaming_2.11</artifactId>
            <version>${spark.version}</version>
        </dependency>
        <dependency>
            <groupId>commons-io</groupId>
            <artifactId>commons-io</artifactId>
            <version>2.4</version>
        </dependency>
    </dependencies>

    <build>
        <plugins>
            <plugin>
                <artifactId>maven-compiler-plugin</artifactId>
                <configuration>
                    <source>${java.version}</source>
                    <target>${java.version}</target>
                    <encoding>UTF-8</encoding>
                </configuration>
            </plugin>
        </plugins>
    </build>
</project>
--------------------------------------------------------------------------------
/word-count/src/main/java/com/spark/WordCount.java:
--------------------------------------------------------------------------------
package com.spark;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.FlatMapFunction;
import org.apache.spark.api.java.function.Function2;
import org.apache.spark.api.java.function.PairFunction;
import org.apache.spark.api.java.function.VoidFunction;
import scala.Tuple2;

import java.util.Arrays;
import java.util.List;

/**
 * @Author: wangxc
 * @GitHub: https://github.com/vector4wang
 * @CSDN: http://blog.csdn.net/qqhjqs?viewmode=contents
 * @BLOG: http://vector4wang.tk
 * @wxid: BMHJQS
 *
 * Word count over the English edition of "Notre-Dame de Paris" (《巴黎圣母院》); for local learning and testing.
 */
public class WordCount {
    public static void main(String[] args) {

        SparkConf conf = new SparkConf()
                .setMaster("local")
                .setAppName("WordCount")
                .set("spark.cores.max", "1")
                .set("spark.eventLog.enabled", "true");
        // Print the effective configuration.
        Tuple2<String, String>[] all = conf.getAll();
        for (Tuple2<String, String> stringStringTuple2 : all) {
            System.out.println(stringStringTuple2._1 + ": " + stringStringTuple2._2);
        }
        JavaSparkContext context = new JavaSparkContext(conf);
        // For testing inside IDEA: read the file from the classpath.
        String classFilePath = WordCount.class.getResource("/blsmy.txt").getPath();

        JavaRDD<String> javaRDD = context.textFile(classFilePath);
        // JavaRDD<String> javaRDD = context.textFile("file:///mnt/data/blsmy.txt"); // for cluster runs (every node running the job must have this file)
        // JavaRDD<String> javaRDD = context.textFile("hdfs://spark-master:9000/wordcount/blsmy.txt");

        // Split each line into words.
        JavaRDD<String> words = javaRDD.flatMap((FlatMapFunction<String, String>) s -> {
            String[] split = s.split(" ");
            List<String> strings = Arrays.asList(split);
            return strings.iterator();
        });

        JavaPairRDD<String, Integer> pairs = words.mapToPair((PairFunction<String, String, Integer>) s -> new Tuple2<>(s, 1));

        JavaPairRDD<String, Integer> reduceByKey = pairs.reduceByKey((Function2<Integer, Integer, Integer>) (integer, integer2) -> integer + integer2);

        // sortByKey only sorts by key, so swap (word, count) to (count, word) first ...
        JavaPairRDD<Integer, String> integerStringJavaPairRDD = reduceByKey.mapToPair((PairFunction<Tuple2<String, Integer>, Integer, String>) stringIntegerTuple2 -> new Tuple2<>(stringIntegerTuple2._2, stringIntegerTuple2._1));

        // ... sort descending by count, then swap back to (word, count).
        JavaPairRDD<String, Integer> mapToPair = integerStringJavaPairRDD.sortByKey(false).mapToPair((PairFunction<Tuple2<Integer, String>, String, Integer>) tuple -> new Tuple2<>(tuple._2, tuple._1));

        mapToPair.foreach((VoidFunction<Tuple2<String, Integer>>) tuple -> System.out.println(tuple._1 + ": " + tuple._2));
    }
}

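As a side note: with Java 8 target typing the explicit functional-interface casts above are optional, and `Tuple2.swap()` tidies the sort-by-count trick. A behavior-equivalent sketch (illustrative, not part of the repo; reuses `context` and `classFilePath` from the file above):

JavaPairRDD<String, Integer> counts = context.textFile(classFilePath)
        .flatMap(s -> Arrays.asList(s.split(" ")).iterator())
        .mapToPair(s -> new Tuple2<>(s, 1))
        .reduceByKey(Integer::sum);

counts.mapToPair(Tuple2::swap)   // (word, count) -> (count, word)
        .sortByKey(false)        // descending by count
        .mapToPair(Tuple2::swap) // back to (word, count)
        .foreach(t -> System.out.println(t._1 + ": " + t._2));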
--------------------------------------------------------------------------------
/word-count/src/main/java/com/spark/streaming/SparkStreamingDemo.java:
--------------------------------------------------------------------------------
package com.spark.streaming;

import org.apache.spark.SparkConf;
import org.apache.spark.streaming.Durations;
import org.apache.spark.streaming.api.java.JavaDStream;
import org.apache.spark.streaming.api.java.JavaPairDStream;
import org.apache.spark.streaming.api.java.JavaReceiverInputDStream;
import org.apache.spark.streaming.api.java.JavaStreamingContext;
import scala.Tuple2;

import java.util.Arrays;

/**
 * @author vector
 * @date: 2019/7/4 17:05
 */
public class SparkStreamingDemo {
    public static void main(String[] args) throws InterruptedException {
        SparkConf conf = new SparkConf()
                .setMaster("local[2]")
                .setAppName("NetWorkWordCount");

        // Micro-batch interval of 5 seconds.
        JavaStreamingContext jsc = new JavaStreamingContext(conf, Durations.seconds(5));

        // On the target machine, run `nc -lp 9999` to open port 9999, then type some strings.
        JavaReceiverInputDStream<String> lines = jsc.socketTextStream("192.168.1.33", 9999);
        JavaDStream<String> words = lines.flatMap(line -> Arrays.asList(line.split(" ")).iterator());
        JavaPairDStream<String, Integer> pairDStream = words.mapToPair(word -> new Tuple2<>(word, 1));
        JavaPairDStream<String, Integer> wordCounts = pairDStream.reduceByKey((i1, i2) -> i1 + i2);
        wordCounts.print();
        jsc.start();
        jsc.awaitTermination();
    }
}

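For single-machine testing (an assumption, not what the repo ships: both sides on localhost), run netcat in one terminal and point the receiver at localhost instead of the LAN address:

JavaReceiverInputDStream<String> lines = jsc.socketTextStream("localhost", 9999);

The netcat flags vary by variant: the Spark docs use `nc -lk 9999` (OpenBSD netcat, where -k keeps listening across reconnects), while traditional/GNU netcat wants `nc -l -p 9999` as in the comment above.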
--------------------------------------------------------------------------------
/word-count/src/main/resources/blsmy.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vector4wang/quick-spark-process/d2ddd8cfb84cf4f6f0d5d2b23b503276ec8e579a/word-count/src/main/resources/blsmy.txt
--------------------------------------------------------------------------------