├── .gitignore ├── LICENSE ├── README.md ├── data ├── inputfile.txt └── people.txt ├── pom.xml ├── renovate.json └── src ├── main ├── java │ └── com │ │ └── javachen │ │ └── spark │ │ └── examples │ │ ├── mllib │ │ └── JavaALS.java │ │ ├── rdd │ │ └── JavaWordCount.java │ │ └── sparksql │ │ ├── JavaSparkSQLByReflection.java │ │ └── JavaSparkSQLBySchema.java ├── python │ ├── PythonALS.py │ ├── PythonSparkSQLByReflection.py │ ├── PythonSparkSQLBySchema.py │ └── PythonWordCount.py └── scala │ ├── com │ └── javachen │ │ └── spark │ │ └── examples │ │ ├── mllib │ │ ├── EvaluateResult.scala │ │ ├── MovieLensALS.scala │ │ ├── MovieSimilarities.scala │ │ ├── ScalaLocalALS.scala │ │ └── ScalaMovieLensALS.scala │ │ ├── rdd │ │ ├── ActionTest.scala │ │ ├── Aggregate.scala │ │ ├── AggregateOrder.scala │ │ ├── Cartesian.scala │ │ ├── CollectAsMap.scala │ │ ├── FlatMap.scala │ │ ├── GroupByAction.scala │ │ ├── GroupByKey.scala │ │ ├── GroupWith.scala │ │ ├── Join.scala │ │ ├── Lookup.scala │ │ ├── MapPartitions.scala │ │ ├── MapValues.scala │ │ ├── PartitionBy.scala │ │ ├── Pipe.scala │ │ ├── ReduceByKey.scala │ │ ├── ScalaWordCount.scala │ │ └── TransformTest.scala │ │ └── sparksql │ │ ├── ScalaSparkSQLByReflection.scala │ │ └── ScalaSparkSQLBySchema.scala │ └── org │ └── apache │ └── spark │ └── examples │ ├── BroadcastTest.scala │ ├── DriverSubmissionTest.scala │ ├── ExceptionHandlingTest.scala │ ├── GroupByTest.scala │ ├── HdfsTest.scala │ ├── LocalALS.scala │ ├── LocalFileLR.scala │ ├── LocalKMeans.scala │ ├── LocalLR.scala │ ├── LocalPi.scala │ ├── LogQuery.scala │ ├── MultiBroadcastTest.scala │ ├── SimpleSkewedGroupByTest.scala │ ├── SkewedGroupByTest.scala │ ├── SparkALS.scala │ ├── SparkHdfsLR.scala │ ├── SparkKMeans.scala │ ├── SparkLR.scala │ ├── SparkPageRank.scala │ ├── SparkPi.scala │ ├── SparkTC.scala │ ├── SparkTachyonHdfsLR.scala │ └── SparkTachyonPi.scala └── test └── java └── com └── javachen └── spark └── AppTest.java /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chensoul/learning-spark/HEAD/.gitignore -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chensoul/learning-spark/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chensoul/learning-spark/HEAD/README.md -------------------------------------------------------------------------------- /data/inputfile.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chensoul/learning-spark/HEAD/data/inputfile.txt -------------------------------------------------------------------------------- /data/people.txt: -------------------------------------------------------------------------------- 1 | Michael, 29 2 | Andy, 30 3 | Justin, 19 -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chensoul/learning-spark/HEAD/pom.xml -------------------------------------------------------------------------------- /renovate.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chensoul/learning-spark/HEAD/renovate.json -------------------------------------------------------------------------------- /src/main/java/com/javachen/spark/examples/mllib/JavaALS.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chensoul/learning-spark/HEAD/src/main/java/com/javachen/spark/examples/mllib/JavaALS.java -------------------------------------------------------------------------------- /src/main/java/com/javachen/spark/examples/rdd/JavaWordCount.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chensoul/learning-spark/HEAD/src/main/java/com/javachen/spark/examples/rdd/JavaWordCount.java -------------------------------------------------------------------------------- /src/main/java/com/javachen/spark/examples/sparksql/JavaSparkSQLByReflection.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chensoul/learning-spark/HEAD/src/main/java/com/javachen/spark/examples/sparksql/JavaSparkSQLByReflection.java -------------------------------------------------------------------------------- /src/main/java/com/javachen/spark/examples/sparksql/JavaSparkSQLBySchema.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chensoul/learning-spark/HEAD/src/main/java/com/javachen/spark/examples/sparksql/JavaSparkSQLBySchema.java -------------------------------------------------------------------------------- /src/main/python/PythonALS.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chensoul/learning-spark/HEAD/src/main/python/PythonALS.py -------------------------------------------------------------------------------- /src/main/python/PythonSparkSQLByReflection.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chensoul/learning-spark/HEAD/src/main/python/PythonSparkSQLByReflection.py -------------------------------------------------------------------------------- /src/main/python/PythonSparkSQLBySchema.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chensoul/learning-spark/HEAD/src/main/python/PythonSparkSQLBySchema.py -------------------------------------------------------------------------------- /src/main/python/PythonWordCount.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chensoul/learning-spark/HEAD/src/main/python/PythonWordCount.py -------------------------------------------------------------------------------- /src/main/scala/com/javachen/spark/examples/mllib/EvaluateResult.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chensoul/learning-spark/HEAD/src/main/scala/com/javachen/spark/examples/mllib/EvaluateResult.scala -------------------------------------------------------------------------------- /src/main/scala/com/javachen/spark/examples/mllib/MovieLensALS.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chensoul/learning-spark/HEAD/src/main/scala/com/javachen/spark/examples/mllib/MovieLensALS.scala -------------------------------------------------------------------------------- /src/main/scala/com/javachen/spark/examples/mllib/MovieSimilarities.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chensoul/learning-spark/HEAD/src/main/scala/com/javachen/spark/examples/mllib/MovieSimilarities.scala -------------------------------------------------------------------------------- /src/main/scala/com/javachen/spark/examples/mllib/ScalaLocalALS.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chensoul/learning-spark/HEAD/src/main/scala/com/javachen/spark/examples/mllib/ScalaLocalALS.scala -------------------------------------------------------------------------------- /src/main/scala/com/javachen/spark/examples/mllib/ScalaMovieLensALS.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chensoul/learning-spark/HEAD/src/main/scala/com/javachen/spark/examples/mllib/ScalaMovieLensALS.scala -------------------------------------------------------------------------------- /src/main/scala/com/javachen/spark/examples/rdd/ActionTest.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chensoul/learning-spark/HEAD/src/main/scala/com/javachen/spark/examples/rdd/ActionTest.scala -------------------------------------------------------------------------------- /src/main/scala/com/javachen/spark/examples/rdd/Aggregate.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chensoul/learning-spark/HEAD/src/main/scala/com/javachen/spark/examples/rdd/Aggregate.scala -------------------------------------------------------------------------------- /src/main/scala/com/javachen/spark/examples/rdd/AggregateOrder.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chensoul/learning-spark/HEAD/src/main/scala/com/javachen/spark/examples/rdd/AggregateOrder.scala -------------------------------------------------------------------------------- /src/main/scala/com/javachen/spark/examples/rdd/Cartesian.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chensoul/learning-spark/HEAD/src/main/scala/com/javachen/spark/examples/rdd/Cartesian.scala -------------------------------------------------------------------------------- /src/main/scala/com/javachen/spark/examples/rdd/CollectAsMap.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chensoul/learning-spark/HEAD/src/main/scala/com/javachen/spark/examples/rdd/CollectAsMap.scala -------------------------------------------------------------------------------- /src/main/scala/com/javachen/spark/examples/rdd/FlatMap.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chensoul/learning-spark/HEAD/src/main/scala/com/javachen/spark/examples/rdd/FlatMap.scala -------------------------------------------------------------------------------- /src/main/scala/com/javachen/spark/examples/rdd/GroupByAction.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chensoul/learning-spark/HEAD/src/main/scala/com/javachen/spark/examples/rdd/GroupByAction.scala -------------------------------------------------------------------------------- /src/main/scala/com/javachen/spark/examples/rdd/GroupByKey.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chensoul/learning-spark/HEAD/src/main/scala/com/javachen/spark/examples/rdd/GroupByKey.scala -------------------------------------------------------------------------------- /src/main/scala/com/javachen/spark/examples/rdd/GroupWith.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chensoul/learning-spark/HEAD/src/main/scala/com/javachen/spark/examples/rdd/GroupWith.scala -------------------------------------------------------------------------------- /src/main/scala/com/javachen/spark/examples/rdd/Join.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chensoul/learning-spark/HEAD/src/main/scala/com/javachen/spark/examples/rdd/Join.scala -------------------------------------------------------------------------------- /src/main/scala/com/javachen/spark/examples/rdd/Lookup.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chensoul/learning-spark/HEAD/src/main/scala/com/javachen/spark/examples/rdd/Lookup.scala -------------------------------------------------------------------------------- /src/main/scala/com/javachen/spark/examples/rdd/MapPartitions.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chensoul/learning-spark/HEAD/src/main/scala/com/javachen/spark/examples/rdd/MapPartitions.scala -------------------------------------------------------------------------------- /src/main/scala/com/javachen/spark/examples/rdd/MapValues.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chensoul/learning-spark/HEAD/src/main/scala/com/javachen/spark/examples/rdd/MapValues.scala -------------------------------------------------------------------------------- /src/main/scala/com/javachen/spark/examples/rdd/PartitionBy.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chensoul/learning-spark/HEAD/src/main/scala/com/javachen/spark/examples/rdd/PartitionBy.scala -------------------------------------------------------------------------------- /src/main/scala/com/javachen/spark/examples/rdd/Pipe.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chensoul/learning-spark/HEAD/src/main/scala/com/javachen/spark/examples/rdd/Pipe.scala -------------------------------------------------------------------------------- /src/main/scala/com/javachen/spark/examples/rdd/ReduceByKey.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chensoul/learning-spark/HEAD/src/main/scala/com/javachen/spark/examples/rdd/ReduceByKey.scala -------------------------------------------------------------------------------- /src/main/scala/com/javachen/spark/examples/rdd/ScalaWordCount.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chensoul/learning-spark/HEAD/src/main/scala/com/javachen/spark/examples/rdd/ScalaWordCount.scala -------------------------------------------------------------------------------- /src/main/scala/com/javachen/spark/examples/rdd/TransformTest.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chensoul/learning-spark/HEAD/src/main/scala/com/javachen/spark/examples/rdd/TransformTest.scala -------------------------------------------------------------------------------- /src/main/scala/com/javachen/spark/examples/sparksql/ScalaSparkSQLByReflection.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chensoul/learning-spark/HEAD/src/main/scala/com/javachen/spark/examples/sparksql/ScalaSparkSQLByReflection.scala -------------------------------------------------------------------------------- /src/main/scala/com/javachen/spark/examples/sparksql/ScalaSparkSQLBySchema.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chensoul/learning-spark/HEAD/src/main/scala/com/javachen/spark/examples/sparksql/ScalaSparkSQLBySchema.scala -------------------------------------------------------------------------------- /src/main/scala/org/apache/spark/examples/BroadcastTest.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chensoul/learning-spark/HEAD/src/main/scala/org/apache/spark/examples/BroadcastTest.scala -------------------------------------------------------------------------------- /src/main/scala/org/apache/spark/examples/DriverSubmissionTest.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chensoul/learning-spark/HEAD/src/main/scala/org/apache/spark/examples/DriverSubmissionTest.scala -------------------------------------------------------------------------------- /src/main/scala/org/apache/spark/examples/ExceptionHandlingTest.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chensoul/learning-spark/HEAD/src/main/scala/org/apache/spark/examples/ExceptionHandlingTest.scala -------------------------------------------------------------------------------- /src/main/scala/org/apache/spark/examples/GroupByTest.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chensoul/learning-spark/HEAD/src/main/scala/org/apache/spark/examples/GroupByTest.scala -------------------------------------------------------------------------------- /src/main/scala/org/apache/spark/examples/HdfsTest.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chensoul/learning-spark/HEAD/src/main/scala/org/apache/spark/examples/HdfsTest.scala -------------------------------------------------------------------------------- /src/main/scala/org/apache/spark/examples/LocalALS.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chensoul/learning-spark/HEAD/src/main/scala/org/apache/spark/examples/LocalALS.scala -------------------------------------------------------------------------------- /src/main/scala/org/apache/spark/examples/LocalFileLR.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chensoul/learning-spark/HEAD/src/main/scala/org/apache/spark/examples/LocalFileLR.scala -------------------------------------------------------------------------------- /src/main/scala/org/apache/spark/examples/LocalKMeans.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chensoul/learning-spark/HEAD/src/main/scala/org/apache/spark/examples/LocalKMeans.scala -------------------------------------------------------------------------------- /src/main/scala/org/apache/spark/examples/LocalLR.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chensoul/learning-spark/HEAD/src/main/scala/org/apache/spark/examples/LocalLR.scala -------------------------------------------------------------------------------- /src/main/scala/org/apache/spark/examples/LocalPi.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chensoul/learning-spark/HEAD/src/main/scala/org/apache/spark/examples/LocalPi.scala -------------------------------------------------------------------------------- /src/main/scala/org/apache/spark/examples/LogQuery.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chensoul/learning-spark/HEAD/src/main/scala/org/apache/spark/examples/LogQuery.scala -------------------------------------------------------------------------------- /src/main/scala/org/apache/spark/examples/MultiBroadcastTest.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chensoul/learning-spark/HEAD/src/main/scala/org/apache/spark/examples/MultiBroadcastTest.scala -------------------------------------------------------------------------------- /src/main/scala/org/apache/spark/examples/SimpleSkewedGroupByTest.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chensoul/learning-spark/HEAD/src/main/scala/org/apache/spark/examples/SimpleSkewedGroupByTest.scala -------------------------------------------------------------------------------- /src/main/scala/org/apache/spark/examples/SkewedGroupByTest.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chensoul/learning-spark/HEAD/src/main/scala/org/apache/spark/examples/SkewedGroupByTest.scala -------------------------------------------------------------------------------- /src/main/scala/org/apache/spark/examples/SparkALS.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chensoul/learning-spark/HEAD/src/main/scala/org/apache/spark/examples/SparkALS.scala -------------------------------------------------------------------------------- /src/main/scala/org/apache/spark/examples/SparkHdfsLR.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chensoul/learning-spark/HEAD/src/main/scala/org/apache/spark/examples/SparkHdfsLR.scala -------------------------------------------------------------------------------- /src/main/scala/org/apache/spark/examples/SparkKMeans.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chensoul/learning-spark/HEAD/src/main/scala/org/apache/spark/examples/SparkKMeans.scala -------------------------------------------------------------------------------- /src/main/scala/org/apache/spark/examples/SparkLR.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chensoul/learning-spark/HEAD/src/main/scala/org/apache/spark/examples/SparkLR.scala -------------------------------------------------------------------------------- /src/main/scala/org/apache/spark/examples/SparkPageRank.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chensoul/learning-spark/HEAD/src/main/scala/org/apache/spark/examples/SparkPageRank.scala -------------------------------------------------------------------------------- /src/main/scala/org/apache/spark/examples/SparkPi.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chensoul/learning-spark/HEAD/src/main/scala/org/apache/spark/examples/SparkPi.scala -------------------------------------------------------------------------------- /src/main/scala/org/apache/spark/examples/SparkTC.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chensoul/learning-spark/HEAD/src/main/scala/org/apache/spark/examples/SparkTC.scala -------------------------------------------------------------------------------- /src/main/scala/org/apache/spark/examples/SparkTachyonHdfsLR.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chensoul/learning-spark/HEAD/src/main/scala/org/apache/spark/examples/SparkTachyonHdfsLR.scala -------------------------------------------------------------------------------- /src/main/scala/org/apache/spark/examples/SparkTachyonPi.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chensoul/learning-spark/HEAD/src/main/scala/org/apache/spark/examples/SparkTachyonPi.scala -------------------------------------------------------------------------------- /src/test/java/com/javachen/spark/AppTest.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chensoul/learning-spark/HEAD/src/test/java/com/javachen/spark/AppTest.java --------------------------------------------------------------------------------