├── _config.yml ├── imgs ├── mammut.png ├── spark_core │ ├── memory.png │ ├── rdd-itr.png │ ├── rdd-loop.png │ ├── shuffle.png │ ├── wc-trans.png │ ├── rdd-feature.png │ ├── spark-eco.png │ ├── wordcount.png │ ├── dependencies.png │ ├── rdd-inmemory.png │ ├── object-lifetime.png │ ├── repartition-less2more.png │ ├── repartition-more2less.png │ └── context_cleaner │ │ ├── jobs_tab_cached_rdd.png │ │ └── storage_tab_cached_rdd.jpg └── spark_basics │ ├── spark-stack.png │ └── sparkcontext-services.png ├── .github └── ISSUE_TEMPLATE │ ├── bug_report.md │ └── feature_request.md ├── .travis.yml ├── .gitignore ├── example ├── src │ └── main │ │ ├── scala │ │ └── com │ │ │ └── netease │ │ │ └── bigdata │ │ │ └── spark │ │ │ ├── WordCount.scala │ │ │ └── rdd │ │ │ └── RDDCacheTest.scala │ │ └── java │ │ └── com │ │ └── netease │ │ └── bigdata │ │ └── hadoop │ │ └── WordCount.java └── pom.xml ├── README.md ├── scalastyle-config.xml ├── pom.xml └── slides ├── spark_core ├── context_cleaner.html └── rdd_basics.html └── spark_basics └── spark_basics_and_quick_start.html /_config.yml: -------------------------------------------------------------------------------- 1 | theme: jekyll-theme-leap-day 2 | -------------------------------------------------------------------------------- /imgs/mammut.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/netease-bigdata/ne-spark-courseware/HEAD/imgs/mammut.png -------------------------------------------------------------------------------- /imgs/spark_core/memory.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/netease-bigdata/ne-spark-courseware/HEAD/imgs/spark_core/memory.png -------------------------------------------------------------------------------- /imgs/spark_core/rdd-itr.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/netease-bigdata/ne-spark-courseware/HEAD/imgs/spark_core/rdd-itr.png -------------------------------------------------------------------------------- /imgs/spark_core/rdd-loop.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/netease-bigdata/ne-spark-courseware/HEAD/imgs/spark_core/rdd-loop.png -------------------------------------------------------------------------------- /imgs/spark_core/shuffle.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/netease-bigdata/ne-spark-courseware/HEAD/imgs/spark_core/shuffle.png -------------------------------------------------------------------------------- /imgs/spark_core/wc-trans.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/netease-bigdata/ne-spark-courseware/HEAD/imgs/spark_core/wc-trans.png -------------------------------------------------------------------------------- /imgs/spark_core/rdd-feature.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/netease-bigdata/ne-spark-courseware/HEAD/imgs/spark_core/rdd-feature.png -------------------------------------------------------------------------------- /imgs/spark_core/spark-eco.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/netease-bigdata/ne-spark-courseware/HEAD/imgs/spark_core/spark-eco.png -------------------------------------------------------------------------------- /imgs/spark_core/wordcount.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/netease-bigdata/ne-spark-courseware/HEAD/imgs/spark_core/wordcount.png -------------------------------------------------------------------------------- /imgs/spark_basics/spark-stack.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/netease-bigdata/ne-spark-courseware/HEAD/imgs/spark_basics/spark-stack.png -------------------------------------------------------------------------------- /imgs/spark_core/dependencies.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/netease-bigdata/ne-spark-courseware/HEAD/imgs/spark_core/dependencies.png -------------------------------------------------------------------------------- /imgs/spark_core/rdd-inmemory.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/netease-bigdata/ne-spark-courseware/HEAD/imgs/spark_core/rdd-inmemory.png -------------------------------------------------------------------------------- /imgs/spark_core/object-lifetime.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/netease-bigdata/ne-spark-courseware/HEAD/imgs/spark_core/object-lifetime.png -------------------------------------------------------------------------------- /imgs/spark_core/repartition-less2more.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/netease-bigdata/ne-spark-courseware/HEAD/imgs/spark_core/repartition-less2more.png -------------------------------------------------------------------------------- /imgs/spark_core/repartition-more2less.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/netease-bigdata/ne-spark-courseware/HEAD/imgs/spark_core/repartition-more2less.png -------------------------------------------------------------------------------- /imgs/spark_basics/sparkcontext-services.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/netease-bigdata/ne-spark-courseware/HEAD/imgs/spark_basics/sparkcontext-services.png -------------------------------------------------------------------------------- /imgs/spark_core/context_cleaner/jobs_tab_cached_rdd.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/netease-bigdata/ne-spark-courseware/HEAD/imgs/spark_core/context_cleaner/jobs_tab_cached_rdd.png -------------------------------------------------------------------------------- /imgs/spark_core/context_cleaner/storage_tab_cached_rdd.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/netease-bigdata/ne-spark-courseware/HEAD/imgs/spark_core/context_cleaner/storage_tab_cached_rdd.jpg -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: 
Create a report to help us improve 4 | 5 | --- 6 | 7 | **Describe the bug** 8 | A clear and concise description of what the bug is. 9 | 10 | **Expected behavior** 11 | A clear and concise description of what you expected to happen. 12 | 13 | **Screenshots** 14 | If applicable, add screenshots to help explain your problem. 15 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | 5 | --- 6 | 7 | ## Title 8 | [Describe your topic here] 9 | ## Abstract 10 | [Summarize the content of your topic here] 11 | ## Outline 12 | - Self-introduction 13 | - Table of contents 14 | - Topic 1 15 | - Topic 1-1 16 | - Topic 1-2 17 | - Topic 1-3 18 | - Topic 2 19 | - Topic 2-1 20 | - Topic 2-2 21 | - Topic 3 22 | - Topic 3-1 23 | - Closing 24 | 25 | ## Appendix 26 | [Describe any other information covered by your topic here] 27 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: scala 2 | scala: 3 | - 2.11.8 4 | 5 | cache: 6 | directories: 7 | - $HOME/.m2 8 | 9 | deploy: 10 | provider: pages 11 | skip_cleanup: true 12 | github_token: $GITHUB_TOKEN 13 | email: yaooqinn@hotmail.com 14 | name: Kent Yao 15 | on: 16 | branch: master 17 | 18 | script: 19 | - mvn package -q -Dmaven.javadoc.skip=true -B -V 20 | 21 | notifications: 22 | email: false 23 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *#*# 2 | *.#* 3 | *.iml 4 | *.ipr 5 | *.iws 6 | *.pyc 7 | *.pyo 8 | *.swp 9 | *~ 10 | .DS_Store 11 | .cache 12 | .classpath 13 | .ensime 14 | .ensime_cache/ 15 | .ensime_lucene 16 | .generated-mima* 17 | .idea/ 18 | .idea_modules/ 19 | .project 20 | .pydevproject 21 | .scala_dependencies 22 | .settings 23 | target/ 24 | dist/ 25 | kyuubi-*-bin-* 26 | *.gz 27 | logs/ 28 | pid/ 29 | local/ 30 | out/ 31 | hs_err_pid* 32 | spark-warehouse/ 33 | metastore_db 34 | derby.log 35 | 36 | -------------------------------------------------------------------------------- /example/src/main/scala/com/netease/bigdata/spark/WordCount.scala: -------------------------------------------------------------------------------- 1 | package com.netease.bigdata.spark 2 | 3 | import org.apache.spark.{SparkConf, SparkContext} 4 | 5 | object WordCount { 6 | 7 | def main(args: Array[String]): Unit = { 8 | require(args.length == 1, "Usage: WordCount ") 9 | val conf = new SparkConf().setAppName("Word Count").setMaster("local[*]") 10 | val sparkContext = new SparkContext(conf) 11 | val textFile = sparkContext.textFile(args(0), 2) 12 | val words = textFile.flatMap(_.split(" ")) 13 | val ones = words.map((_, 1)) 14 | val counts = ones.reduceByKey(_ + _) 15 | val res = counts.collect() 16 | for ((word, count) <- res) { 17 | println(word + ": " + count) 18 | } 19 | 20 | sparkContext.stop() 21 | } 22 | 23 | } 24 | -------------------------------------------------------------------------------- /example/src/main/scala/com/netease/bigdata/spark/rdd/RDDCacheTest.scala: -------------------------------------------------------------------------------- 1 | package com.netease.bigdata.spark.rdd 2 | 3 | import org.apache.spark.{SparkConf, SparkContext} 4 | 5 | import scala.util.Random 6 | 7 | object RDDCacheTest { 8 | 9 | def main(args: Array[String]): Unit = { 10 | val conf = new SparkConf() 11 | .setAppName(getClass.getSimpleName) 12 | 
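// spark.cleaner.periodicGC.interval (30min by default) controls how often the driver triggers a full GC
// so that the ContextCleaner can discover cached RDDs, shuffle files and broadcasts that have gone out of
// scope and clean them up; 1min simply makes that cleanup observable quickly in this demo.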
.set("spark.cleaner.periodicGC.interval", "1min") // context cleaner 13 | val sc = new SparkContext(conf) 14 | val data = Seq.fill(1024 * 1024 * 100)(Random.nextInt(100)) 15 | val rdd1 = sc.parallelize(data, 20) 16 | rdd1.cache() // mark rdd 1 cache 17 | val rdd2 = rdd1.map((_, 1)).reduceByKey(_ + _) // word count 18 | val cachedRdd2 = rdd2.cache() // cache shuffled rdd 19 | rdd2.collect() // action actually trigger caching 20 | rdd1.count() // ditto 21 | rdd2.count() // rdd reuse 22 | cachedRdd2.count() // ditto 23 | rdd1.map((_, 1)).reduceByKey(_ + _).take(1) // rdd 1 reuse, not rdd 2 24 | // no rdd reuse 25 | val rdd3 = sc.parallelize(data, 30) 26 | rdd3.map((_, 1)).reduceByKey(_ + _).count() 27 | 10.to(20, 2).foreach { i => 28 | val tmp = rdd3.groupBy(_ % i) 29 | tmp.cache().count() 30 | if (i % 3 == 0) tmp.take(1) 31 | } 32 | Thread.sleep(1000 * 60 * 10) 33 | sc.stop() 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /example/src/main/java/com/netease/bigdata/hadoop/WordCount.java: -------------------------------------------------------------------------------- 1 | package com.netease.bigdata.hadoop; 2 | 3 | import org.apache.hadoop.conf.Configuration; 4 | import org.apache.hadoop.fs.Path; 5 | import org.apache.hadoop.io.IntWritable; 6 | import org.apache.hadoop.io.LongWritable; 7 | import org.apache.hadoop.io.Text; 8 | import org.apache.hadoop.mapreduce.Job; 9 | import org.apache.hadoop.mapreduce.Mapper; 10 | import org.apache.hadoop.mapreduce.Reducer; 11 | import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; 12 | import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; 13 | import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; 14 | import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; 15 | 16 | import java.io.IOException; 17 | import java.util.StringTokenizer; 18 | 19 | public class WordCount { 20 | 21 | public static class Map extends Mapper { 22 | private final static IntWritable one = new IntWritable(1); 23 | private Text word = new Text(); 24 | 25 | public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { 26 | String line = value.toString(); 27 | StringTokenizer tokenizer = new StringTokenizer(line); 28 | while (tokenizer.hasMoreTokens()) { 29 | word.set(tokenizer.nextToken()); 30 | context.write(word, one); 31 | } 32 | } 33 | } 34 | 35 | public static class Reduce extends Reducer { 36 | public void reduce(Text key, Iterable values, Context context) 37 | throws IOException, InterruptedException { 38 | int sum = 0; 39 | for (IntWritable val : values) { 40 | sum += val.get(); 41 | } 42 | context.write(key, new IntWritable(sum)); 43 | } 44 | } 45 | 46 | public static void main(String[] args) throws Exception { 47 | Configuration conf = new Configuration(); 48 | Job job = new Job(conf, "wordcount"); 49 | job.setOutputKeyClass(Text.class); 50 | job.setOutputValueClass(IntWritable.class); 51 | job.setMapperClass(Map.class); 52 | job.setReducerClass(Reduce.class); 53 | job.setInputFormatClass(TextInputFormat.class); 54 | job.setOutputFormatClass(TextOutputFormat.class); 55 | FileInputFormat.addInputPath(job, new Path(args[0])); 56 | FileOutputFormat.setOutputPath(job, new Path(args[1])); 57 | job.waitForCompletion(true); 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # NetEase Spark Courses 
[![HitCount](http://hits.dwyl.io/netease-bigdata/ne-spark-courseware.svg)](http://hits.dwyl.io/netease-bigdata/ne-spark-courseware) 2 | 3 | This project aims to help users of the [NetEase Mammut big data platform](https://bigdata.163yun.com/mammut) use Apache Spark more easily in their day-to-day data development work. 4 | 5 | 6 | ## I. Basics 7 | #### 1. [Spark Overview and Quick Start Guide](https://netease-bigdata.github.io/ne-spark-courseware/slides/spark_basics/spark_basics_and_quick_start.html#1) 8 | #### 2. [Developing Spark Applications in an IDE with Maven]() 9 | 10 | ## II. Spark Core 11 | #### 1. [Spark RDD Overview](https://netease-bigdata.github.io/ne-spark-courseware/slides/spark_core/rdd_basics.html#1) 12 | #### 2. [Spark's Garbage Collection Mechanism -- ContextCleaner](https://netease-bigdata.github.io/ne-spark-courseware/slides/spark_core/context_cleaner.html#1) 13 | #### [Spark On YARN]() 14 | 15 | ## III. Spark SQL 16 | #### [DataFrame/Dataset]() 17 | #### [Spark SQL Integration with Hive]() 18 | #### [Spark SQL UDF]() 19 | #### [How to Optimize Spark SQL Execution]() 20 | #### [Spark SQL Catalyst Internals Explained]() 21 | #### [Spark SQL Cost-Based Optimization Explained]() 22 | #### [Spark SQL Thrift Server Explained]() 23 | #### [Spark SQL with Various Data Sources]() 24 | #### [Spark SQL Parameters and Tuning]() 25 | 26 | ## IV. Spark Streaming 27 | #### [Types of Big Data Processing, Stream Computing Frameworks, and Content Overview]() 28 | #### [What Spark Streaming Is and Its Data Processing Flow]() 29 | #### [Spark Streaming Integration with Kafka]() 30 | #### [Spark Streaming Integration with Flume]() 31 | 32 | 33 | ## V. Spark Structured Streaming 34 | #### [Spark Structured Streaming Basics](https://yaooqinn.github.io/sugar/slides/StructuedStreamingBasics.html#1) 35 | 36 | ## VI. Spark Machine Learning 37 | 38 | ## VII. Spark GraphX 39 | 40 | ## VIII. R on Spark 41 | 42 | ## IX. Mammut Spark Data Development 43 | #### [How to Use Mammut Spark for Data Development]() 44 | #### [How to Use Mammut for ETL Development]() 45 | #### [Developing and Tuning Spark Streaming Jobs on Mammut]() 46 | 47 | ## X. Mammut Spark Self-Service Analytics 48 | 49 | ## XI. Spark Parameters in Detail 50 | 51 | ## XII. Miscellaneous 52 | - DataSourceV2 53 | - [DataSourceV2 Overview](https://yaooqinn.github.io/sugar/docs/spark/datasourcev2/1_start_from_the_jira.html) - thoughts after reading Wenchen Fan's [SPIP: DataSource API V2](https://docs.google.com/document/d/1n_vUVbF4KD3gxTmkNEon5qdQ-Z8qU5Frf6WMQZ6jJVM/edit#heading=h.mi1fbff5f8f9) 54 | 55 | --- 56 | 57 | ## Related Projects 58 | [Kyuubi](https://github.com/yaooqinn/kyuubi) - a multi-tenant SQL Thrift/JDBC/ODBC service built on Spark [![codecov](https://codecov.io/gh/yaooqinn/kyuubi/branch/master/graph/badge.svg)](https://codecov.io/gh/yaooqinn/kyuubi) [![Build Status](https://travis-ci.org/yaooqinn/kyuubi.svg?branch=master)](https://travis-ci.org/yaooqinn/kyuubi) [![HitCount](http://hits.dwyl.io/yaooqinn/kyuubi.svg)](http://hits.dwyl.io/yaooqinn/kyuubi) 59 | 60 | [spark-authorizer](https://github.com/yaooqinn/spark-authorizer) - a plugin that provides SQL authorization (access control) for Spark SQL [![Build Status](https://travis-ci.org/yaooqinn/spark-authorizer.svg?branch=master)](https://travis-ci.org/yaooqinn/spark-authorizer) 61 | -------------------------------------------------------------------------------- /example/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | spark-courseware 7 | com.netease.bigdata 8 | 1.0.0-SNAPSHOT 9 | ../pom.xml 10 | 11 | 4.0.0 12 | 13 | example 14 | Examples 15 | jar 16 | 17 | 18 | 19 | org.scala-lang 20 | scala-library 21 | 22 | 23 | org.apache.hadoop 24 | hadoop-mapreduce-client-core 25 | 26 | 27 | org.apache.hadoop 28 | hadoop-client 29 | 30 | 31 | 32 | org.apache.spark 33 | spark-core_${scala.binary.version} 34 | 35 | 36 | 37 | 38 | 39 | 40 | org.apache.maven.plugins 41 | maven-compiler-plugin 42 | 3.5.1 43 | 44 | ${java.version} 45 | ${java.version} 46 | UTF-8 47 | 1024m 48 | true 49 | 50 | -Xlint:all,-serial,-path 51 | 52 | 53 | 54 | 55 | 56 | net.alchim31.maven
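<!-- scala-maven-plugin: compiles the module's Scala sources; the scala-compile-first/scala-test-compile-first executions below are the usual setup for compiling Scala ahead of Java in mixed Scala/Java modules -->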
57 | scala-maven-plugin 58 | 3.3.1 59 | 60 | 61 | eclipse-add-source 62 | 63 | add-source 64 | 65 | 66 | 67 | scala-compile-first 68 | 69 | compile 70 | 71 | 72 | 73 | scala-test-compile-first 74 | 75 | testCompile 76 | 77 | 78 | 79 | 80 | ${scala.version} 81 | incremental 82 | true 83 | 84 | -unchecked 85 | -deprecation 86 | -feature 87 | -explaintypes 88 | -Yno-adapted-args 89 | 90 | 91 | -Xms1024m 92 | -Xmx1024m 93 | -XX:ReservedCodeCacheSize=512M 94 | 95 | 96 | -source 97 | ${java.version} 98 | -target 99 | ${java.version} 100 | -Xlint:all,-serial,-path,-try 101 | 102 | 103 | 104 | 105 | 106 | -------------------------------------------------------------------------------- /scalastyle-config.xml: -------------------------------------------------------------------------------- 1 | 17 | 39 | 40 | 41 | Scalastyle standard configuration 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | true 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | ARROW, EQUALS, ELSE, TRY, CATCH, FINALLY, LARROW, RARROW 105 | 106 | 107 | 108 | 109 | 110 | ARROW, EQUALS, COMMA, COLON, IF, ELSE, DO, WHILE, FOR, MATCH, TRY, CATCH, FINALLY, LARROW, RARROW 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | Runtime\.getRuntime\.addShutdownHook 119 | 127 | 128 | 129 | 130 | mutable\.SynchronizedBuffer 131 | 139 | 140 | 141 | 142 | Await\.ready 143 | 150 | 151 | 152 | 153 | JavaConversions 154 | Instead of importing implicits in scala.collection.JavaConversions._, import 155 | scala.collection.JavaConverters._ and use .asScala / .asJava methods 156 | 157 | 158 | 159 | org\.apache\.commons\.lang\. 160 | Use Commons Lang 3 classes (package org.apache.commons.lang3.*) instead 161 | of Commons Lang 2 (package org.apache.commons.lang.*) 162 | 163 | 164 | 165 | extractOpt 166 | Use Utils.jsonOption(x).map(.extract[T]) instead of .extractOpt[T], as the latter 167 | is slower. 168 | 169 | 170 | 171 | 172 | java,scala,3rdParty,yaooqinn 173 | javax?\..* 174 | scala\..* 175 | (?!yaooqinn).* 176 | 177 | 178 | 179 | 180 | 181 | COMMA 182 | 183 | 184 | 185 | 186 | \)\{ 187 | 190 | 191 | 192 | 193 | (?m)^(\s*)/[*][*].*$(\r|)\n^\1 [*] 194 | Use Javadoc style indentation for multiline comments 195 | 196 | 197 | 198 | case[^\n>]*=>\s*\{ 199 | Omit braces in case clauses. 
200 | 201 | 202 | 203 | 204 | 205 | 206 | 207 | 208 | 209 | 210 | 211 | 212 | 213 | 214 | 215 | 216 | 217 | 218 | 219 | 220 | 221 | 222 | 223 | 224 | 225 | 226 | 227 | 228 | 229 | 230 | 231 | 232 | 233 | 234 | 235 | 236 | 237 | 238 | 239 | 240 | 241 | 242 | 243 | 244 | 245 | 246 | 247 | 248 | 249 | 800> 250 | 251 | 252 | 253 | 254 | 30 255 | 256 | 257 | 258 | 259 | 10 260 | 261 | 262 | 263 | 264 | 50 265 | 266 | 267 | 268 | 269 | 270 | 271 | 272 | 273 | 274 | 275 | -1,0,1,2,3 276 | 277 | 278 | 279 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 18 | 19 | 21 | 4.0.0 22 | 23 | com.netease.bigdata 24 | spark-courseware 25 | Spark Courseware 26 | 1.0.0-SNAPSHOT 27 | 28 | example 29 | 30 | pom 31 | 32 | 33 | 34 | The Apache Software License, Version 2.0 35 | http://www.apache.org/licenses/LICENSE-2.0.txt 36 | manual 37 | 38 | 39 | 40 | 41 | 42 | yaooqinn 43 | Kent Yao 44 | yaooqinn@hotmail.com 45 | NetEase 46 | https://github.com/yaooqinn 47 | 48 | 49 | 50 | 51 | UTF-8 52 | 1.7 53 | 2.11.8 54 | 2.2.6 55 | 2.11 56 | 3.3.9 57 | org.apache.spark 58 | 2.1.2 59 | provided 60 | 2.6.5 61 | provided 62 | org.spark-project.hive 63 | 1.2.1.spark2 64 | 1.2.1 65 | provided 66 | 1.1 67 | 2.0.0-M15 68 | 69 | 70 | 71 | 72 | central 73 | 74 | Maven Repository 75 | https://repo.maven.apache.org/maven2 76 | 77 | true 78 | 79 | 80 | false 81 | 82 | 83 | 84 | apache 85 | Apache Repository Snapshots 86 | http://repository.apache.org/snapshots 87 | 88 | false 89 | 90 | 91 | true 92 | daily 93 | warn 94 | 95 | 96 | 97 | 98 | 99 | 100 | central 101 | https://repo.maven.apache.org/maven2 102 | 103 | true 104 | 105 | 106 | false 107 | 108 | 109 | 110 | apache 111 | Apache Repository Snapshots 112 | http://repository.apache.org/snapshots 113 | 114 | false 115 | 116 | 117 | true 118 | daily 119 | warn 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | ${spark.group} 128 | spark-yarn_${scala.binary.version} 129 | ${spark.version} 130 | ${spark.scope} 131 | 132 | 133 | org.apache.hadoop 134 | * 135 | 136 | 137 | 138 | 139 | ${spark.group} 140 | spark-hive_${scala.binary.version} 141 | ${spark.version} 142 | ${spark.scope} 143 | 144 | 145 | 146 | ${spark.group} 147 | spark-tags_${scala.binary.version} 148 | ${spark.version} 149 | ${spark.scope} 150 | 151 | 152 | 153 | org.scala-lang 154 | scala-library 155 | ${scala.version} 156 | provided 157 | 158 | 159 | 160 | org.apache.hadoop 161 | hadoop-mapreduce-client-core 162 | ${hadoop.version} 163 | ${hadoop.deps.scope} 164 | 165 | 166 | 167 | org.apache.hadoop 168 | hadoop-client 169 | ${hadoop.version} 170 | ${hadoop.deps.scope} 171 | 172 | 173 | asm 174 | asm 175 | 176 | 177 | org.codehaus.jackson 178 | jackson-mapper-asl 179 | 180 | 181 | org.ow2.asm 182 | asm 183 | 184 | 185 | org.jboss.netty 186 | netty 187 | 188 | 189 | commons-logging 190 | commons-logging 191 | 192 | 193 | org.mockito 194 | mockito-all 195 | 196 | 197 | org.mortbay.jetty 198 | servlet-api-2.5 199 | 200 | 201 | javax.servlet 202 | servlet-api 203 | 204 | 205 | junit 206 | junit 207 | 208 | 209 | com.sun.jersey 210 | * 211 | 212 | 213 | com.sun.jersey.jersey-test-framework 214 | * 215 | 216 | 217 | com.sun.jersey.contribs 218 | * 219 | 220 | 221 | 222 | 223 | 224 | org.eclipse.jetty 225 | jetty-servlet 226 | 9.3.11.v20160721 227 | 228 | 229 | 230 | com.google.guava 231 | guava 232 | 14.0.1 233 | provided 234 | 235 | 236 | 237 | net.sf.jpam 238 | jpam 239 | ${jpam.version} 240 | 
provided 241 | 242 | 243 | 244 | org.apache.hadoop 245 | hadoop-yarn-client 246 | ${hadoop.version} 247 | ${hadoop.deps.scope} 248 | 249 | 250 | 251 | org.scalatest 252 | scalatest_${scala.binary.version} 253 | ${scalatest.version} 254 | test 255 | 256 | 257 | 258 | ${spark.group} 259 | spark-core_${scala.binary.version} 260 | ${spark.version} 261 | 262 | 263 | 264 | ${spark.group} 265 | spark-catalyst_${scala.binary.version} 266 | ${spark.version} 267 | test-jar 268 | test 269 | 270 | 271 | ${spark.group} 272 | spark-sql_${scala.binary.version} 273 | ${spark.version} 274 | test-jar 275 | test 276 | 277 | 278 | ${hive.group} 279 | hive-service 280 | ${hive.version} 281 | test 282 | 283 | 284 | 285 | org.apache.hadoop 286 | hadoop-minikdc 287 | ${hadoop.version} 288 | test 289 | 290 | 291 | org.apache.directory.api 292 | api-all 293 | 294 | 295 | org.apache.directory.jdbm 296 | apacheds-jdbm1 297 | 298 | 299 | 300 | 301 | org.apache.directory.server 302 | apacheds-service 303 | ${apacheds.version} 304 | test 305 | 306 | 307 | bouncycastle 308 | bcprov-jdk15 309 | 310 | 311 | 312 | 313 | org.apache.curator 314 | curator-test 315 | 2.6.0 316 | test 317 | 318 | 319 | org.mockito 320 | mockito-core 321 | 1.10.19 322 | test 323 | 324 | 325 | 326 | 327 | 328 | 329 | spark-2.1 330 | 331 | 2.1.2 332 | 333 | 334 | 335 | 336 | spark-2.2 337 | 338 | 2.2.1 339 | 340 | 341 | 342 | 343 | spark-2.3 344 | 345 | 2.3.0 346 | 3.0.3 347 | 348 | 349 | 350 | 351 | -------------------------------------------------------------------------------- /slides/spark_core/context_cleaner.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | Apache Spark Garbge Collector 4 | 5 | 145 | 146 | 147 | 773 | 775 | 790 | 791 | 792 | -------------------------------------------------------------------------------- /slides/spark_basics/spark_basics_and_quick_start.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | Spark概述及快速入门指南 4 | 5 | 145 | 146 | 147 | 945 | 947 | 962 | 963 | 964 | -------------------------------------------------------------------------------- /slides/spark_core/rdd_basics.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | RDD Basics 5 | 6 | 135 | 136 | 137 | 1098 | 1100 | 1115 | 1116 | 1117 | --------------------------------------------------------------------------------