├── .gitignore ├── LICENSE ├── README.md ├── bikeshare ├── README.md └── src │ ├── 00.setup.sh │ ├── 01.etl.scala │ ├── 02.explore.scala │ ├── 03.featurize.scala │ ├── 04.naive_bayes.scala │ ├── 05.decision_tree.scala │ ├── 06.random_forest.scala │ ├── 07.compare_models.scala │ ├── 08.save_parquet.scala │ ├── 10.examine_parquet.sh │ ├── 11.load_parquet.scala │ ├── 12.build_graph.scala │ ├── 13.pagerank.scala │ ├── 14.init_sssp.scala │ ├── 15.sssp_impl.scala │ ├── 16.map_routes.scala │ ├── 17.setup.sh │ ├── 18.parquet.py │ └── 19.query.py ├── data ├── CHANGES.txt ├── README.md ├── clk.tsv ├── error_log.txt └── reg.tsv ├── pom.xml ├── simple.sbt └── src ├── 00.pre_flight.scala ├── 01.log_mining.scala ├── 02.wc.py ├── 02.wc.scala ├── 03.join.scala ├── 04.code_exercise.scala ├── 05.pi_approx.scala ├── 06.streaming.scala ├── 07.nwc.py ├── 08.nwc_stateful.py ├── 09.graphx.scala ├── 10.SimpleApp.java └── 11.SimpleApp.scala /.gitignore: -------------------------------------------------------------------------------- 1 | derby.log 2 | metastore_db/ 3 | src/checkpoint/ 4 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ceteri/intro_spark/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ceteri/intro_spark/HEAD/README.md -------------------------------------------------------------------------------- /bikeshare/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ceteri/intro_spark/HEAD/bikeshare/README.md -------------------------------------------------------------------------------- /bikeshare/src/00.setup.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ceteri/intro_spark/HEAD/bikeshare/src/00.setup.sh -------------------------------------------------------------------------------- /bikeshare/src/01.etl.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ceteri/intro_spark/HEAD/bikeshare/src/01.etl.scala -------------------------------------------------------------------------------- /bikeshare/src/02.explore.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ceteri/intro_spark/HEAD/bikeshare/src/02.explore.scala -------------------------------------------------------------------------------- /bikeshare/src/03.featurize.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ceteri/intro_spark/HEAD/bikeshare/src/03.featurize.scala -------------------------------------------------------------------------------- /bikeshare/src/04.naive_bayes.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ceteri/intro_spark/HEAD/bikeshare/src/04.naive_bayes.scala -------------------------------------------------------------------------------- /bikeshare/src/05.decision_tree.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ceteri/intro_spark/HEAD/bikeshare/src/05.decision_tree.scala -------------------------------------------------------------------------------- /bikeshare/src/06.random_forest.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ceteri/intro_spark/HEAD/bikeshare/src/06.random_forest.scala -------------------------------------------------------------------------------- /bikeshare/src/07.compare_models.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ceteri/intro_spark/HEAD/bikeshare/src/07.compare_models.scala -------------------------------------------------------------------------------- /bikeshare/src/08.save_parquet.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ceteri/intro_spark/HEAD/bikeshare/src/08.save_parquet.scala -------------------------------------------------------------------------------- /bikeshare/src/10.examine_parquet.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ceteri/intro_spark/HEAD/bikeshare/src/10.examine_parquet.sh -------------------------------------------------------------------------------- /bikeshare/src/11.load_parquet.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ceteri/intro_spark/HEAD/bikeshare/src/11.load_parquet.scala -------------------------------------------------------------------------------- /bikeshare/src/12.build_graph.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ceteri/intro_spark/HEAD/bikeshare/src/12.build_graph.scala -------------------------------------------------------------------------------- /bikeshare/src/13.pagerank.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ceteri/intro_spark/HEAD/bikeshare/src/13.pagerank.scala -------------------------------------------------------------------------------- /bikeshare/src/14.init_sssp.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ceteri/intro_spark/HEAD/bikeshare/src/14.init_sssp.scala -------------------------------------------------------------------------------- /bikeshare/src/15.sssp_impl.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ceteri/intro_spark/HEAD/bikeshare/src/15.sssp_impl.scala -------------------------------------------------------------------------------- /bikeshare/src/16.map_routes.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ceteri/intro_spark/HEAD/bikeshare/src/16.map_routes.scala -------------------------------------------------------------------------------- /bikeshare/src/17.setup.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ceteri/intro_spark/HEAD/bikeshare/src/17.setup.sh -------------------------------------------------------------------------------- /bikeshare/src/18.parquet.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ceteri/intro_spark/HEAD/bikeshare/src/18.parquet.py -------------------------------------------------------------------------------- /bikeshare/src/19.query.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ceteri/intro_spark/HEAD/bikeshare/src/19.query.py -------------------------------------------------------------------------------- /data/CHANGES.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ceteri/intro_spark/HEAD/data/CHANGES.txt -------------------------------------------------------------------------------- /data/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ceteri/intro_spark/HEAD/data/README.md -------------------------------------------------------------------------------- /data/clk.tsv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ceteri/intro_spark/HEAD/data/clk.tsv -------------------------------------------------------------------------------- /data/error_log.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ceteri/intro_spark/HEAD/data/error_log.txt -------------------------------------------------------------------------------- /data/reg.tsv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ceteri/intro_spark/HEAD/data/reg.tsv -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ceteri/intro_spark/HEAD/pom.xml -------------------------------------------------------------------------------- /simple.sbt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ceteri/intro_spark/HEAD/simple.sbt -------------------------------------------------------------------------------- /src/00.pre_flight.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ceteri/intro_spark/HEAD/src/00.pre_flight.scala -------------------------------------------------------------------------------- /src/01.log_mining.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ceteri/intro_spark/HEAD/src/01.log_mining.scala -------------------------------------------------------------------------------- /src/02.wc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ceteri/intro_spark/HEAD/src/02.wc.py -------------------------------------------------------------------------------- /src/02.wc.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ceteri/intro_spark/HEAD/src/02.wc.scala -------------------------------------------------------------------------------- /src/03.join.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ceteri/intro_spark/HEAD/src/03.join.scala -------------------------------------------------------------------------------- /src/04.code_exercise.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ceteri/intro_spark/HEAD/src/04.code_exercise.scala -------------------------------------------------------------------------------- /src/05.pi_approx.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ceteri/intro_spark/HEAD/src/05.pi_approx.scala -------------------------------------------------------------------------------- /src/06.streaming.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ceteri/intro_spark/HEAD/src/06.streaming.scala -------------------------------------------------------------------------------- /src/07.nwc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ceteri/intro_spark/HEAD/src/07.nwc.py -------------------------------------------------------------------------------- /src/08.nwc_stateful.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ceteri/intro_spark/HEAD/src/08.nwc_stateful.py -------------------------------------------------------------------------------- /src/09.graphx.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ceteri/intro_spark/HEAD/src/09.graphx.scala -------------------------------------------------------------------------------- /src/10.SimpleApp.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ceteri/intro_spark/HEAD/src/10.SimpleApp.java -------------------------------------------------------------------------------- /src/11.SimpleApp.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ceteri/intro_spark/HEAD/src/11.SimpleApp.scala --------------------------------------------------------------------------------