├── LICENSE.md ├── README.md ├── code ├── README.md ├── chap01 │ ├── README.md │ ├── basic_dataframe_example.log │ ├── basic_dataframe_example.py │ ├── basic_dataframe_example.sh │ ├── compute_stats.log │ ├── compute_stats.py │ ├── compute_stats.sh │ ├── compute_stats_detailed.log │ ├── compute_stats_detailed.py │ ├── compute_stats_detailed.sh │ ├── compute_stats_with_threshold.log │ ├── compute_stats_with_threshold.py │ ├── compute_stats_with_threshold.sh │ ├── compute_stats_with_threshold_and_filter.log │ ├── compute_stats_with_threshold_and_filter.py │ ├── compute_stats_with_threshold_and_filter.sh │ ├── dataframe_creation_from_csv.log │ ├── dataframe_creation_from_csv.py │ ├── dataframe_creation_from_csv.sh │ ├── fox_data.txt │ ├── name_city_age.csv │ ├── rdd_creation_from_csv.log │ ├── rdd_creation_from_csv.py │ ├── rdd_creation_from_csv.sh │ ├── sample.txt │ ├── sample_numbers.txt │ ├── sample_people.json │ ├── sort_numbers.log │ ├── sort_numbers.py │ ├── sort_numbers.sh │ ├── url_frequencies.txt │ ├── word_count.log │ ├── word_count.py │ ├── word_count.sh │ ├── word_count_with_params.py │ ├── word_count_with_params.sh │ ├── word_count_with_threshold.py │ └── word_count_with_threshold.sh ├── chap02 │ ├── README.md │ ├── generate_key_value_pairs.py │ ├── sample_file.txt │ ├── sample_file_extra.txt │ ├── sum_by_groupbykey.log │ ├── sum_by_groupbykey.py │ ├── sum_by_groupbykey.sh │ ├── sum_by_reducebykey.log │ ├── sum_by_reducebykey.py │ ├── sum_by_reducebykey.sh │ ├── word_count_driver.log │ ├── word_count_driver.py │ ├── word_count_driver.sh │ ├── word_count_driver_by_groupbykey.log │ ├── word_count_driver_by_groupbykey.py │ ├── word_count_driver_by_groupbykey.sh │ ├── word_count_driver_shorthand.log │ ├── word_count_driver_shorthand.py │ ├── word_count_driver_shorthand.sh │ ├── word_count_driver_shorthand_by_groupbykey.log │ ├── word_count_driver_shorthand_by_groupbykey.py │ ├── word_count_driver_shorthand_by_groupbykey.sh │ ├── word_count_driver_shorthand_sorted.log │ ├── word_count_driver_shorthand_sorted.py │ ├── word_count_driver_shorthand_sorted.sh │ ├── word_count_driver_with_filter.log │ ├── word_count_driver_with_filter.py │ ├── word_count_driver_with_filter.sh │ ├── word_count_driver_with_filter_and_threshold.log │ ├── word_count_driver_with_filter_and_threshold.py │ ├── word_count_driver_with_filter_and_threshold.sh │ ├── word_count_python.py │ └── word_count_python_shorthand.py ├── chap03 │ ├── datadir │ │ ├── file1 │ │ └── file2 │ ├── dataframe_creation_from_collection.log │ ├── dataframe_creation_from_collection.py │ ├── dataframe_creation_from_collection.sh │ ├── dataframe_creation_from_csv_no_header.log │ ├── dataframe_creation_from_csv_no_header.py │ ├── dataframe_creation_from_csv_no_header.sh │ ├── dataframe_creation_from_csv_with_header.log │ ├── dataframe_creation_from_csv_with_header.py │ ├── dataframe_creation_from_csv_with_header.sh │ ├── dataframe_creation_from_dictionary.log │ ├── dataframe_creation_from_dictionary.py │ ├── dataframe_creation_from_dictionary.sh │ ├── dataframe_creation_from_directory.log │ ├── dataframe_creation_from_directory.py │ ├── dataframe_creation_from_directory.sh │ ├── dataframe_creation_from_rdd.log │ ├── dataframe_creation_from_rdd.py │ ├── dataframe_creation_from_rdd.sh │ ├── kv.txt │ ├── kv_no_header.txt │ ├── kv_with_header.txt │ ├── rdd_creation_from_collection.log │ ├── rdd_creation_from_collection.py │ ├── rdd_creation_from_collection.sh │ ├── rdd_creation_from_dataframe.log │ ├── rdd_creation_from_dataframe.py │ ├── rdd_creation_from_dataframe.sh │ ├── rdd_creation_from_dictionary.log │ ├── rdd_creation_from_dictionary.py │ ├── rdd_creation_from_dictionary.sh │ ├── rdd_creation_from_directory.log │ ├── rdd_creation_from_directory.py │ ├── rdd_creation_from_directory.sh │ ├── rdd_creation_from_file.log │ ├── rdd_creation_from_file.py │ ├── rdd_creation_from_file.sh │ ├── sample.txt │ ├── sample_dir │ │ ├── file1.txt │ │ └── file2.txt │ ├── sample_dir2 │ │ ├── file1.txt │ │ ├── file2.txt │ │ ├── file3.csv │ │ └── file4.csv │ ├── word_count.py │ ├── word_count.sh │ ├── word_count_with_params.py │ ├── word_count_with_params.sh │ ├── word_count_with_threshold.py │ └── word_count_with_threshold.sh ├── chap04 │ ├── DNA-FASTA-PERFORMANCE │ │ └── performance_of_FASTA_versions_1_2_3.txt │ ├── DNA-FASTA-V1 │ │ ├── run_dna_base_count_ver_1.py │ │ ├── run_dna_base_count_ver_1.sh │ │ ├── run_dna_base_count_ver_1_1GB.sh │ │ └── run_dna_base_count_ver_1_big.sh │ ├── DNA-FASTA-V2 │ │ ├── dna_base_count_ver_2.py │ │ ├── run_dna_base_count_ver_2.sh │ │ ├── run_dna_base_count_ver_2_1GB.sh │ │ └── run_dna_base_count_ver_2_big.sh │ ├── DNA-FASTA-V3 │ │ ├── dna_base_count_ver_3.py │ │ ├── run_dna_base_count_ver_3.sh │ │ ├── run_dna_base_count_ver_3_1GB.sh │ │ └── run_dna_base_count_ver_3_big.sh │ ├── DNA-FASTQ │ │ ├── dna_base_count_fastq.py │ │ └── run_dna_base_count_fastq.sh │ ├── README.md │ └── data │ │ ├── sample.fasta │ │ └── sp1.fastq ├── chap05 │ ├── average_by_key_use_aggregatebykey.log │ ├── average_by_key_use_aggregatebykey.py │ ├── average_by_key_use_aggregatebykey.sh │ ├── average_by_key_use_combinebykey.log │ ├── average_by_key_use_combinebykey.py │ ├── average_by_key_use_combinebykey.sh │ ├── average_by_key_use_foldbykey.log │ ├── average_by_key_use_foldbykey.py │ ├── average_by_key_use_foldbykey.sh │ ├── average_by_key_use_groupbykey.log │ ├── average_by_key_use_groupbykey.py │ ├── average_by_key_use_groupbykey.sh │ ├── average_by_key_use_reducebykey.log │ ├── average_by_key_use_reducebykey.py │ ├── average_by_key_use_reducebykey.sh │ ├── dataframe_action_describe.log │ ├── dataframe_action_describe.py │ ├── dataframe_action_describe.sh │ ├── dataframe_drop.log │ ├── dataframe_drop.py │ ├── dataframe_drop.sh │ ├── dataframe_filter.log │ ├── dataframe_filter.py │ ├── dataframe_filter.sh │ ├── dataframe_join_cross.log │ ├── dataframe_join_cross.py │ ├── dataframe_join_cross.sh │ ├── dataframe_join_inner.log │ ├── dataframe_join_inner.py │ ├── dataframe_join_inner.sh │ ├── dataframe_join_left.log │ ├── dataframe_join_left.py │ ├── dataframe_join_left.sh │ ├── dataframe_join_right.log │ ├── dataframe_join_right.py │ ├── dataframe_join_right.sh │ ├── dataframe_sql.log │ ├── dataframe_sql.py │ ├── dataframe_sql.sh │ ├── dataframe_withcolumn.log │ ├── dataframe_withcolumn.py │ ├── dataframe_withcolumn.sh │ ├── emps.txt │ ├── rdd_transformation_cartesian.log │ ├── rdd_transformation_cartesian.py │ ├── rdd_transformation_cartesian.sh │ ├── rdd_transformation_combinebykey.log │ ├── rdd_transformation_combinebykey.py │ ├── rdd_transformation_combinebykey.sh │ ├── rdd_transformation_filter.log │ ├── rdd_transformation_filter.py │ ├── rdd_transformation_filter.sh │ ├── rdd_transformation_flatmap.log │ ├── rdd_transformation_flatmap.py │ ├── rdd_transformation_flatmap.sh │ ├── rdd_transformation_groupbykey.log │ ├── rdd_transformation_groupbykey.py │ ├── rdd_transformation_groupbykey.sh │ ├── rdd_transformation_join.log │ ├── rdd_transformation_join.py │ ├── rdd_transformation_join.sh │ ├── rdd_transformation_map.log │ ├── rdd_transformation_map.py │ ├── rdd_transformation_map.sh │ ├── rdd_transformation_mappartitions.log │ ├── rdd_transformation_mappartitions.py │ ├── rdd_transformation_mappartitions.sh │ ├── rdd_transformation_mappartitions_handle_empty_partitions.log │ ├── rdd_transformation_mappartitions_handle_empty_partitions.py │ ├── rdd_transformation_mappartitions_handle_empty_partitions.sh │ ├── rdd_transformation_reducebykey.log │ ├── rdd_transformation_reducebykey.py │ ├── rdd_transformation_reducebykey.sh │ ├── rdd_transformation_sortby.log │ ├── rdd_transformation_sortby.py │ ├── rdd_transformation_sortby.sh │ ├── rdd_transformation_takeordered.log │ ├── rdd_transformation_takeordered.py │ ├── rdd_transformation_takeordered.sh │ ├── sample_5_records.txt │ └── users.txt ├── chap06 │ ├── README.md │ ├── average_by_key_use_aggregatebykey.py │ ├── average_by_key_use_aggregatebykey.sh │ ├── average_by_key_use_combinebykey.py │ ├── average_by_key_use_combinebykey.sh │ ├── average_by_key_use_groupbykey.py │ ├── average_by_key_use_groupbykey.sh │ ├── average_by_key_use_reducebykey.py │ └── average_by_key_use_reducebykey.sh ├── chap07 │ ├── WorldCupPlayers.csv │ ├── WorldCupPlayers.csv.data.source │ ├── WorldCupPlayers.csv.zip │ ├── customers.RECORD.FORMAT.txt │ ├── customers.txt │ ├── dataframe_creation_add_columns.log │ ├── dataframe_creation_add_columns.py │ ├── dataframe_creation_add_columns.sh │ ├── dataframe_creation_aggregate_multiple_columns.log │ ├── dataframe_creation_aggregate_multiple_columns.py │ ├── dataframe_creation_aggregate_multiple_columns.sh │ ├── dataframe_creation_aggregate_single_column.log │ ├── dataframe_creation_aggregate_single_column.py │ ├── dataframe_creation_aggregate_single_column.sh │ ├── dataframe_creation_call_udf.log │ ├── dataframe_creation_call_udf.py │ ├── dataframe_creation_call_udf.sh │ ├── dataframe_creation_cvs_no_header.log │ ├── dataframe_creation_cvs_no_header.py │ ├── dataframe_creation_cvs_no_header.sh │ ├── dataframe_creation_cvs_with_header.log │ ├── dataframe_creation_cvs_with_header.py │ ├── dataframe_creation_cvs_with_header.sh │ ├── dataframe_creation_from_collections.log │ ├── dataframe_creation_from_collections.py │ ├── dataframe_creation_from_collections.sh │ ├── dataframe_creation_from_pandas.log │ ├── dataframe_creation_from_pandas.py │ ├── dataframe_creation_from_pandas.sh │ ├── dataframe_creation_from_rows.log │ ├── dataframe_creation_from_rows.py │ ├── dataframe_creation_from_rows.sh │ ├── dataframe_creation_order_by.log │ ├── dataframe_creation_order_by.py │ ├── dataframe_creation_order_by.sh │ ├── dataframe_creation_with_explicit_schema.log │ ├── dataframe_creation_with_explicit_schema.py │ ├── dataframe_creation_with_explicit_schema.sh │ ├── dataframe_crosstab.log │ ├── dataframe_crosstab.py │ ├── dataframe_crosstab.sh │ ├── dataframe_drop_column.log │ ├── dataframe_drop_column.py │ ├── dataframe_drop_column.sh │ ├── dataframe_drop_duplicates.log │ ├── dataframe_drop_duplicates.py │ ├── dataframe_drop_duplicates.sh │ ├── dataframe_multi_dim_agg_groupby.log │ ├── dataframe_multi_dim_agg_groupby.py │ ├── dataframe_multi_dim_agg_groupby.sh │ ├── dataframe_multi_dim_agg_rollup.log │ ├── dataframe_multi_dim_agg_rollup.py │ ├── dataframe_multi_dim_agg_rollup.sh │ ├── dataframe_tutorial_with_worldcup.log │ ├── dataframe_tutorial_with_worldcup.py │ ├── dataframe_tutorial_with_worldcup.sh │ ├── dataframe_with_statistical_data.log │ ├── dataframe_with_statistical_data.py │ ├── dataframe_with_statistical_data.sh │ ├── emps_no_header.txt │ ├── emps_with_header.txt │ ├── life_expentancy.txt │ ├── partition_data_by_customer_and_year.log │ ├── partition_data_by_customer_and_year.py │ ├── partition_data_by_customer_and_year.sh │ ├── partition_data_by_customer_and_year_single_file.py │ ├── strings-2.parquet │ ├── users.parquet │ └── users4.parquet ├── chap08 │ ├── cats.no.header.csv │ ├── cats.with.header.csv │ ├── datasource_csv_reader_no_header.log │ ├── datasource_csv_reader_no_header.py │ ├── datasource_csv_reader_no_header.sh │ ├── datasource_csv_reader_with_header.log │ ├── datasource_csv_reader_with_header.py │ ├── datasource_csv_reader_with_header.sh │ ├── datasource_csv_writer.log │ ├── datasource_csv_writer.py │ ├── datasource_csv_writer.sh │ ├── datasource_elasticsearch_reader.log │ ├── datasource_elasticsearch_reader.py │ ├── datasource_elasticsearch_reader.sh │ ├── datasource_elasticsearch_writer.log │ ├── datasource_elasticsearch_writer.py │ ├── datasource_elasticsearch_writer.sh │ ├── datasource_gzip_reader.log │ ├── datasource_gzip_reader.py │ ├── datasource_gzip_reader.sh │ ├── datasource_jdbc_reader.log │ ├── datasource_jdbc_reader.py │ ├── datasource_jdbc_reader.sh │ ├── datasource_jdbc_writer.log │ ├── datasource_jdbc_writer.py │ ├── datasource_jdbc_writer.sh │ ├── datasource_json_reader_multi_line.log │ ├── datasource_json_reader_multi_line.py │ ├── datasource_json_reader_multi_line.sh │ ├── datasource_json_reader_single_line.log │ ├── datasource_json_reader_single_line.py │ ├── datasource_json_reader_single_line.sh │ ├── datasource_mongodb_reader.log │ ├── datasource_mongodb_reader.py │ ├── datasource_mongodb_reader.sh │ ├── datasource_mongodb_writer.log │ ├── datasource_mongodb_writer.py │ ├── datasource_mongodb_writer.sh │ ├── datasource_redis_reader.log │ ├── datasource_redis_reader.py │ ├── datasource_redis_reader.sh │ ├── datasource_redis_writer.log │ ├── datasource_redis_writer.py │ ├── datasource_redis_writer.sh │ ├── datasource_textfile_reader.log │ ├── datasource_textfile_reader.py │ ├── datasource_textfile_reader.sh │ ├── datasource_textfile_writer.log │ ├── datasource_textfile_writer.py │ ├── datasource_textfile_writer.sh │ ├── images │ │ ├── cat1.jpg │ │ ├── cat2.jpg │ │ ├── cat3.jpg │ │ ├── cat4.jpg │ │ ├── duck1.jpg │ │ ├── duck2.jpg │ │ └── not-image.txt │ ├── mongodb_coll44.png │ ├── mongodb_coll66.png │ ├── name_age_salary.csv │ ├── people.txt │ ├── sample_multi_line.json │ ├── sample_no_header.csv │ ├── sample_numbers.txt │ ├── sample_single_line.json │ ├── sample_with_header.csv │ └── twitter.avro ├── chap09 │ ├── logistic_regression_builder.log │ ├── logistic_regression_builder.py │ ├── logistic_regression_builder.sh │ ├── logistic_regression_predictor.log │ ├── logistic_regression_predictor.py │ ├── logistic_regression_predictor.sh │ ├── model │ │ ├── data │ │ │ ├── ._SUCCESS.crc │ │ │ ├── .part-00000-1d219005-0cb4-4a77-98bf-2f69a69655a1-c000.snappy.parquet.crc │ │ │ ├── _SUCCESS │ │ │ └── part-00000-1d219005-0cb4-4a77-98bf-2f69a69655a1-c000.snappy.parquet │ │ └── metadata │ │ │ ├── ._SUCCESS.crc │ │ │ ├── .part-00000.crc │ │ │ ├── _SUCCESS │ │ │ └── part-00000 │ ├── new_emails.txt │ ├── training_emails_nospam.txt │ └── training_emails_spam.txt ├── chap10 │ ├── recommendation_example.py │ └── test.data ├── chap11 │ ├── airports.json │ ├── breadth_first_search_example.log │ ├── breadth_first_search_example.py │ ├── breadth_first_search_example.sh │ ├── connected_component_example.log │ ├── connected_component_example.py │ ├── connected_component_example.sh │ ├── flightdata2018.json │ ├── graph_builder.log │ ├── graph_builder.py │ ├── graph_builder.sh │ ├── label_propagation_algorithm_example.log │ ├── label_propagation_algorithm_example.py │ ├── label_propagation_algorithm_example.sh │ ├── pagerank.py │ ├── pagerank_data.txt │ ├── pagerank_example.log │ ├── pagerank_example.py │ ├── pagerank_example.sh │ ├── sample_graph_edges.txt │ ├── sample_graph_vertices.txt │ ├── shortest_path_finder.log │ ├── shortest_path_finder.py │ ├── shortest_path_finder.sh │ ├── triangles_counter.log │ ├── triangles_counter.py │ ├── triangles_counter.sh │ ├── unique_triangles_finder.log │ ├── unique_triangles_finder.py │ └── unique_triangles_finder.sh ├── chap12 │ ├── average_monoid_use_aggregatebykey.log │ ├── average_monoid_use_aggregatebykey.py │ ├── average_monoid_use_aggregatebykey.sh │ ├── average_monoid_use_combinebykey.log │ ├── average_monoid_use_combinebykey.py │ ├── average_monoid_use_combinebykey.sh │ ├── average_monoid_use_groupbykey.log │ ├── average_monoid_use_groupbykey.py │ ├── average_monoid_use_groupbykey.sh │ ├── average_monoid_use_reducebykey.log │ ├── average_monoid_use_reducebykey.py │ ├── average_monoid_use_reducebykey.sh │ ├── inmapper_combiner_local_aggregation.log │ ├── inmapper_combiner_local_aggregation.py │ ├── inmapper_combiner_local_aggregation.sh │ ├── inmapper_combiner_use_basic_mapreduce.log │ ├── inmapper_combiner_use_basic_mapreduce.py │ ├── inmapper_combiner_use_basic_mapreduce.sh │ ├── inmapper_combiner_use_mappartitions.log │ ├── inmapper_combiner_use_mappartitions.py │ ├── inmapper_combiner_use_mappartitions.sh │ ├── minmax_force_empty_partitions.log │ ├── minmax_force_empty_partitions.py │ ├── minmax_force_empty_partitions.sh │ ├── minmax_use_mappartitions.log │ ├── minmax_use_mappartitions.py │ ├── minmax_use_mappartitions.sh │ ├── sample_dna_seq.txt │ ├── sample_input.txt │ ├── sample_numbers.txt │ ├── top_N_use_mappartitions.log │ ├── top_N_use_mappartitions.py │ ├── top_N_use_mappartitions.sh │ ├── top_N_use_takeordered.log │ ├── top_N_use_takeordered.py │ └── top_N_use_takeordered.sh ├── examples │ └── wordcount │ │ ├── foxdata.txt │ │ ├── wordcount.py │ │ └── wordcount.py.usage └── jars │ ├── avro-mapred-1.7.7-hadoop1.jar │ ├── avro-mapred-1.7.7-hadoop2.jar │ ├── com-cotdp-hadoop-1.0-SNAPSHOT.jar │ ├── elasticsearch-hadoop-6.4.2.jar │ ├── elasticsearch-spark_2.11-2.4.5.jar │ ├── graphframes-0.6.0-spark2.3-s_2.11.jar │ ├── hbase-spark-connector-1.0.0.jar │ ├── htrace-core-3.1.0-incubating.jar │ ├── mongo-java-driver-3.8.2.jar │ ├── mongo-spark-connector_2.11-2.2.5.jar │ ├── mongodb-driver-3.8.2.jar │ ├── mysql-connector-java-5.1.42.jar │ ├── shc-core-1.1.3-2.3-s_2.11.jar │ ├── shc-examples-1.1.3-2.3-s_2.11.jar │ ├── spark-redis-2.3.1-SNAPSHOT-jar-with-dependencies.jar │ └── spark-redis-2.3.1-SNAPSHOT.jar ├── images ├── pyspark_algorithms.jpg ├── pyspark_algorithms0.jpg ├── pyspark_algorithms2.jpg └── pyspark_algorithms3.jpg ├── sample_chapters ├── Appendix_Questions_and_Answers.epub ├── Appendix_Questions_and_Answers.pdf ├── README.md ├── chap04_Getting_Started_with_PySpark.epub └── chap04_Getting_Started_with_PySpark.pdf └── where_to_buy_book └── README.md /LICENSE.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/LICENSE.md -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/README.md -------------------------------------------------------------------------------- /code/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/README.md -------------------------------------------------------------------------------- /code/chap01/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap01/README.md -------------------------------------------------------------------------------- /code/chap01/basic_dataframe_example.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap01/basic_dataframe_example.log -------------------------------------------------------------------------------- /code/chap01/basic_dataframe_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap01/basic_dataframe_example.py -------------------------------------------------------------------------------- /code/chap01/basic_dataframe_example.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap01/basic_dataframe_example.sh -------------------------------------------------------------------------------- /code/chap01/compute_stats.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap01/compute_stats.log -------------------------------------------------------------------------------- /code/chap01/compute_stats.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap01/compute_stats.py -------------------------------------------------------------------------------- /code/chap01/compute_stats.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap01/compute_stats.sh -------------------------------------------------------------------------------- /code/chap01/compute_stats_detailed.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap01/compute_stats_detailed.log -------------------------------------------------------------------------------- /code/chap01/compute_stats_detailed.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap01/compute_stats_detailed.py -------------------------------------------------------------------------------- /code/chap01/compute_stats_detailed.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap01/compute_stats_detailed.sh -------------------------------------------------------------------------------- /code/chap01/compute_stats_with_threshold.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap01/compute_stats_with_threshold.log -------------------------------------------------------------------------------- /code/chap01/compute_stats_with_threshold.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap01/compute_stats_with_threshold.py -------------------------------------------------------------------------------- /code/chap01/compute_stats_with_threshold.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap01/compute_stats_with_threshold.sh -------------------------------------------------------------------------------- /code/chap01/compute_stats_with_threshold_and_filter.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap01/compute_stats_with_threshold_and_filter.log -------------------------------------------------------------------------------- /code/chap01/compute_stats_with_threshold_and_filter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap01/compute_stats_with_threshold_and_filter.py -------------------------------------------------------------------------------- /code/chap01/compute_stats_with_threshold_and_filter.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap01/compute_stats_with_threshold_and_filter.sh -------------------------------------------------------------------------------- /code/chap01/dataframe_creation_from_csv.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap01/dataframe_creation_from_csv.log -------------------------------------------------------------------------------- /code/chap01/dataframe_creation_from_csv.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap01/dataframe_creation_from_csv.py -------------------------------------------------------------------------------- /code/chap01/dataframe_creation_from_csv.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap01/dataframe_creation_from_csv.sh -------------------------------------------------------------------------------- /code/chap01/fox_data.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap01/fox_data.txt -------------------------------------------------------------------------------- /code/chap01/name_city_age.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap01/name_city_age.csv -------------------------------------------------------------------------------- /code/chap01/rdd_creation_from_csv.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap01/rdd_creation_from_csv.log -------------------------------------------------------------------------------- /code/chap01/rdd_creation_from_csv.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap01/rdd_creation_from_csv.py -------------------------------------------------------------------------------- /code/chap01/rdd_creation_from_csv.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap01/rdd_creation_from_csv.sh -------------------------------------------------------------------------------- /code/chap01/sample.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap01/sample.txt -------------------------------------------------------------------------------- /code/chap01/sample_numbers.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap01/sample_numbers.txt -------------------------------------------------------------------------------- /code/chap01/sample_people.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap01/sample_people.json -------------------------------------------------------------------------------- /code/chap01/sort_numbers.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap01/sort_numbers.log -------------------------------------------------------------------------------- /code/chap01/sort_numbers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap01/sort_numbers.py -------------------------------------------------------------------------------- /code/chap01/sort_numbers.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap01/sort_numbers.sh -------------------------------------------------------------------------------- /code/chap01/url_frequencies.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap01/url_frequencies.txt -------------------------------------------------------------------------------- /code/chap01/word_count.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap01/word_count.log -------------------------------------------------------------------------------- /code/chap01/word_count.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap01/word_count.py -------------------------------------------------------------------------------- /code/chap01/word_count.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap01/word_count.sh -------------------------------------------------------------------------------- /code/chap01/word_count_with_params.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap01/word_count_with_params.py -------------------------------------------------------------------------------- /code/chap01/word_count_with_params.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap01/word_count_with_params.sh -------------------------------------------------------------------------------- /code/chap01/word_count_with_threshold.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap01/word_count_with_threshold.py -------------------------------------------------------------------------------- /code/chap01/word_count_with_threshold.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap01/word_count_with_threshold.sh -------------------------------------------------------------------------------- /code/chap02/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap02/README.md -------------------------------------------------------------------------------- /code/chap02/generate_key_value_pairs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap02/generate_key_value_pairs.py -------------------------------------------------------------------------------- /code/chap02/sample_file.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap02/sample_file.txt -------------------------------------------------------------------------------- /code/chap02/sample_file_extra.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap02/sample_file_extra.txt -------------------------------------------------------------------------------- /code/chap02/sum_by_groupbykey.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap02/sum_by_groupbykey.log -------------------------------------------------------------------------------- /code/chap02/sum_by_groupbykey.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap02/sum_by_groupbykey.py -------------------------------------------------------------------------------- /code/chap02/sum_by_groupbykey.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap02/sum_by_groupbykey.sh -------------------------------------------------------------------------------- /code/chap02/sum_by_reducebykey.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap02/sum_by_reducebykey.log -------------------------------------------------------------------------------- /code/chap02/sum_by_reducebykey.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap02/sum_by_reducebykey.py -------------------------------------------------------------------------------- /code/chap02/sum_by_reducebykey.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap02/sum_by_reducebykey.sh -------------------------------------------------------------------------------- /code/chap02/word_count_driver.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap02/word_count_driver.log -------------------------------------------------------------------------------- /code/chap02/word_count_driver.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap02/word_count_driver.py -------------------------------------------------------------------------------- /code/chap02/word_count_driver.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap02/word_count_driver.sh -------------------------------------------------------------------------------- /code/chap02/word_count_driver_by_groupbykey.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap02/word_count_driver_by_groupbykey.log -------------------------------------------------------------------------------- /code/chap02/word_count_driver_by_groupbykey.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap02/word_count_driver_by_groupbykey.py -------------------------------------------------------------------------------- /code/chap02/word_count_driver_by_groupbykey.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap02/word_count_driver_by_groupbykey.sh -------------------------------------------------------------------------------- /code/chap02/word_count_driver_shorthand.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap02/word_count_driver_shorthand.log -------------------------------------------------------------------------------- /code/chap02/word_count_driver_shorthand.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap02/word_count_driver_shorthand.py -------------------------------------------------------------------------------- /code/chap02/word_count_driver_shorthand.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap02/word_count_driver_shorthand.sh -------------------------------------------------------------------------------- /code/chap02/word_count_driver_shorthand_by_groupbykey.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap02/word_count_driver_shorthand_by_groupbykey.log -------------------------------------------------------------------------------- /code/chap02/word_count_driver_shorthand_by_groupbykey.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap02/word_count_driver_shorthand_by_groupbykey.py -------------------------------------------------------------------------------- /code/chap02/word_count_driver_shorthand_by_groupbykey.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap02/word_count_driver_shorthand_by_groupbykey.sh -------------------------------------------------------------------------------- /code/chap02/word_count_driver_shorthand_sorted.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap02/word_count_driver_shorthand_sorted.log -------------------------------------------------------------------------------- /code/chap02/word_count_driver_shorthand_sorted.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap02/word_count_driver_shorthand_sorted.py -------------------------------------------------------------------------------- /code/chap02/word_count_driver_shorthand_sorted.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap02/word_count_driver_shorthand_sorted.sh -------------------------------------------------------------------------------- /code/chap02/word_count_driver_with_filter.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap02/word_count_driver_with_filter.log -------------------------------------------------------------------------------- /code/chap02/word_count_driver_with_filter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap02/word_count_driver_with_filter.py -------------------------------------------------------------------------------- /code/chap02/word_count_driver_with_filter.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap02/word_count_driver_with_filter.sh -------------------------------------------------------------------------------- /code/chap02/word_count_driver_with_filter_and_threshold.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap02/word_count_driver_with_filter_and_threshold.log -------------------------------------------------------------------------------- /code/chap02/word_count_driver_with_filter_and_threshold.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap02/word_count_driver_with_filter_and_threshold.py -------------------------------------------------------------------------------- /code/chap02/word_count_driver_with_filter_and_threshold.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap02/word_count_driver_with_filter_and_threshold.sh -------------------------------------------------------------------------------- /code/chap02/word_count_python.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap02/word_count_python.py -------------------------------------------------------------------------------- /code/chap02/word_count_python_shorthand.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap02/word_count_python_shorthand.py -------------------------------------------------------------------------------- /code/chap03/datadir/file1: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap03/datadir/file1 -------------------------------------------------------------------------------- /code/chap03/datadir/file2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap03/datadir/file2 -------------------------------------------------------------------------------- /code/chap03/dataframe_creation_from_collection.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap03/dataframe_creation_from_collection.log -------------------------------------------------------------------------------- /code/chap03/dataframe_creation_from_collection.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap03/dataframe_creation_from_collection.py -------------------------------------------------------------------------------- /code/chap03/dataframe_creation_from_collection.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap03/dataframe_creation_from_collection.sh -------------------------------------------------------------------------------- /code/chap03/dataframe_creation_from_csv_no_header.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap03/dataframe_creation_from_csv_no_header.log -------------------------------------------------------------------------------- /code/chap03/dataframe_creation_from_csv_no_header.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap03/dataframe_creation_from_csv_no_header.py -------------------------------------------------------------------------------- /code/chap03/dataframe_creation_from_csv_no_header.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap03/dataframe_creation_from_csv_no_header.sh -------------------------------------------------------------------------------- /code/chap03/dataframe_creation_from_csv_with_header.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap03/dataframe_creation_from_csv_with_header.log -------------------------------------------------------------------------------- /code/chap03/dataframe_creation_from_csv_with_header.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap03/dataframe_creation_from_csv_with_header.py -------------------------------------------------------------------------------- /code/chap03/dataframe_creation_from_csv_with_header.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap03/dataframe_creation_from_csv_with_header.sh -------------------------------------------------------------------------------- /code/chap03/dataframe_creation_from_dictionary.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap03/dataframe_creation_from_dictionary.log -------------------------------------------------------------------------------- /code/chap03/dataframe_creation_from_dictionary.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap03/dataframe_creation_from_dictionary.py -------------------------------------------------------------------------------- /code/chap03/dataframe_creation_from_dictionary.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap03/dataframe_creation_from_dictionary.sh -------------------------------------------------------------------------------- /code/chap03/dataframe_creation_from_directory.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap03/dataframe_creation_from_directory.log -------------------------------------------------------------------------------- /code/chap03/dataframe_creation_from_directory.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap03/dataframe_creation_from_directory.py -------------------------------------------------------------------------------- /code/chap03/dataframe_creation_from_directory.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap03/dataframe_creation_from_directory.sh -------------------------------------------------------------------------------- /code/chap03/dataframe_creation_from_rdd.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap03/dataframe_creation_from_rdd.log -------------------------------------------------------------------------------- /code/chap03/dataframe_creation_from_rdd.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap03/dataframe_creation_from_rdd.py -------------------------------------------------------------------------------- /code/chap03/dataframe_creation_from_rdd.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap03/dataframe_creation_from_rdd.sh -------------------------------------------------------------------------------- /code/chap03/kv.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap03/kv.txt -------------------------------------------------------------------------------- /code/chap03/kv_no_header.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap03/kv_no_header.txt -------------------------------------------------------------------------------- /code/chap03/kv_with_header.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap03/kv_with_header.txt -------------------------------------------------------------------------------- /code/chap03/rdd_creation_from_collection.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap03/rdd_creation_from_collection.log -------------------------------------------------------------------------------- /code/chap03/rdd_creation_from_collection.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap03/rdd_creation_from_collection.py -------------------------------------------------------------------------------- /code/chap03/rdd_creation_from_collection.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap03/rdd_creation_from_collection.sh -------------------------------------------------------------------------------- /code/chap03/rdd_creation_from_dataframe.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap03/rdd_creation_from_dataframe.log -------------------------------------------------------------------------------- /code/chap03/rdd_creation_from_dataframe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap03/rdd_creation_from_dataframe.py -------------------------------------------------------------------------------- /code/chap03/rdd_creation_from_dataframe.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap03/rdd_creation_from_dataframe.sh -------------------------------------------------------------------------------- /code/chap03/rdd_creation_from_dictionary.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap03/rdd_creation_from_dictionary.log -------------------------------------------------------------------------------- /code/chap03/rdd_creation_from_dictionary.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap03/rdd_creation_from_dictionary.py -------------------------------------------------------------------------------- /code/chap03/rdd_creation_from_dictionary.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap03/rdd_creation_from_dictionary.sh -------------------------------------------------------------------------------- /code/chap03/rdd_creation_from_directory.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap03/rdd_creation_from_directory.log -------------------------------------------------------------------------------- /code/chap03/rdd_creation_from_directory.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap03/rdd_creation_from_directory.py -------------------------------------------------------------------------------- /code/chap03/rdd_creation_from_directory.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap03/rdd_creation_from_directory.sh -------------------------------------------------------------------------------- /code/chap03/rdd_creation_from_file.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap03/rdd_creation_from_file.log -------------------------------------------------------------------------------- /code/chap03/rdd_creation_from_file.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap03/rdd_creation_from_file.py -------------------------------------------------------------------------------- /code/chap03/rdd_creation_from_file.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap03/rdd_creation_from_file.sh -------------------------------------------------------------------------------- /code/chap03/sample.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap03/sample.txt -------------------------------------------------------------------------------- /code/chap03/sample_dir/file1.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap03/sample_dir/file1.txt -------------------------------------------------------------------------------- /code/chap03/sample_dir/file2.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap03/sample_dir/file2.txt -------------------------------------------------------------------------------- /code/chap03/sample_dir2/file1.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap03/sample_dir2/file1.txt -------------------------------------------------------------------------------- /code/chap03/sample_dir2/file2.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap03/sample_dir2/file2.txt -------------------------------------------------------------------------------- /code/chap03/sample_dir2/file3.csv: -------------------------------------------------------------------------------- 1 | alex,33 2 | bob,45 3 | mary,25 4 | jeff,10 5 | -------------------------------------------------------------------------------- /code/chap03/sample_dir2/file4.csv: -------------------------------------------------------------------------------- 1 | amanda,44 2 | terry,64 3 | -------------------------------------------------------------------------------- /code/chap03/word_count.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap03/word_count.py -------------------------------------------------------------------------------- /code/chap03/word_count.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap03/word_count.sh -------------------------------------------------------------------------------- /code/chap03/word_count_with_params.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap03/word_count_with_params.py -------------------------------------------------------------------------------- /code/chap03/word_count_with_params.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap03/word_count_with_params.sh -------------------------------------------------------------------------------- /code/chap03/word_count_with_threshold.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap03/word_count_with_threshold.py -------------------------------------------------------------------------------- /code/chap03/word_count_with_threshold.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap03/word_count_with_threshold.sh -------------------------------------------------------------------------------- /code/chap04/DNA-FASTA-PERFORMANCE/performance_of_FASTA_versions_1_2_3.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap04/DNA-FASTA-PERFORMANCE/performance_of_FASTA_versions_1_2_3.txt -------------------------------------------------------------------------------- /code/chap04/DNA-FASTA-V1/run_dna_base_count_ver_1.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap04/DNA-FASTA-V1/run_dna_base_count_ver_1.py -------------------------------------------------------------------------------- /code/chap04/DNA-FASTA-V1/run_dna_base_count_ver_1.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap04/DNA-FASTA-V1/run_dna_base_count_ver_1.sh -------------------------------------------------------------------------------- /code/chap04/DNA-FASTA-V1/run_dna_base_count_ver_1_1GB.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap04/DNA-FASTA-V1/run_dna_base_count_ver_1_1GB.sh -------------------------------------------------------------------------------- /code/chap04/DNA-FASTA-V1/run_dna_base_count_ver_1_big.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap04/DNA-FASTA-V1/run_dna_base_count_ver_1_big.sh -------------------------------------------------------------------------------- /code/chap04/DNA-FASTA-V2/dna_base_count_ver_2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap04/DNA-FASTA-V2/dna_base_count_ver_2.py -------------------------------------------------------------------------------- /code/chap04/DNA-FASTA-V2/run_dna_base_count_ver_2.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap04/DNA-FASTA-V2/run_dna_base_count_ver_2.sh -------------------------------------------------------------------------------- /code/chap04/DNA-FASTA-V2/run_dna_base_count_ver_2_1GB.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap04/DNA-FASTA-V2/run_dna_base_count_ver_2_1GB.sh -------------------------------------------------------------------------------- /code/chap04/DNA-FASTA-V2/run_dna_base_count_ver_2_big.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap04/DNA-FASTA-V2/run_dna_base_count_ver_2_big.sh -------------------------------------------------------------------------------- /code/chap04/DNA-FASTA-V3/dna_base_count_ver_3.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap04/DNA-FASTA-V3/dna_base_count_ver_3.py -------------------------------------------------------------------------------- /code/chap04/DNA-FASTA-V3/run_dna_base_count_ver_3.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap04/DNA-FASTA-V3/run_dna_base_count_ver_3.sh -------------------------------------------------------------------------------- /code/chap04/DNA-FASTA-V3/run_dna_base_count_ver_3_1GB.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap04/DNA-FASTA-V3/run_dna_base_count_ver_3_1GB.sh -------------------------------------------------------------------------------- /code/chap04/DNA-FASTA-V3/run_dna_base_count_ver_3_big.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap04/DNA-FASTA-V3/run_dna_base_count_ver_3_big.sh -------------------------------------------------------------------------------- /code/chap04/DNA-FASTQ/dna_base_count_fastq.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap04/DNA-FASTQ/dna_base_count_fastq.py -------------------------------------------------------------------------------- /code/chap04/DNA-FASTQ/run_dna_base_count_fastq.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap04/DNA-FASTQ/run_dna_base_count_fastq.sh -------------------------------------------------------------------------------- /code/chap04/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap04/README.md -------------------------------------------------------------------------------- /code/chap04/data/sample.fasta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap04/data/sample.fasta -------------------------------------------------------------------------------- /code/chap04/data/sp1.fastq: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap04/data/sp1.fastq -------------------------------------------------------------------------------- /code/chap05/average_by_key_use_aggregatebykey.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap05/average_by_key_use_aggregatebykey.log -------------------------------------------------------------------------------- /code/chap05/average_by_key_use_aggregatebykey.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap05/average_by_key_use_aggregatebykey.py -------------------------------------------------------------------------------- /code/chap05/average_by_key_use_aggregatebykey.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap05/average_by_key_use_aggregatebykey.sh -------------------------------------------------------------------------------- /code/chap05/average_by_key_use_combinebykey.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap05/average_by_key_use_combinebykey.log -------------------------------------------------------------------------------- /code/chap05/average_by_key_use_combinebykey.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap05/average_by_key_use_combinebykey.py -------------------------------------------------------------------------------- /code/chap05/average_by_key_use_combinebykey.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap05/average_by_key_use_combinebykey.sh -------------------------------------------------------------------------------- /code/chap05/average_by_key_use_foldbykey.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap05/average_by_key_use_foldbykey.log -------------------------------------------------------------------------------- /code/chap05/average_by_key_use_foldbykey.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap05/average_by_key_use_foldbykey.py -------------------------------------------------------------------------------- /code/chap05/average_by_key_use_foldbykey.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap05/average_by_key_use_foldbykey.sh -------------------------------------------------------------------------------- /code/chap05/average_by_key_use_groupbykey.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap05/average_by_key_use_groupbykey.log -------------------------------------------------------------------------------- /code/chap05/average_by_key_use_groupbykey.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap05/average_by_key_use_groupbykey.py -------------------------------------------------------------------------------- /code/chap05/average_by_key_use_groupbykey.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap05/average_by_key_use_groupbykey.sh -------------------------------------------------------------------------------- /code/chap05/average_by_key_use_reducebykey.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap05/average_by_key_use_reducebykey.log -------------------------------------------------------------------------------- /code/chap05/average_by_key_use_reducebykey.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap05/average_by_key_use_reducebykey.py -------------------------------------------------------------------------------- /code/chap05/average_by_key_use_reducebykey.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap05/average_by_key_use_reducebykey.sh -------------------------------------------------------------------------------- /code/chap05/dataframe_action_describe.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap05/dataframe_action_describe.log -------------------------------------------------------------------------------- /code/chap05/dataframe_action_describe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap05/dataframe_action_describe.py -------------------------------------------------------------------------------- /code/chap05/dataframe_action_describe.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap05/dataframe_action_describe.sh -------------------------------------------------------------------------------- /code/chap05/dataframe_drop.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap05/dataframe_drop.log -------------------------------------------------------------------------------- /code/chap05/dataframe_drop.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap05/dataframe_drop.py -------------------------------------------------------------------------------- /code/chap05/dataframe_drop.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap05/dataframe_drop.sh -------------------------------------------------------------------------------- /code/chap05/dataframe_filter.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap05/dataframe_filter.log -------------------------------------------------------------------------------- /code/chap05/dataframe_filter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap05/dataframe_filter.py -------------------------------------------------------------------------------- /code/chap05/dataframe_filter.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap05/dataframe_filter.sh -------------------------------------------------------------------------------- /code/chap05/dataframe_join_cross.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap05/dataframe_join_cross.log -------------------------------------------------------------------------------- /code/chap05/dataframe_join_cross.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap05/dataframe_join_cross.py -------------------------------------------------------------------------------- /code/chap05/dataframe_join_cross.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap05/dataframe_join_cross.sh -------------------------------------------------------------------------------- /code/chap05/dataframe_join_inner.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap05/dataframe_join_inner.log -------------------------------------------------------------------------------- /code/chap05/dataframe_join_inner.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap05/dataframe_join_inner.py -------------------------------------------------------------------------------- /code/chap05/dataframe_join_inner.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap05/dataframe_join_inner.sh -------------------------------------------------------------------------------- /code/chap05/dataframe_join_left.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap05/dataframe_join_left.log -------------------------------------------------------------------------------- /code/chap05/dataframe_join_left.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap05/dataframe_join_left.py -------------------------------------------------------------------------------- /code/chap05/dataframe_join_left.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap05/dataframe_join_left.sh -------------------------------------------------------------------------------- /code/chap05/dataframe_join_right.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap05/dataframe_join_right.log -------------------------------------------------------------------------------- /code/chap05/dataframe_join_right.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap05/dataframe_join_right.py -------------------------------------------------------------------------------- /code/chap05/dataframe_join_right.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap05/dataframe_join_right.sh -------------------------------------------------------------------------------- /code/chap05/dataframe_sql.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap05/dataframe_sql.log -------------------------------------------------------------------------------- /code/chap05/dataframe_sql.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap05/dataframe_sql.py -------------------------------------------------------------------------------- /code/chap05/dataframe_sql.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap05/dataframe_sql.sh -------------------------------------------------------------------------------- /code/chap05/dataframe_withcolumn.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap05/dataframe_withcolumn.log -------------------------------------------------------------------------------- /code/chap05/dataframe_withcolumn.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap05/dataframe_withcolumn.py -------------------------------------------------------------------------------- /code/chap05/dataframe_withcolumn.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap05/dataframe_withcolumn.sh -------------------------------------------------------------------------------- /code/chap05/emps.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap05/emps.txt -------------------------------------------------------------------------------- /code/chap05/rdd_transformation_cartesian.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap05/rdd_transformation_cartesian.log -------------------------------------------------------------------------------- /code/chap05/rdd_transformation_cartesian.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap05/rdd_transformation_cartesian.py -------------------------------------------------------------------------------- /code/chap05/rdd_transformation_cartesian.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap05/rdd_transformation_cartesian.sh -------------------------------------------------------------------------------- /code/chap05/rdd_transformation_combinebykey.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap05/rdd_transformation_combinebykey.log -------------------------------------------------------------------------------- /code/chap05/rdd_transformation_combinebykey.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap05/rdd_transformation_combinebykey.py -------------------------------------------------------------------------------- /code/chap05/rdd_transformation_combinebykey.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap05/rdd_transformation_combinebykey.sh -------------------------------------------------------------------------------- /code/chap05/rdd_transformation_filter.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap05/rdd_transformation_filter.log -------------------------------------------------------------------------------- /code/chap05/rdd_transformation_filter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap05/rdd_transformation_filter.py -------------------------------------------------------------------------------- /code/chap05/rdd_transformation_filter.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap05/rdd_transformation_filter.sh -------------------------------------------------------------------------------- /code/chap05/rdd_transformation_flatmap.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap05/rdd_transformation_flatmap.log -------------------------------------------------------------------------------- /code/chap05/rdd_transformation_flatmap.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap05/rdd_transformation_flatmap.py -------------------------------------------------------------------------------- /code/chap05/rdd_transformation_flatmap.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap05/rdd_transformation_flatmap.sh -------------------------------------------------------------------------------- /code/chap05/rdd_transformation_groupbykey.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap05/rdd_transformation_groupbykey.log -------------------------------------------------------------------------------- /code/chap05/rdd_transformation_groupbykey.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap05/rdd_transformation_groupbykey.py -------------------------------------------------------------------------------- /code/chap05/rdd_transformation_groupbykey.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap05/rdd_transformation_groupbykey.sh -------------------------------------------------------------------------------- /code/chap05/rdd_transformation_join.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap05/rdd_transformation_join.log -------------------------------------------------------------------------------- /code/chap05/rdd_transformation_join.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap05/rdd_transformation_join.py -------------------------------------------------------------------------------- /code/chap05/rdd_transformation_join.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap05/rdd_transformation_join.sh -------------------------------------------------------------------------------- /code/chap05/rdd_transformation_map.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap05/rdd_transformation_map.log -------------------------------------------------------------------------------- /code/chap05/rdd_transformation_map.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap05/rdd_transformation_map.py -------------------------------------------------------------------------------- /code/chap05/rdd_transformation_map.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap05/rdd_transformation_map.sh -------------------------------------------------------------------------------- /code/chap05/rdd_transformation_mappartitions.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap05/rdd_transformation_mappartitions.log -------------------------------------------------------------------------------- /code/chap05/rdd_transformation_mappartitions.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap05/rdd_transformation_mappartitions.py -------------------------------------------------------------------------------- /code/chap05/rdd_transformation_mappartitions.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap05/rdd_transformation_mappartitions.sh -------------------------------------------------------------------------------- /code/chap05/rdd_transformation_mappartitions_handle_empty_partitions.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap05/rdd_transformation_mappartitions_handle_empty_partitions.log -------------------------------------------------------------------------------- /code/chap05/rdd_transformation_mappartitions_handle_empty_partitions.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap05/rdd_transformation_mappartitions_handle_empty_partitions.py -------------------------------------------------------------------------------- /code/chap05/rdd_transformation_mappartitions_handle_empty_partitions.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap05/rdd_transformation_mappartitions_handle_empty_partitions.sh -------------------------------------------------------------------------------- /code/chap05/rdd_transformation_reducebykey.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap05/rdd_transformation_reducebykey.log -------------------------------------------------------------------------------- /code/chap05/rdd_transformation_reducebykey.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap05/rdd_transformation_reducebykey.py -------------------------------------------------------------------------------- /code/chap05/rdd_transformation_reducebykey.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap05/rdd_transformation_reducebykey.sh -------------------------------------------------------------------------------- /code/chap05/rdd_transformation_sortby.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap05/rdd_transformation_sortby.log -------------------------------------------------------------------------------- /code/chap05/rdd_transformation_sortby.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap05/rdd_transformation_sortby.py -------------------------------------------------------------------------------- /code/chap05/rdd_transformation_sortby.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap05/rdd_transformation_sortby.sh -------------------------------------------------------------------------------- /code/chap05/rdd_transformation_takeordered.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap05/rdd_transformation_takeordered.log -------------------------------------------------------------------------------- /code/chap05/rdd_transformation_takeordered.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap05/rdd_transformation_takeordered.py -------------------------------------------------------------------------------- /code/chap05/rdd_transformation_takeordered.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap05/rdd_transformation_takeordered.sh -------------------------------------------------------------------------------- /code/chap05/sample_5_records.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap05/sample_5_records.txt -------------------------------------------------------------------------------- /code/chap05/users.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap05/users.txt -------------------------------------------------------------------------------- /code/chap06/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap06/README.md -------------------------------------------------------------------------------- /code/chap06/average_by_key_use_aggregatebykey.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap06/average_by_key_use_aggregatebykey.py -------------------------------------------------------------------------------- /code/chap06/average_by_key_use_aggregatebykey.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap06/average_by_key_use_aggregatebykey.sh -------------------------------------------------------------------------------- /code/chap06/average_by_key_use_combinebykey.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap06/average_by_key_use_combinebykey.py -------------------------------------------------------------------------------- /code/chap06/average_by_key_use_combinebykey.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap06/average_by_key_use_combinebykey.sh -------------------------------------------------------------------------------- /code/chap06/average_by_key_use_groupbykey.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap06/average_by_key_use_groupbykey.py -------------------------------------------------------------------------------- /code/chap06/average_by_key_use_groupbykey.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap06/average_by_key_use_groupbykey.sh -------------------------------------------------------------------------------- /code/chap06/average_by_key_use_reducebykey.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap06/average_by_key_use_reducebykey.py -------------------------------------------------------------------------------- /code/chap06/average_by_key_use_reducebykey.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap06/average_by_key_use_reducebykey.sh -------------------------------------------------------------------------------- /code/chap07/WorldCupPlayers.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap07/WorldCupPlayers.csv -------------------------------------------------------------------------------- /code/chap07/WorldCupPlayers.csv.data.source: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap07/WorldCupPlayers.csv.data.source -------------------------------------------------------------------------------- /code/chap07/WorldCupPlayers.csv.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap07/WorldCupPlayers.csv.zip -------------------------------------------------------------------------------- /code/chap07/customers.RECORD.FORMAT.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap07/customers.RECORD.FORMAT.txt -------------------------------------------------------------------------------- /code/chap07/customers.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap07/customers.txt -------------------------------------------------------------------------------- /code/chap07/dataframe_creation_add_columns.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap07/dataframe_creation_add_columns.log -------------------------------------------------------------------------------- /code/chap07/dataframe_creation_add_columns.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap07/dataframe_creation_add_columns.py -------------------------------------------------------------------------------- /code/chap07/dataframe_creation_add_columns.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap07/dataframe_creation_add_columns.sh -------------------------------------------------------------------------------- /code/chap07/dataframe_creation_aggregate_multiple_columns.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap07/dataframe_creation_aggregate_multiple_columns.log -------------------------------------------------------------------------------- /code/chap07/dataframe_creation_aggregate_multiple_columns.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap07/dataframe_creation_aggregate_multiple_columns.py -------------------------------------------------------------------------------- /code/chap07/dataframe_creation_aggregate_multiple_columns.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap07/dataframe_creation_aggregate_multiple_columns.sh -------------------------------------------------------------------------------- /code/chap07/dataframe_creation_aggregate_single_column.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap07/dataframe_creation_aggregate_single_column.log -------------------------------------------------------------------------------- /code/chap07/dataframe_creation_aggregate_single_column.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap07/dataframe_creation_aggregate_single_column.py -------------------------------------------------------------------------------- /code/chap07/dataframe_creation_aggregate_single_column.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap07/dataframe_creation_aggregate_single_column.sh -------------------------------------------------------------------------------- /code/chap07/dataframe_creation_call_udf.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap07/dataframe_creation_call_udf.log -------------------------------------------------------------------------------- /code/chap07/dataframe_creation_call_udf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap07/dataframe_creation_call_udf.py -------------------------------------------------------------------------------- /code/chap07/dataframe_creation_call_udf.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap07/dataframe_creation_call_udf.sh -------------------------------------------------------------------------------- /code/chap07/dataframe_creation_cvs_no_header.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap07/dataframe_creation_cvs_no_header.log -------------------------------------------------------------------------------- /code/chap07/dataframe_creation_cvs_no_header.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap07/dataframe_creation_cvs_no_header.py -------------------------------------------------------------------------------- /code/chap07/dataframe_creation_cvs_no_header.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap07/dataframe_creation_cvs_no_header.sh -------------------------------------------------------------------------------- /code/chap07/dataframe_creation_cvs_with_header.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap07/dataframe_creation_cvs_with_header.log -------------------------------------------------------------------------------- /code/chap07/dataframe_creation_cvs_with_header.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap07/dataframe_creation_cvs_with_header.py -------------------------------------------------------------------------------- /code/chap07/dataframe_creation_cvs_with_header.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap07/dataframe_creation_cvs_with_header.sh -------------------------------------------------------------------------------- /code/chap07/dataframe_creation_from_collections.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap07/dataframe_creation_from_collections.log -------------------------------------------------------------------------------- /code/chap07/dataframe_creation_from_collections.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap07/dataframe_creation_from_collections.py -------------------------------------------------------------------------------- /code/chap07/dataframe_creation_from_collections.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap07/dataframe_creation_from_collections.sh -------------------------------------------------------------------------------- /code/chap07/dataframe_creation_from_pandas.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap07/dataframe_creation_from_pandas.log -------------------------------------------------------------------------------- /code/chap07/dataframe_creation_from_pandas.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap07/dataframe_creation_from_pandas.py -------------------------------------------------------------------------------- /code/chap07/dataframe_creation_from_pandas.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap07/dataframe_creation_from_pandas.sh -------------------------------------------------------------------------------- /code/chap07/dataframe_creation_from_rows.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap07/dataframe_creation_from_rows.log -------------------------------------------------------------------------------- /code/chap07/dataframe_creation_from_rows.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap07/dataframe_creation_from_rows.py -------------------------------------------------------------------------------- /code/chap07/dataframe_creation_from_rows.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap07/dataframe_creation_from_rows.sh -------------------------------------------------------------------------------- /code/chap07/dataframe_creation_order_by.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap07/dataframe_creation_order_by.log -------------------------------------------------------------------------------- /code/chap07/dataframe_creation_order_by.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap07/dataframe_creation_order_by.py -------------------------------------------------------------------------------- /code/chap07/dataframe_creation_order_by.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap07/dataframe_creation_order_by.sh -------------------------------------------------------------------------------- /code/chap07/dataframe_creation_with_explicit_schema.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap07/dataframe_creation_with_explicit_schema.log -------------------------------------------------------------------------------- /code/chap07/dataframe_creation_with_explicit_schema.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap07/dataframe_creation_with_explicit_schema.py -------------------------------------------------------------------------------- /code/chap07/dataframe_creation_with_explicit_schema.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap07/dataframe_creation_with_explicit_schema.sh -------------------------------------------------------------------------------- /code/chap07/dataframe_crosstab.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap07/dataframe_crosstab.log -------------------------------------------------------------------------------- /code/chap07/dataframe_crosstab.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap07/dataframe_crosstab.py -------------------------------------------------------------------------------- /code/chap07/dataframe_crosstab.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap07/dataframe_crosstab.sh -------------------------------------------------------------------------------- /code/chap07/dataframe_drop_column.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap07/dataframe_drop_column.log -------------------------------------------------------------------------------- /code/chap07/dataframe_drop_column.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap07/dataframe_drop_column.py -------------------------------------------------------------------------------- /code/chap07/dataframe_drop_column.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap07/dataframe_drop_column.sh -------------------------------------------------------------------------------- /code/chap07/dataframe_drop_duplicates.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap07/dataframe_drop_duplicates.log -------------------------------------------------------------------------------- /code/chap07/dataframe_drop_duplicates.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap07/dataframe_drop_duplicates.py -------------------------------------------------------------------------------- /code/chap07/dataframe_drop_duplicates.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap07/dataframe_drop_duplicates.sh -------------------------------------------------------------------------------- /code/chap07/dataframe_multi_dim_agg_groupby.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap07/dataframe_multi_dim_agg_groupby.log -------------------------------------------------------------------------------- /code/chap07/dataframe_multi_dim_agg_groupby.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap07/dataframe_multi_dim_agg_groupby.py -------------------------------------------------------------------------------- /code/chap07/dataframe_multi_dim_agg_groupby.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap07/dataframe_multi_dim_agg_groupby.sh -------------------------------------------------------------------------------- /code/chap07/dataframe_multi_dim_agg_rollup.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap07/dataframe_multi_dim_agg_rollup.log -------------------------------------------------------------------------------- /code/chap07/dataframe_multi_dim_agg_rollup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap07/dataframe_multi_dim_agg_rollup.py -------------------------------------------------------------------------------- /code/chap07/dataframe_multi_dim_agg_rollup.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap07/dataframe_multi_dim_agg_rollup.sh -------------------------------------------------------------------------------- /code/chap07/dataframe_tutorial_with_worldcup.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap07/dataframe_tutorial_with_worldcup.log -------------------------------------------------------------------------------- /code/chap07/dataframe_tutorial_with_worldcup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap07/dataframe_tutorial_with_worldcup.py -------------------------------------------------------------------------------- /code/chap07/dataframe_tutorial_with_worldcup.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap07/dataframe_tutorial_with_worldcup.sh -------------------------------------------------------------------------------- /code/chap07/dataframe_with_statistical_data.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap07/dataframe_with_statistical_data.log -------------------------------------------------------------------------------- /code/chap07/dataframe_with_statistical_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap07/dataframe_with_statistical_data.py -------------------------------------------------------------------------------- /code/chap07/dataframe_with_statistical_data.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap07/dataframe_with_statistical_data.sh -------------------------------------------------------------------------------- /code/chap07/emps_no_header.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap07/emps_no_header.txt -------------------------------------------------------------------------------- /code/chap07/emps_with_header.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap07/emps_with_header.txt -------------------------------------------------------------------------------- /code/chap07/life_expentancy.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap07/life_expentancy.txt -------------------------------------------------------------------------------- /code/chap07/partition_data_by_customer_and_year.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap07/partition_data_by_customer_and_year.log -------------------------------------------------------------------------------- /code/chap07/partition_data_by_customer_and_year.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap07/partition_data_by_customer_and_year.py -------------------------------------------------------------------------------- /code/chap07/partition_data_by_customer_and_year.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap07/partition_data_by_customer_and_year.sh -------------------------------------------------------------------------------- /code/chap07/partition_data_by_customer_and_year_single_file.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap07/partition_data_by_customer_and_year_single_file.py -------------------------------------------------------------------------------- /code/chap07/strings-2.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap07/strings-2.parquet -------------------------------------------------------------------------------- /code/chap07/users.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap07/users.parquet -------------------------------------------------------------------------------- /code/chap07/users4.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap07/users4.parquet -------------------------------------------------------------------------------- /code/chap08/cats.no.header.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap08/cats.no.header.csv -------------------------------------------------------------------------------- /code/chap08/cats.with.header.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap08/cats.with.header.csv -------------------------------------------------------------------------------- /code/chap08/datasource_csv_reader_no_header.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap08/datasource_csv_reader_no_header.log -------------------------------------------------------------------------------- /code/chap08/datasource_csv_reader_no_header.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap08/datasource_csv_reader_no_header.py -------------------------------------------------------------------------------- /code/chap08/datasource_csv_reader_no_header.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap08/datasource_csv_reader_no_header.sh -------------------------------------------------------------------------------- /code/chap08/datasource_csv_reader_with_header.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap08/datasource_csv_reader_with_header.log -------------------------------------------------------------------------------- /code/chap08/datasource_csv_reader_with_header.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap08/datasource_csv_reader_with_header.py -------------------------------------------------------------------------------- /code/chap08/datasource_csv_reader_with_header.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap08/datasource_csv_reader_with_header.sh -------------------------------------------------------------------------------- /code/chap08/datasource_csv_writer.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap08/datasource_csv_writer.log -------------------------------------------------------------------------------- /code/chap08/datasource_csv_writer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap08/datasource_csv_writer.py -------------------------------------------------------------------------------- /code/chap08/datasource_csv_writer.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap08/datasource_csv_writer.sh -------------------------------------------------------------------------------- /code/chap08/datasource_elasticsearch_reader.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap08/datasource_elasticsearch_reader.log -------------------------------------------------------------------------------- /code/chap08/datasource_elasticsearch_reader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap08/datasource_elasticsearch_reader.py -------------------------------------------------------------------------------- /code/chap08/datasource_elasticsearch_reader.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap08/datasource_elasticsearch_reader.sh -------------------------------------------------------------------------------- /code/chap08/datasource_elasticsearch_writer.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap08/datasource_elasticsearch_writer.log -------------------------------------------------------------------------------- /code/chap08/datasource_elasticsearch_writer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap08/datasource_elasticsearch_writer.py -------------------------------------------------------------------------------- /code/chap08/datasource_elasticsearch_writer.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap08/datasource_elasticsearch_writer.sh -------------------------------------------------------------------------------- /code/chap08/datasource_gzip_reader.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap08/datasource_gzip_reader.log -------------------------------------------------------------------------------- /code/chap08/datasource_gzip_reader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap08/datasource_gzip_reader.py -------------------------------------------------------------------------------- /code/chap08/datasource_gzip_reader.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap08/datasource_gzip_reader.sh -------------------------------------------------------------------------------- /code/chap08/datasource_jdbc_reader.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap08/datasource_jdbc_reader.log -------------------------------------------------------------------------------- /code/chap08/datasource_jdbc_reader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap08/datasource_jdbc_reader.py -------------------------------------------------------------------------------- /code/chap08/datasource_jdbc_reader.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap08/datasource_jdbc_reader.sh -------------------------------------------------------------------------------- /code/chap08/datasource_jdbc_writer.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap08/datasource_jdbc_writer.log -------------------------------------------------------------------------------- /code/chap08/datasource_jdbc_writer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap08/datasource_jdbc_writer.py -------------------------------------------------------------------------------- /code/chap08/datasource_jdbc_writer.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap08/datasource_jdbc_writer.sh -------------------------------------------------------------------------------- /code/chap08/datasource_json_reader_multi_line.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap08/datasource_json_reader_multi_line.log -------------------------------------------------------------------------------- /code/chap08/datasource_json_reader_multi_line.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap08/datasource_json_reader_multi_line.py -------------------------------------------------------------------------------- /code/chap08/datasource_json_reader_multi_line.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap08/datasource_json_reader_multi_line.sh -------------------------------------------------------------------------------- /code/chap08/datasource_json_reader_single_line.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap08/datasource_json_reader_single_line.log -------------------------------------------------------------------------------- /code/chap08/datasource_json_reader_single_line.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap08/datasource_json_reader_single_line.py -------------------------------------------------------------------------------- /code/chap08/datasource_json_reader_single_line.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap08/datasource_json_reader_single_line.sh -------------------------------------------------------------------------------- /code/chap08/datasource_mongodb_reader.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap08/datasource_mongodb_reader.log -------------------------------------------------------------------------------- /code/chap08/datasource_mongodb_reader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap08/datasource_mongodb_reader.py -------------------------------------------------------------------------------- /code/chap08/datasource_mongodb_reader.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap08/datasource_mongodb_reader.sh -------------------------------------------------------------------------------- /code/chap08/datasource_mongodb_writer.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap08/datasource_mongodb_writer.log -------------------------------------------------------------------------------- /code/chap08/datasource_mongodb_writer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap08/datasource_mongodb_writer.py -------------------------------------------------------------------------------- /code/chap08/datasource_mongodb_writer.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap08/datasource_mongodb_writer.sh -------------------------------------------------------------------------------- /code/chap08/datasource_redis_reader.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap08/datasource_redis_reader.log -------------------------------------------------------------------------------- /code/chap08/datasource_redis_reader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap08/datasource_redis_reader.py -------------------------------------------------------------------------------- /code/chap08/datasource_redis_reader.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap08/datasource_redis_reader.sh -------------------------------------------------------------------------------- /code/chap08/datasource_redis_writer.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap08/datasource_redis_writer.log -------------------------------------------------------------------------------- /code/chap08/datasource_redis_writer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap08/datasource_redis_writer.py -------------------------------------------------------------------------------- /code/chap08/datasource_redis_writer.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap08/datasource_redis_writer.sh -------------------------------------------------------------------------------- /code/chap08/datasource_textfile_reader.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap08/datasource_textfile_reader.log -------------------------------------------------------------------------------- /code/chap08/datasource_textfile_reader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap08/datasource_textfile_reader.py -------------------------------------------------------------------------------- /code/chap08/datasource_textfile_reader.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap08/datasource_textfile_reader.sh -------------------------------------------------------------------------------- /code/chap08/datasource_textfile_writer.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap08/datasource_textfile_writer.log -------------------------------------------------------------------------------- /code/chap08/datasource_textfile_writer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap08/datasource_textfile_writer.py -------------------------------------------------------------------------------- /code/chap08/datasource_textfile_writer.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap08/datasource_textfile_writer.sh -------------------------------------------------------------------------------- /code/chap08/images/cat1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap08/images/cat1.jpg -------------------------------------------------------------------------------- /code/chap08/images/cat2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap08/images/cat2.jpg -------------------------------------------------------------------------------- /code/chap08/images/cat3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap08/images/cat3.jpg -------------------------------------------------------------------------------- /code/chap08/images/cat4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap08/images/cat4.jpg -------------------------------------------------------------------------------- /code/chap08/images/duck1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap08/images/duck1.jpg -------------------------------------------------------------------------------- /code/chap08/images/duck2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap08/images/duck2.jpg -------------------------------------------------------------------------------- /code/chap08/images/not-image.txt: -------------------------------------------------------------------------------- 1 | not an image 2 | -------------------------------------------------------------------------------- /code/chap08/mongodb_coll44.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap08/mongodb_coll44.png -------------------------------------------------------------------------------- /code/chap08/mongodb_coll66.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap08/mongodb_coll66.png -------------------------------------------------------------------------------- /code/chap08/name_age_salary.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap08/name_age_salary.csv -------------------------------------------------------------------------------- /code/chap08/people.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap08/people.txt -------------------------------------------------------------------------------- /code/chap08/sample_multi_line.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap08/sample_multi_line.json -------------------------------------------------------------------------------- /code/chap08/sample_no_header.csv: -------------------------------------------------------------------------------- 1 | Alex,Sunnyvale,30 2 | Mary,Cupertino,28 3 | Jane,Stanford,44 4 | Bob,Ames,33 5 | -------------------------------------------------------------------------------- /code/chap08/sample_numbers.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap08/sample_numbers.txt -------------------------------------------------------------------------------- /code/chap08/sample_single_line.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap08/sample_single_line.json -------------------------------------------------------------------------------- /code/chap08/sample_with_header.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap08/sample_with_header.csv -------------------------------------------------------------------------------- /code/chap08/twitter.avro: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap08/twitter.avro -------------------------------------------------------------------------------- /code/chap09/logistic_regression_builder.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap09/logistic_regression_builder.log -------------------------------------------------------------------------------- /code/chap09/logistic_regression_builder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap09/logistic_regression_builder.py -------------------------------------------------------------------------------- /code/chap09/logistic_regression_builder.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap09/logistic_regression_builder.sh -------------------------------------------------------------------------------- /code/chap09/logistic_regression_predictor.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap09/logistic_regression_predictor.log -------------------------------------------------------------------------------- /code/chap09/logistic_regression_predictor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap09/logistic_regression_predictor.py -------------------------------------------------------------------------------- /code/chap09/logistic_regression_predictor.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap09/logistic_regression_predictor.sh -------------------------------------------------------------------------------- /code/chap09/model/data/._SUCCESS.crc: -------------------------------------------------------------------------------- 1 | crc -------------------------------------------------------------------------------- /code/chap09/model/data/.part-00000-1d219005-0cb4-4a77-98bf-2f69a69655a1-c000.snappy.parquet.crc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap09/model/data/.part-00000-1d219005-0cb4-4a77-98bf-2f69a69655a1-c000.snappy.parquet.crc -------------------------------------------------------------------------------- /code/chap09/model/data/_SUCCESS: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /code/chap09/model/data/part-00000-1d219005-0cb4-4a77-98bf-2f69a69655a1-c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap09/model/data/part-00000-1d219005-0cb4-4a77-98bf-2f69a69655a1-c000.snappy.parquet -------------------------------------------------------------------------------- /code/chap09/model/metadata/._SUCCESS.crc: -------------------------------------------------------------------------------- 1 | crc -------------------------------------------------------------------------------- /code/chap09/model/metadata/.part-00000.crc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap09/model/metadata/.part-00000.crc -------------------------------------------------------------------------------- /code/chap09/model/metadata/_SUCCESS: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /code/chap09/model/metadata/part-00000: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap09/model/metadata/part-00000 -------------------------------------------------------------------------------- /code/chap09/new_emails.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap09/new_emails.txt -------------------------------------------------------------------------------- /code/chap09/training_emails_nospam.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap09/training_emails_nospam.txt -------------------------------------------------------------------------------- /code/chap09/training_emails_spam.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap09/training_emails_spam.txt -------------------------------------------------------------------------------- /code/chap10/recommendation_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap10/recommendation_example.py -------------------------------------------------------------------------------- /code/chap10/test.data: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap10/test.data -------------------------------------------------------------------------------- /code/chap11/airports.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap11/airports.json -------------------------------------------------------------------------------- /code/chap11/breadth_first_search_example.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap11/breadth_first_search_example.log -------------------------------------------------------------------------------- /code/chap11/breadth_first_search_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap11/breadth_first_search_example.py -------------------------------------------------------------------------------- /code/chap11/breadth_first_search_example.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap11/breadth_first_search_example.sh -------------------------------------------------------------------------------- /code/chap11/connected_component_example.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap11/connected_component_example.log -------------------------------------------------------------------------------- /code/chap11/connected_component_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap11/connected_component_example.py -------------------------------------------------------------------------------- /code/chap11/connected_component_example.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap11/connected_component_example.sh -------------------------------------------------------------------------------- /code/chap11/flightdata2018.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap11/flightdata2018.json -------------------------------------------------------------------------------- /code/chap11/graph_builder.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap11/graph_builder.log -------------------------------------------------------------------------------- /code/chap11/graph_builder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap11/graph_builder.py -------------------------------------------------------------------------------- /code/chap11/graph_builder.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap11/graph_builder.sh -------------------------------------------------------------------------------- /code/chap11/label_propagation_algorithm_example.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap11/label_propagation_algorithm_example.log -------------------------------------------------------------------------------- /code/chap11/label_propagation_algorithm_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap11/label_propagation_algorithm_example.py -------------------------------------------------------------------------------- /code/chap11/label_propagation_algorithm_example.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap11/label_propagation_algorithm_example.sh -------------------------------------------------------------------------------- /code/chap11/pagerank.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap11/pagerank.py -------------------------------------------------------------------------------- /code/chap11/pagerank_data.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap11/pagerank_data.txt -------------------------------------------------------------------------------- /code/chap11/pagerank_example.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap11/pagerank_example.log -------------------------------------------------------------------------------- /code/chap11/pagerank_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap11/pagerank_example.py -------------------------------------------------------------------------------- /code/chap11/pagerank_example.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap11/pagerank_example.sh -------------------------------------------------------------------------------- /code/chap11/sample_graph_edges.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap11/sample_graph_edges.txt -------------------------------------------------------------------------------- /code/chap11/sample_graph_vertices.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap11/sample_graph_vertices.txt -------------------------------------------------------------------------------- /code/chap11/shortest_path_finder.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap11/shortest_path_finder.log -------------------------------------------------------------------------------- /code/chap11/shortest_path_finder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap11/shortest_path_finder.py -------------------------------------------------------------------------------- /code/chap11/shortest_path_finder.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap11/shortest_path_finder.sh -------------------------------------------------------------------------------- /code/chap11/triangles_counter.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap11/triangles_counter.log -------------------------------------------------------------------------------- /code/chap11/triangles_counter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap11/triangles_counter.py -------------------------------------------------------------------------------- /code/chap11/triangles_counter.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap11/triangles_counter.sh -------------------------------------------------------------------------------- /code/chap11/unique_triangles_finder.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap11/unique_triangles_finder.log -------------------------------------------------------------------------------- /code/chap11/unique_triangles_finder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap11/unique_triangles_finder.py -------------------------------------------------------------------------------- /code/chap11/unique_triangles_finder.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap11/unique_triangles_finder.sh -------------------------------------------------------------------------------- /code/chap12/average_monoid_use_aggregatebykey.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap12/average_monoid_use_aggregatebykey.log -------------------------------------------------------------------------------- /code/chap12/average_monoid_use_aggregatebykey.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap12/average_monoid_use_aggregatebykey.py -------------------------------------------------------------------------------- /code/chap12/average_monoid_use_aggregatebykey.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap12/average_monoid_use_aggregatebykey.sh -------------------------------------------------------------------------------- /code/chap12/average_monoid_use_combinebykey.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap12/average_monoid_use_combinebykey.log -------------------------------------------------------------------------------- /code/chap12/average_monoid_use_combinebykey.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap12/average_monoid_use_combinebykey.py -------------------------------------------------------------------------------- /code/chap12/average_monoid_use_combinebykey.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap12/average_monoid_use_combinebykey.sh -------------------------------------------------------------------------------- /code/chap12/average_monoid_use_groupbykey.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap12/average_monoid_use_groupbykey.log -------------------------------------------------------------------------------- /code/chap12/average_monoid_use_groupbykey.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap12/average_monoid_use_groupbykey.py -------------------------------------------------------------------------------- /code/chap12/average_monoid_use_groupbykey.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap12/average_monoid_use_groupbykey.sh -------------------------------------------------------------------------------- /code/chap12/average_monoid_use_reducebykey.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap12/average_monoid_use_reducebykey.log -------------------------------------------------------------------------------- /code/chap12/average_monoid_use_reducebykey.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap12/average_monoid_use_reducebykey.py -------------------------------------------------------------------------------- /code/chap12/average_monoid_use_reducebykey.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap12/average_monoid_use_reducebykey.sh -------------------------------------------------------------------------------- /code/chap12/inmapper_combiner_local_aggregation.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap12/inmapper_combiner_local_aggregation.log -------------------------------------------------------------------------------- /code/chap12/inmapper_combiner_local_aggregation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap12/inmapper_combiner_local_aggregation.py -------------------------------------------------------------------------------- /code/chap12/inmapper_combiner_local_aggregation.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap12/inmapper_combiner_local_aggregation.sh -------------------------------------------------------------------------------- /code/chap12/inmapper_combiner_use_basic_mapreduce.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap12/inmapper_combiner_use_basic_mapreduce.log -------------------------------------------------------------------------------- /code/chap12/inmapper_combiner_use_basic_mapreduce.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap12/inmapper_combiner_use_basic_mapreduce.py -------------------------------------------------------------------------------- /code/chap12/inmapper_combiner_use_basic_mapreduce.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap12/inmapper_combiner_use_basic_mapreduce.sh -------------------------------------------------------------------------------- /code/chap12/inmapper_combiner_use_mappartitions.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap12/inmapper_combiner_use_mappartitions.log -------------------------------------------------------------------------------- /code/chap12/inmapper_combiner_use_mappartitions.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap12/inmapper_combiner_use_mappartitions.py -------------------------------------------------------------------------------- /code/chap12/inmapper_combiner_use_mappartitions.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap12/inmapper_combiner_use_mappartitions.sh -------------------------------------------------------------------------------- /code/chap12/minmax_force_empty_partitions.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap12/minmax_force_empty_partitions.log -------------------------------------------------------------------------------- /code/chap12/minmax_force_empty_partitions.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap12/minmax_force_empty_partitions.py -------------------------------------------------------------------------------- /code/chap12/minmax_force_empty_partitions.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap12/minmax_force_empty_partitions.sh -------------------------------------------------------------------------------- /code/chap12/minmax_use_mappartitions.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap12/minmax_use_mappartitions.log -------------------------------------------------------------------------------- /code/chap12/minmax_use_mappartitions.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap12/minmax_use_mappartitions.py -------------------------------------------------------------------------------- /code/chap12/minmax_use_mappartitions.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap12/minmax_use_mappartitions.sh -------------------------------------------------------------------------------- /code/chap12/sample_dna_seq.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap12/sample_dna_seq.txt -------------------------------------------------------------------------------- /code/chap12/sample_input.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap12/sample_input.txt -------------------------------------------------------------------------------- /code/chap12/sample_numbers.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap12/sample_numbers.txt -------------------------------------------------------------------------------- /code/chap12/top_N_use_mappartitions.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap12/top_N_use_mappartitions.log -------------------------------------------------------------------------------- /code/chap12/top_N_use_mappartitions.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap12/top_N_use_mappartitions.py -------------------------------------------------------------------------------- /code/chap12/top_N_use_mappartitions.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap12/top_N_use_mappartitions.sh -------------------------------------------------------------------------------- /code/chap12/top_N_use_takeordered.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap12/top_N_use_takeordered.log -------------------------------------------------------------------------------- /code/chap12/top_N_use_takeordered.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap12/top_N_use_takeordered.py -------------------------------------------------------------------------------- /code/chap12/top_N_use_takeordered.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/chap12/top_N_use_takeordered.sh -------------------------------------------------------------------------------- /code/examples/wordcount/foxdata.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/examples/wordcount/foxdata.txt -------------------------------------------------------------------------------- /code/examples/wordcount/wordcount.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/examples/wordcount/wordcount.py -------------------------------------------------------------------------------- /code/examples/wordcount/wordcount.py.usage: -------------------------------------------------------------------------------- 1 | ./bin/spark-submit wordcount.py foxdata.txt /tmp/output 2 | -------------------------------------------------------------------------------- /code/jars/avro-mapred-1.7.7-hadoop1.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/jars/avro-mapred-1.7.7-hadoop1.jar -------------------------------------------------------------------------------- /code/jars/avro-mapred-1.7.7-hadoop2.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/jars/avro-mapred-1.7.7-hadoop2.jar -------------------------------------------------------------------------------- /code/jars/com-cotdp-hadoop-1.0-SNAPSHOT.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/jars/com-cotdp-hadoop-1.0-SNAPSHOT.jar -------------------------------------------------------------------------------- /code/jars/elasticsearch-hadoop-6.4.2.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/jars/elasticsearch-hadoop-6.4.2.jar -------------------------------------------------------------------------------- /code/jars/elasticsearch-spark_2.11-2.4.5.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/jars/elasticsearch-spark_2.11-2.4.5.jar -------------------------------------------------------------------------------- /code/jars/graphframes-0.6.0-spark2.3-s_2.11.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/jars/graphframes-0.6.0-spark2.3-s_2.11.jar -------------------------------------------------------------------------------- /code/jars/hbase-spark-connector-1.0.0.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/jars/hbase-spark-connector-1.0.0.jar -------------------------------------------------------------------------------- /code/jars/htrace-core-3.1.0-incubating.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/jars/htrace-core-3.1.0-incubating.jar -------------------------------------------------------------------------------- /code/jars/mongo-java-driver-3.8.2.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/jars/mongo-java-driver-3.8.2.jar -------------------------------------------------------------------------------- /code/jars/mongo-spark-connector_2.11-2.2.5.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/jars/mongo-spark-connector_2.11-2.2.5.jar -------------------------------------------------------------------------------- /code/jars/mongodb-driver-3.8.2.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/jars/mongodb-driver-3.8.2.jar -------------------------------------------------------------------------------- /code/jars/mysql-connector-java-5.1.42.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/jars/mysql-connector-java-5.1.42.jar -------------------------------------------------------------------------------- /code/jars/shc-core-1.1.3-2.3-s_2.11.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/jars/shc-core-1.1.3-2.3-s_2.11.jar -------------------------------------------------------------------------------- /code/jars/shc-examples-1.1.3-2.3-s_2.11.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/jars/shc-examples-1.1.3-2.3-s_2.11.jar -------------------------------------------------------------------------------- /code/jars/spark-redis-2.3.1-SNAPSHOT-jar-with-dependencies.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/jars/spark-redis-2.3.1-SNAPSHOT-jar-with-dependencies.jar -------------------------------------------------------------------------------- /code/jars/spark-redis-2.3.1-SNAPSHOT.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/code/jars/spark-redis-2.3.1-SNAPSHOT.jar -------------------------------------------------------------------------------- /images/pyspark_algorithms.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/images/pyspark_algorithms.jpg -------------------------------------------------------------------------------- /images/pyspark_algorithms0.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/images/pyspark_algorithms0.jpg -------------------------------------------------------------------------------- /images/pyspark_algorithms2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/images/pyspark_algorithms2.jpg -------------------------------------------------------------------------------- /images/pyspark_algorithms3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/images/pyspark_algorithms3.jpg -------------------------------------------------------------------------------- /sample_chapters/Appendix_Questions_and_Answers.epub: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/sample_chapters/Appendix_Questions_and_Answers.epub -------------------------------------------------------------------------------- /sample_chapters/Appendix_Questions_and_Answers.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/sample_chapters/Appendix_Questions_and_Answers.pdf -------------------------------------------------------------------------------- /sample_chapters/README.md: -------------------------------------------------------------------------------- 1 | ## Sample Chapters of PySpark Algorithms Book 2 | -------------------------------------------------------------------------------- /sample_chapters/chap04_Getting_Started_with_PySpark.epub: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/sample_chapters/chap04_Getting_Started_with_PySpark.epub -------------------------------------------------------------------------------- /sample_chapters/chap04_Getting_Started_with_PySpark.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/sample_chapters/chap04_Getting_Started_with_PySpark.pdf -------------------------------------------------------------------------------- /where_to_buy_book/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoudparsian/pyspark-algorithms/HEAD/where_to_buy_book/README.md --------------------------------------------------------------------------------