├── GradientBoostedTrees.py ├── LICENSE.txt ├── README.md ├── RandomForests.py ├── Spark2.0 ├── Readme.md ├── Saprk_anomalies_detction.ipynb ├── anomalies_detection.py ├── anomalies_detection_spark_streaming.py ├── how_to_define_spark.py ├── kmeans_test.csv ├── kmeans_train.csv ├── logs-features-sample.zip ├── simulate_streaming_input.ipynb ├── spark_MLPipeline.ipynb ├── spark_kmeans_streaming.ipynb ├── spark_streaming_anomalies_detection.ipynb ├── spark_streaming_word_count.ipynb └── streaming_twitter_sentiment.ipynb ├── Spark3+ ├── ReadMe.md └── spark_window.ipynb ├── als.py ├── amazon_review_tfidf.py ├── amazon_review_tfidf_normalized.py ├── anomalies_detection.py ├── anomalies_detection_data_sample.txt ├── correlate-logs-better.py ├── correlate-logs.py ├── entity_resolution.py ├── euler.py ├── image_classification.py ├── itemsets.py ├── kernelized_svm.py ├── linear_svm.py ├── load_logs_sql.py ├── matrix_data.txt ├── matrix_data_sparse.txt ├── matrix_multiply.py ├── matrix_multiply_sparse.py ├── model_visualization.py ├── movie_recommendations.py ├── random_forest_with_bagging.py ├── read_stream.py ├── reddit-averages.py ├── reddit_average_sql.py ├── relative-score-bcast.py ├── relative-score.py ├── shortest_path.py ├── slope_one.py ├── spark_ml_pipline.py ├── temp_range.py ├── temp_range_sql.py ├── tfidf_cv_lowestRMSE.py ├── tfidf_cv_lowestRMSE_normalized.py ├── word2vec.py ├── word2vec_best_RMSE.py ├── word2vec_histogram_best_RMSE.py ├── word2vec_kmeans.py └── wordcount-improved.py /GradientBoostedTrees.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hanhanwu/Hanhan-Spark-Python/HEAD/GradientBoostedTrees.py -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hanhanwu/Hanhan-Spark-Python/HEAD/LICENSE.txt -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hanhanwu/Hanhan-Spark-Python/HEAD/README.md -------------------------------------------------------------------------------- /RandomForests.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hanhanwu/Hanhan-Spark-Python/HEAD/RandomForests.py -------------------------------------------------------------------------------- /Spark2.0/Readme.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hanhanwu/Hanhan-Spark-Python/HEAD/Spark2.0/Readme.md -------------------------------------------------------------------------------- /Spark2.0/Saprk_anomalies_detction.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hanhanwu/Hanhan-Spark-Python/HEAD/Spark2.0/Saprk_anomalies_detction.ipynb -------------------------------------------------------------------------------- /Spark2.0/anomalies_detection.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hanhanwu/Hanhan-Spark-Python/HEAD/Spark2.0/anomalies_detection.py -------------------------------------------------------------------------------- /Spark2.0/anomalies_detection_spark_streaming.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hanhanwu/Hanhan-Spark-Python/HEAD/Spark2.0/anomalies_detection_spark_streaming.py -------------------------------------------------------------------------------- /Spark2.0/how_to_define_spark.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hanhanwu/Hanhan-Spark-Python/HEAD/Spark2.0/how_to_define_spark.py -------------------------------------------------------------------------------- /Spark2.0/kmeans_test.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hanhanwu/Hanhan-Spark-Python/HEAD/Spark2.0/kmeans_test.csv -------------------------------------------------------------------------------- /Spark2.0/kmeans_train.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hanhanwu/Hanhan-Spark-Python/HEAD/Spark2.0/kmeans_train.csv -------------------------------------------------------------------------------- /Spark2.0/logs-features-sample.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hanhanwu/Hanhan-Spark-Python/HEAD/Spark2.0/logs-features-sample.zip -------------------------------------------------------------------------------- /Spark2.0/simulate_streaming_input.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hanhanwu/Hanhan-Spark-Python/HEAD/Spark2.0/simulate_streaming_input.ipynb -------------------------------------------------------------------------------- /Spark2.0/spark_MLPipeline.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hanhanwu/Hanhan-Spark-Python/HEAD/Spark2.0/spark_MLPipeline.ipynb -------------------------------------------------------------------------------- /Spark2.0/spark_kmeans_streaming.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hanhanwu/Hanhan-Spark-Python/HEAD/Spark2.0/spark_kmeans_streaming.ipynb -------------------------------------------------------------------------------- /Spark2.0/spark_streaming_anomalies_detection.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hanhanwu/Hanhan-Spark-Python/HEAD/Spark2.0/spark_streaming_anomalies_detection.ipynb -------------------------------------------------------------------------------- /Spark2.0/spark_streaming_word_count.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hanhanwu/Hanhan-Spark-Python/HEAD/Spark2.0/spark_streaming_word_count.ipynb -------------------------------------------------------------------------------- /Spark2.0/streaming_twitter_sentiment.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hanhanwu/Hanhan-Spark-Python/HEAD/Spark2.0/streaming_twitter_sentiment.ipynb -------------------------------------------------------------------------------- /Spark3+/ReadMe.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hanhanwu/Hanhan-Spark-Python/HEAD/Spark3+/ReadMe.md -------------------------------------------------------------------------------- /Spark3+/spark_window.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hanhanwu/Hanhan-Spark-Python/HEAD/Spark3+/spark_window.ipynb -------------------------------------------------------------------------------- /als.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hanhanwu/Hanhan-Spark-Python/HEAD/als.py -------------------------------------------------------------------------------- /amazon_review_tfidf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hanhanwu/Hanhan-Spark-Python/HEAD/amazon_review_tfidf.py -------------------------------------------------------------------------------- /amazon_review_tfidf_normalized.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hanhanwu/Hanhan-Spark-Python/HEAD/amazon_review_tfidf_normalized.py -------------------------------------------------------------------------------- /anomalies_detection.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hanhanwu/Hanhan-Spark-Python/HEAD/anomalies_detection.py -------------------------------------------------------------------------------- /anomalies_detection_data_sample.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hanhanwu/Hanhan-Spark-Python/HEAD/anomalies_detection_data_sample.txt -------------------------------------------------------------------------------- /correlate-logs-better.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hanhanwu/Hanhan-Spark-Python/HEAD/correlate-logs-better.py -------------------------------------------------------------------------------- /correlate-logs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hanhanwu/Hanhan-Spark-Python/HEAD/correlate-logs.py -------------------------------------------------------------------------------- /entity_resolution.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hanhanwu/Hanhan-Spark-Python/HEAD/entity_resolution.py -------------------------------------------------------------------------------- /euler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hanhanwu/Hanhan-Spark-Python/HEAD/euler.py -------------------------------------------------------------------------------- /image_classification.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hanhanwu/Hanhan-Spark-Python/HEAD/image_classification.py -------------------------------------------------------------------------------- /itemsets.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hanhanwu/Hanhan-Spark-Python/HEAD/itemsets.py -------------------------------------------------------------------------------- /kernelized_svm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hanhanwu/Hanhan-Spark-Python/HEAD/kernelized_svm.py -------------------------------------------------------------------------------- /linear_svm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hanhanwu/Hanhan-Spark-Python/HEAD/linear_svm.py -------------------------------------------------------------------------------- /load_logs_sql.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hanhanwu/Hanhan-Spark-Python/HEAD/load_logs_sql.py -------------------------------------------------------------------------------- /matrix_data.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hanhanwu/Hanhan-Spark-Python/HEAD/matrix_data.txt -------------------------------------------------------------------------------- /matrix_data_sparse.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hanhanwu/Hanhan-Spark-Python/HEAD/matrix_data_sparse.txt -------------------------------------------------------------------------------- /matrix_multiply.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hanhanwu/Hanhan-Spark-Python/HEAD/matrix_multiply.py -------------------------------------------------------------------------------- /matrix_multiply_sparse.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hanhanwu/Hanhan-Spark-Python/HEAD/matrix_multiply_sparse.py -------------------------------------------------------------------------------- /model_visualization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hanhanwu/Hanhan-Spark-Python/HEAD/model_visualization.py -------------------------------------------------------------------------------- /movie_recommendations.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hanhanwu/Hanhan-Spark-Python/HEAD/movie_recommendations.py -------------------------------------------------------------------------------- /random_forest_with_bagging.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hanhanwu/Hanhan-Spark-Python/HEAD/random_forest_with_bagging.py -------------------------------------------------------------------------------- /read_stream.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hanhanwu/Hanhan-Spark-Python/HEAD/read_stream.py -------------------------------------------------------------------------------- /reddit-averages.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hanhanwu/Hanhan-Spark-Python/HEAD/reddit-averages.py -------------------------------------------------------------------------------- /reddit_average_sql.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hanhanwu/Hanhan-Spark-Python/HEAD/reddit_average_sql.py -------------------------------------------------------------------------------- /relative-score-bcast.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hanhanwu/Hanhan-Spark-Python/HEAD/relative-score-bcast.py -------------------------------------------------------------------------------- /relative-score.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hanhanwu/Hanhan-Spark-Python/HEAD/relative-score.py -------------------------------------------------------------------------------- /shortest_path.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hanhanwu/Hanhan-Spark-Python/HEAD/shortest_path.py -------------------------------------------------------------------------------- /slope_one.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hanhanwu/Hanhan-Spark-Python/HEAD/slope_one.py -------------------------------------------------------------------------------- /spark_ml_pipline.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hanhanwu/Hanhan-Spark-Python/HEAD/spark_ml_pipline.py -------------------------------------------------------------------------------- /temp_range.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hanhanwu/Hanhan-Spark-Python/HEAD/temp_range.py -------------------------------------------------------------------------------- /temp_range_sql.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hanhanwu/Hanhan-Spark-Python/HEAD/temp_range_sql.py -------------------------------------------------------------------------------- /tfidf_cv_lowestRMSE.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hanhanwu/Hanhan-Spark-Python/HEAD/tfidf_cv_lowestRMSE.py -------------------------------------------------------------------------------- /tfidf_cv_lowestRMSE_normalized.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hanhanwu/Hanhan-Spark-Python/HEAD/tfidf_cv_lowestRMSE_normalized.py -------------------------------------------------------------------------------- /word2vec.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hanhanwu/Hanhan-Spark-Python/HEAD/word2vec.py -------------------------------------------------------------------------------- /word2vec_best_RMSE.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hanhanwu/Hanhan-Spark-Python/HEAD/word2vec_best_RMSE.py -------------------------------------------------------------------------------- /word2vec_histogram_best_RMSE.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hanhanwu/Hanhan-Spark-Python/HEAD/word2vec_histogram_best_RMSE.py -------------------------------------------------------------------------------- /word2vec_kmeans.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hanhanwu/Hanhan-Spark-Python/HEAD/word2vec_kmeans.py -------------------------------------------------------------------------------- /wordcount-improved.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hanhanwu/Hanhan-Spark-Python/HEAD/wordcount-improved.py --------------------------------------------------------------------------------