├── .gitignore ├── README.md ├── apache_spark ├── notebooks │ ├── spark_examples.ipynb │ └── utils.py ├── references │ ├── Apache Hadoop YARN- Yet Another Resource Negotiator.pdf │ ├── High Performance Spark.pdf │ ├── Resilient Distributed Datasets- A Fault-Tolerant Abstraction for In-Memory Cluster Computing.pdf │ └── Spark- Cluster Computing with Working Sets.pdf └── slides │ └── spark.pdf ├── common ├── sparkxgb.zip ├── utils │ ├── metrics.py │ └── processing.py ├── xgboost4j-0.72.jar └── xgboost4j-spark-0.72.jar ├── controlled_experiments └── references │ ├── Controlled experiments on the web- survey and practical guide.pdf │ ├── Evaluating the Replicability of Significance Tests for Comparing Learning Algorithms.pdf │ ├── Online Controlled Experiments and A:B Testing.pdf │ ├── Online Controlled Experiments at Large Scale.pdf │ ├── Overlapping Experiment Infrastructure- More, Better, Faster Experimentation.pdf │ └── Statistical Comparisons of Classifiers over Multiple Data Sets.pdf ├── distributed_ml_intro ├── references │ ├── A Survey on Distributed Machine Learning.pdf │ ├── Horovod- fast and easy distributed deep learning in TensorFlow.pdf │ ├── Large Scale Distributed Deep Networks.pdf │ ├── Large-Scale Machine Learning with Stochastic Gradient Descent.pdf │ ├── MLlib- Machine Learning in Apache Spark.pdf │ ├── Map-Reduce for Machine Learning on Multicore.pdf │ ├── Parallelized Stochastic Gradient Descent.pdf │ ├── Scaling Distributed Machine Learning with the Parameter Server.pdf │ ├── SparkNet- Training Deep Networks in Spark.pdf │ ├── TensorFlow- A System for Large-Scale Machine Learning.pdf │ ├── TensorFlow- Large-Scale Machine Learning on Heterogeneous Distributed Systems.pdf │ └── The Tradeoffs of Large Scale Learning.pdf └── slides │ └── distributed_ml_intro.pdf ├── dnn_compression_acceleration ├── notebooks │ ├── README.md │ ├── student_model_train.ipynb │ ├── teacher_model_train_full.ipynb │ └── teacher_model_train_small.ipynb ├── references │ ├── A Survey of Model Compression and Acceleration for Deep Neural Networks.pdf │ ├── Deep Compression.pdf │ ├── DeepGBM.pdf │ ├── Distilling the Knowledge in a Neural Network.pdf │ ├── Do Deep Nets Really Need to be Deep?.pdf │ ├── Learning both Weights and Connections for Efficient Neural Networks.pdf │ └── The Lottery Ticket Hypothesis.pdf ├── slides │ └── dnn_compression.pdf └── videos │ └── README.md ├── docker ├── Docker-tutorial.md ├── Dockerfile ├── build.sh ├── requirements.txt ├── run.sh └── start.sh ├── gradient_boosting ├── notebooks │ ├── sparkxgb.zip │ ├── utils │ │ ├── metrics.py │ │ └── processing.py │ ├── xgboost.ipynb │ ├── xgboost4j-0.72.jar │ └── xgboost4j-spark-0.72.jar ├── references │ ├── A Unified Approach to Interpreting Model Predictions.pdf │ ├── CatBoost- gradient boosting with categorical features support.pdf │ ├── Greedy Function Approximation- A Gradient Boosting Machine.pdf │ ├── PLANET- Massively Parallel Learning of Tree Ensembles with MapReduce.pdf │ ├── Practical Lessons from Predicting Clicks on Ads at Facebook.pdf │ ├── Stochastic Gradient Boosting.pdf │ └── XGBoost- A Scalable Tree Boosting System.pdf └── slides │ ├── mean_target_encoding.pdf │ └── test_categorical_features.pdf ├── hadoop_map_reduce ├── notebooks │ └── start_spark_example.ipynb ├── references │ ├── Hive - A Warehousing Solution Over a Map-Reduce Framework.pdf │ ├── Map-Reduce for Machine Learning on Multicore.pdf │ ├── MapReduce is Good Enough?.pdf │ ├── MapReduce- Simplified Data Processing on Large Clusters.pdf │ ├── The Google File System.pdf │ └── The Hadoop Distributed File System.pdf └── slides │ ├── gfs.pdf │ └── map_reduce.pdf ├── hyperparameters_optimization ├── notebooks │ └── hyperopt.ipynb ├── references │ ├── A Tutorial on Bayesian Optimization.pdf │ ├── AML.pdf │ ├── Algorithms for Hyper-Parameter Optimization.pdf │ ├── Gaussian Processes in Machine Learning.pdf │ ├── Google Vizier- A Service for Black-Box Optimization.pdf │ ├── Multi-Task Bayesian Optimization.pdf │ ├── Random Search for Hyper-Parameter Optimization.pdf │ └── Speeding up Automatic Hyperparameter Optimization of Deep Neural Networks by Extrapolation of Learning Curves.pdf └── slides │ └── hyperopt.pdf ├── intro └── slides │ └── intro.pdf ├── nearest_neighbor_search ├── references │ ├── Annoy.pdf │ ├── Approximate Nearest Neighbor Search on High Dimensional Data.pdf │ ├── Approximate nearest neighbor algorithm based on navigable (Information Systems).pdf │ ├── Deep Hashing for Compact Binary Codes Learning.pdf │ ├── Deep Supervised Hashing for Fast Image Retrieval .pdf │ ├── Efficient and robust approximate nearest neighbor search using Hierarchical Navigable Small World graphs.pdf │ ├── Hashing for Similarity Search: A Survey.pdf │ ├── Mining Massive Datasets - Chapter 3.pdf │ └── Survey of Nearest Neighbor Techniques.pdf ├── slides │ └── nearest_neighbor_search.pdf └── videos │ └── README.md ├── recsys ├── notebooks │ ├── als_example.ipynb │ └── recsys.ipynb ├── references │ ├── CB2CF- A Neural Multiview Content-to-Collaborative Filtering Model for Completely Cold Item Recommendations.pdf │ ├── Collaborative Filtering for Implicit Feedback Datasets.pdf │ ├── Fast Matrix Factorization for Online Recommendation with Implicit Feedback.pdf │ ├── Large-Scale Matrix Factorization with Distributed Stochastic Gradient Descent.pdf │ ├── Neural Collaborative Filtering.pdf │ ├── Probabilistic Matrix Factorization.pdf │ ├── PyTorch BigGraph.pdf │ └── deep-content-based-music-recommendation.pdf ├── slides │ └── recsys.pdf └── videos │ └── README.md ├── sgd_logreg_nn ├── notebooks │ ├── criteo_prepare_dataset.ipynb │ ├── ctr_prediction_mllib.ipynb │ └── ctr_prediction_mllib_practice.ipynb ├── references │ ├── A Sparse Deep Factorization Machine for Efficient CTR prediction.pdf │ ├── Deep & Cross Network for Ad Click Predictions.pdf │ ├── DeepFM.pdf │ ├── FFM in a Real-world Online Advertising System 2.pdf │ ├── Factorization Machines.pdf │ ├── [AFM] Attentional Factorization Machines - Learning the Weight of Feature Interactions via Attention Networks (ZJU 2017).pdf │ ├── [Deep Crossing] Deep Crossing - Web-Scale Modeling without Manually Crafted Combinatorial Features (Microsoft 2016).pdf │ └── [FFM] Field-aware Factorization Machines for CTR Prediction (Criteo 2016).pdf └── slides │ └── categorical_features.pdf ├── spark_sql ├── notebooks │ ├── README.md │ ├── spark_performance.ipynb │ └── spark_sql_examples.ipynb ├── references │ └── Spark SQL- Relational Data Processing in Spark.pdf └── slides │ └── spark_sql.pdf └── v1.1.md /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | .ipynb_checkpoints 3 | __pycache__ 4 | data/* 5 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishugaepov/MLBD/HEAD/README.md -------------------------------------------------------------------------------- /apache_spark/notebooks/spark_examples.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishugaepov/MLBD/HEAD/apache_spark/notebooks/spark_examples.ipynb -------------------------------------------------------------------------------- /apache_spark/notebooks/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishugaepov/MLBD/HEAD/apache_spark/notebooks/utils.py -------------------------------------------------------------------------------- /apache_spark/references/Apache Hadoop YARN- Yet Another Resource Negotiator.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishugaepov/MLBD/HEAD/apache_spark/references/Apache Hadoop YARN- Yet Another Resource Negotiator.pdf -------------------------------------------------------------------------------- /apache_spark/references/High Performance Spark.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishugaepov/MLBD/HEAD/apache_spark/references/High Performance Spark.pdf -------------------------------------------------------------------------------- /apache_spark/references/Resilient Distributed Datasets- A Fault-Tolerant Abstraction for In-Memory Cluster Computing.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishugaepov/MLBD/HEAD/apache_spark/references/Resilient Distributed Datasets- A Fault-Tolerant Abstraction for In-Memory Cluster Computing.pdf -------------------------------------------------------------------------------- /apache_spark/references/Spark- Cluster Computing with Working Sets.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishugaepov/MLBD/HEAD/apache_spark/references/Spark- Cluster Computing with Working Sets.pdf -------------------------------------------------------------------------------- /apache_spark/slides/spark.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishugaepov/MLBD/HEAD/apache_spark/slides/spark.pdf -------------------------------------------------------------------------------- /common/sparkxgb.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishugaepov/MLBD/HEAD/common/sparkxgb.zip -------------------------------------------------------------------------------- /common/utils/metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishugaepov/MLBD/HEAD/common/utils/metrics.py -------------------------------------------------------------------------------- /common/utils/processing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishugaepov/MLBD/HEAD/common/utils/processing.py -------------------------------------------------------------------------------- /common/xgboost4j-0.72.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishugaepov/MLBD/HEAD/common/xgboost4j-0.72.jar -------------------------------------------------------------------------------- /common/xgboost4j-spark-0.72.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishugaepov/MLBD/HEAD/common/xgboost4j-spark-0.72.jar -------------------------------------------------------------------------------- /controlled_experiments/references/Controlled experiments on the web- survey and practical guide.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishugaepov/MLBD/HEAD/controlled_experiments/references/Controlled experiments on the web- survey and practical guide.pdf -------------------------------------------------------------------------------- /controlled_experiments/references/Evaluating the Replicability of Significance Tests for Comparing Learning Algorithms.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishugaepov/MLBD/HEAD/controlled_experiments/references/Evaluating the Replicability of Significance Tests for Comparing Learning Algorithms.pdf -------------------------------------------------------------------------------- /controlled_experiments/references/Online Controlled Experiments and A:B Testing.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishugaepov/MLBD/HEAD/controlled_experiments/references/Online Controlled Experiments and A:B Testing.pdf -------------------------------------------------------------------------------- /controlled_experiments/references/Online Controlled Experiments at Large Scale.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishugaepov/MLBD/HEAD/controlled_experiments/references/Online Controlled Experiments at Large Scale.pdf -------------------------------------------------------------------------------- /controlled_experiments/references/Overlapping Experiment Infrastructure- More, Better, Faster Experimentation.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishugaepov/MLBD/HEAD/controlled_experiments/references/Overlapping Experiment Infrastructure- More, Better, Faster Experimentation.pdf -------------------------------------------------------------------------------- /controlled_experiments/references/Statistical Comparisons of Classifiers over Multiple Data Sets.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishugaepov/MLBD/HEAD/controlled_experiments/references/Statistical Comparisons of Classifiers over Multiple Data Sets.pdf -------------------------------------------------------------------------------- /distributed_ml_intro/references/A Survey on Distributed Machine Learning.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishugaepov/MLBD/HEAD/distributed_ml_intro/references/A Survey on Distributed Machine Learning.pdf -------------------------------------------------------------------------------- /distributed_ml_intro/references/Horovod- fast and easy distributed deep learning in TensorFlow.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishugaepov/MLBD/HEAD/distributed_ml_intro/references/Horovod- fast and easy distributed deep learning in TensorFlow.pdf -------------------------------------------------------------------------------- /distributed_ml_intro/references/Large Scale Distributed Deep Networks.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishugaepov/MLBD/HEAD/distributed_ml_intro/references/Large Scale Distributed Deep Networks.pdf -------------------------------------------------------------------------------- /distributed_ml_intro/references/Large-Scale Machine Learning with Stochastic Gradient Descent.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishugaepov/MLBD/HEAD/distributed_ml_intro/references/Large-Scale Machine Learning with Stochastic Gradient Descent.pdf -------------------------------------------------------------------------------- /distributed_ml_intro/references/MLlib- Machine Learning in Apache Spark.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishugaepov/MLBD/HEAD/distributed_ml_intro/references/MLlib- Machine Learning in Apache Spark.pdf -------------------------------------------------------------------------------- /distributed_ml_intro/references/Map-Reduce for Machine Learning on Multicore.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishugaepov/MLBD/HEAD/distributed_ml_intro/references/Map-Reduce for Machine Learning on Multicore.pdf -------------------------------------------------------------------------------- /distributed_ml_intro/references/Parallelized Stochastic Gradient Descent.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishugaepov/MLBD/HEAD/distributed_ml_intro/references/Parallelized Stochastic Gradient Descent.pdf -------------------------------------------------------------------------------- /distributed_ml_intro/references/Scaling Distributed Machine Learning with the Parameter Server.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishugaepov/MLBD/HEAD/distributed_ml_intro/references/Scaling Distributed Machine Learning with the Parameter Server.pdf -------------------------------------------------------------------------------- /distributed_ml_intro/references/SparkNet- Training Deep Networks in Spark.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishugaepov/MLBD/HEAD/distributed_ml_intro/references/SparkNet- Training Deep Networks in Spark.pdf -------------------------------------------------------------------------------- /distributed_ml_intro/references/TensorFlow- A System for Large-Scale Machine Learning.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishugaepov/MLBD/HEAD/distributed_ml_intro/references/TensorFlow- A System for Large-Scale Machine Learning.pdf -------------------------------------------------------------------------------- /distributed_ml_intro/references/TensorFlow- Large-Scale Machine Learning on Heterogeneous Distributed Systems.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishugaepov/MLBD/HEAD/distributed_ml_intro/references/TensorFlow- Large-Scale Machine Learning on Heterogeneous Distributed Systems.pdf -------------------------------------------------------------------------------- /distributed_ml_intro/references/The Tradeoffs of Large Scale Learning.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishugaepov/MLBD/HEAD/distributed_ml_intro/references/The Tradeoffs of Large Scale Learning.pdf -------------------------------------------------------------------------------- /distributed_ml_intro/slides/distributed_ml_intro.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishugaepov/MLBD/HEAD/distributed_ml_intro/slides/distributed_ml_intro.pdf -------------------------------------------------------------------------------- /dnn_compression_acceleration/notebooks/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishugaepov/MLBD/HEAD/dnn_compression_acceleration/notebooks/README.md -------------------------------------------------------------------------------- /dnn_compression_acceleration/notebooks/student_model_train.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishugaepov/MLBD/HEAD/dnn_compression_acceleration/notebooks/student_model_train.ipynb -------------------------------------------------------------------------------- /dnn_compression_acceleration/notebooks/teacher_model_train_full.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishugaepov/MLBD/HEAD/dnn_compression_acceleration/notebooks/teacher_model_train_full.ipynb -------------------------------------------------------------------------------- /dnn_compression_acceleration/notebooks/teacher_model_train_small.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishugaepov/MLBD/HEAD/dnn_compression_acceleration/notebooks/teacher_model_train_small.ipynb -------------------------------------------------------------------------------- /dnn_compression_acceleration/references/A Survey of Model Compression and Acceleration for Deep Neural Networks.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishugaepov/MLBD/HEAD/dnn_compression_acceleration/references/A Survey of Model Compression and Acceleration for Deep Neural Networks.pdf -------------------------------------------------------------------------------- /dnn_compression_acceleration/references/Deep Compression.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishugaepov/MLBD/HEAD/dnn_compression_acceleration/references/Deep Compression.pdf -------------------------------------------------------------------------------- /dnn_compression_acceleration/references/DeepGBM.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishugaepov/MLBD/HEAD/dnn_compression_acceleration/references/DeepGBM.pdf -------------------------------------------------------------------------------- /dnn_compression_acceleration/references/Distilling the Knowledge in a Neural Network.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishugaepov/MLBD/HEAD/dnn_compression_acceleration/references/Distilling the Knowledge in a Neural Network.pdf -------------------------------------------------------------------------------- /dnn_compression_acceleration/references/Do Deep Nets Really Need to be Deep?.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishugaepov/MLBD/HEAD/dnn_compression_acceleration/references/Do Deep Nets Really Need to be Deep?.pdf -------------------------------------------------------------------------------- /dnn_compression_acceleration/references/Learning both Weights and Connections for Efficient Neural Networks.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishugaepov/MLBD/HEAD/dnn_compression_acceleration/references/Learning both Weights and Connections for Efficient Neural Networks.pdf -------------------------------------------------------------------------------- /dnn_compression_acceleration/references/The Lottery Ticket Hypothesis.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishugaepov/MLBD/HEAD/dnn_compression_acceleration/references/The Lottery Ticket Hypothesis.pdf -------------------------------------------------------------------------------- /dnn_compression_acceleration/slides/dnn_compression.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishugaepov/MLBD/HEAD/dnn_compression_acceleration/slides/dnn_compression.pdf -------------------------------------------------------------------------------- /dnn_compression_acceleration/videos/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishugaepov/MLBD/HEAD/dnn_compression_acceleration/videos/README.md -------------------------------------------------------------------------------- /docker/Docker-tutorial.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishugaepov/MLBD/HEAD/docker/Docker-tutorial.md -------------------------------------------------------------------------------- /docker/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishugaepov/MLBD/HEAD/docker/Dockerfile -------------------------------------------------------------------------------- /docker/build.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishugaepov/MLBD/HEAD/docker/build.sh -------------------------------------------------------------------------------- /docker/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishugaepov/MLBD/HEAD/docker/requirements.txt -------------------------------------------------------------------------------- /docker/run.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishugaepov/MLBD/HEAD/docker/run.sh -------------------------------------------------------------------------------- /docker/start.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishugaepov/MLBD/HEAD/docker/start.sh -------------------------------------------------------------------------------- /gradient_boosting/notebooks/sparkxgb.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishugaepov/MLBD/HEAD/gradient_boosting/notebooks/sparkxgb.zip -------------------------------------------------------------------------------- /gradient_boosting/notebooks/utils/metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishugaepov/MLBD/HEAD/gradient_boosting/notebooks/utils/metrics.py -------------------------------------------------------------------------------- /gradient_boosting/notebooks/utils/processing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishugaepov/MLBD/HEAD/gradient_boosting/notebooks/utils/processing.py -------------------------------------------------------------------------------- /gradient_boosting/notebooks/xgboost.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishugaepov/MLBD/HEAD/gradient_boosting/notebooks/xgboost.ipynb -------------------------------------------------------------------------------- /gradient_boosting/notebooks/xgboost4j-0.72.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishugaepov/MLBD/HEAD/gradient_boosting/notebooks/xgboost4j-0.72.jar -------------------------------------------------------------------------------- /gradient_boosting/notebooks/xgboost4j-spark-0.72.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishugaepov/MLBD/HEAD/gradient_boosting/notebooks/xgboost4j-spark-0.72.jar -------------------------------------------------------------------------------- /gradient_boosting/references/A Unified Approach to Interpreting Model Predictions.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishugaepov/MLBD/HEAD/gradient_boosting/references/A Unified Approach to Interpreting Model Predictions.pdf -------------------------------------------------------------------------------- /gradient_boosting/references/CatBoost- gradient boosting with categorical features support.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishugaepov/MLBD/HEAD/gradient_boosting/references/CatBoost- gradient boosting with categorical features support.pdf -------------------------------------------------------------------------------- /gradient_boosting/references/Greedy Function Approximation- A Gradient Boosting Machine.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishugaepov/MLBD/HEAD/gradient_boosting/references/Greedy Function Approximation- A Gradient Boosting Machine.pdf -------------------------------------------------------------------------------- /gradient_boosting/references/PLANET- Massively Parallel Learning of Tree Ensembles with MapReduce.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishugaepov/MLBD/HEAD/gradient_boosting/references/PLANET- Massively Parallel Learning of Tree Ensembles with MapReduce.pdf -------------------------------------------------------------------------------- /gradient_boosting/references/Practical Lessons from Predicting Clicks on Ads at Facebook.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishugaepov/MLBD/HEAD/gradient_boosting/references/Practical Lessons from Predicting Clicks on Ads at Facebook.pdf -------------------------------------------------------------------------------- /gradient_boosting/references/Stochastic Gradient Boosting.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishugaepov/MLBD/HEAD/gradient_boosting/references/Stochastic Gradient Boosting.pdf -------------------------------------------------------------------------------- /gradient_boosting/references/XGBoost- A Scalable Tree Boosting System.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishugaepov/MLBD/HEAD/gradient_boosting/references/XGBoost- A Scalable Tree Boosting System.pdf -------------------------------------------------------------------------------- /gradient_boosting/slides/mean_target_encoding.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishugaepov/MLBD/HEAD/gradient_boosting/slides/mean_target_encoding.pdf -------------------------------------------------------------------------------- /gradient_boosting/slides/test_categorical_features.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishugaepov/MLBD/HEAD/gradient_boosting/slides/test_categorical_features.pdf -------------------------------------------------------------------------------- /hadoop_map_reduce/notebooks/start_spark_example.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishugaepov/MLBD/HEAD/hadoop_map_reduce/notebooks/start_spark_example.ipynb -------------------------------------------------------------------------------- /hadoop_map_reduce/references/Hive - A Warehousing Solution Over a Map-Reduce Framework.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishugaepov/MLBD/HEAD/hadoop_map_reduce/references/Hive - A Warehousing Solution Over a Map-Reduce Framework.pdf -------------------------------------------------------------------------------- /hadoop_map_reduce/references/Map-Reduce for Machine Learning on Multicore.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishugaepov/MLBD/HEAD/hadoop_map_reduce/references/Map-Reduce for Machine Learning on Multicore.pdf -------------------------------------------------------------------------------- /hadoop_map_reduce/references/MapReduce is Good Enough?.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishugaepov/MLBD/HEAD/hadoop_map_reduce/references/MapReduce is Good Enough?.pdf -------------------------------------------------------------------------------- /hadoop_map_reduce/references/MapReduce- Simplified Data Processing on Large Clusters.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishugaepov/MLBD/HEAD/hadoop_map_reduce/references/MapReduce- Simplified Data Processing on Large Clusters.pdf -------------------------------------------------------------------------------- /hadoop_map_reduce/references/The Google File System.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishugaepov/MLBD/HEAD/hadoop_map_reduce/references/The Google File System.pdf -------------------------------------------------------------------------------- /hadoop_map_reduce/references/The Hadoop Distributed File System.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishugaepov/MLBD/HEAD/hadoop_map_reduce/references/The Hadoop Distributed File System.pdf -------------------------------------------------------------------------------- /hadoop_map_reduce/slides/gfs.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishugaepov/MLBD/HEAD/hadoop_map_reduce/slides/gfs.pdf -------------------------------------------------------------------------------- /hadoop_map_reduce/slides/map_reduce.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishugaepov/MLBD/HEAD/hadoop_map_reduce/slides/map_reduce.pdf -------------------------------------------------------------------------------- /hyperparameters_optimization/notebooks/hyperopt.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishugaepov/MLBD/HEAD/hyperparameters_optimization/notebooks/hyperopt.ipynb -------------------------------------------------------------------------------- /hyperparameters_optimization/references/A Tutorial on Bayesian Optimization.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishugaepov/MLBD/HEAD/hyperparameters_optimization/references/A Tutorial on Bayesian Optimization.pdf -------------------------------------------------------------------------------- /hyperparameters_optimization/references/AML.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishugaepov/MLBD/HEAD/hyperparameters_optimization/references/AML.pdf -------------------------------------------------------------------------------- /hyperparameters_optimization/references/Algorithms for Hyper-Parameter Optimization.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishugaepov/MLBD/HEAD/hyperparameters_optimization/references/Algorithms for Hyper-Parameter Optimization.pdf -------------------------------------------------------------------------------- /hyperparameters_optimization/references/Gaussian Processes in Machine Learning.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishugaepov/MLBD/HEAD/hyperparameters_optimization/references/Gaussian Processes in Machine Learning.pdf -------------------------------------------------------------------------------- /hyperparameters_optimization/references/Google Vizier- A Service for Black-Box Optimization.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishugaepov/MLBD/HEAD/hyperparameters_optimization/references/Google Vizier- A Service for Black-Box Optimization.pdf -------------------------------------------------------------------------------- /hyperparameters_optimization/references/Multi-Task Bayesian Optimization.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishugaepov/MLBD/HEAD/hyperparameters_optimization/references/Multi-Task Bayesian Optimization.pdf -------------------------------------------------------------------------------- /hyperparameters_optimization/references/Random Search for Hyper-Parameter Optimization.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishugaepov/MLBD/HEAD/hyperparameters_optimization/references/Random Search for Hyper-Parameter Optimization.pdf -------------------------------------------------------------------------------- /hyperparameters_optimization/references/Speeding up Automatic Hyperparameter Optimization of Deep Neural Networks by Extrapolation of Learning Curves.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishugaepov/MLBD/HEAD/hyperparameters_optimization/references/Speeding up Automatic Hyperparameter Optimization of Deep Neural Networks by Extrapolation of Learning Curves.pdf -------------------------------------------------------------------------------- /hyperparameters_optimization/slides/hyperopt.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishugaepov/MLBD/HEAD/hyperparameters_optimization/slides/hyperopt.pdf -------------------------------------------------------------------------------- /intro/slides/intro.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishugaepov/MLBD/HEAD/intro/slides/intro.pdf -------------------------------------------------------------------------------- /nearest_neighbor_search/references/Annoy.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishugaepov/MLBD/HEAD/nearest_neighbor_search/references/Annoy.pdf -------------------------------------------------------------------------------- /nearest_neighbor_search/references/Approximate Nearest Neighbor Search on High Dimensional Data.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishugaepov/MLBD/HEAD/nearest_neighbor_search/references/Approximate Nearest Neighbor Search on High Dimensional Data.pdf -------------------------------------------------------------------------------- /nearest_neighbor_search/references/Approximate nearest neighbor algorithm based on navigable (Information Systems).pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishugaepov/MLBD/HEAD/nearest_neighbor_search/references/Approximate nearest neighbor algorithm based on navigable (Information Systems).pdf -------------------------------------------------------------------------------- /nearest_neighbor_search/references/Deep Hashing for Compact Binary Codes Learning.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishugaepov/MLBD/HEAD/nearest_neighbor_search/references/Deep Hashing for Compact Binary Codes Learning.pdf -------------------------------------------------------------------------------- /nearest_neighbor_search/references/Deep Supervised Hashing for Fast Image Retrieval .pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishugaepov/MLBD/HEAD/nearest_neighbor_search/references/Deep Supervised Hashing for Fast Image Retrieval .pdf -------------------------------------------------------------------------------- /nearest_neighbor_search/references/Efficient and robust approximate nearest neighbor search using Hierarchical Navigable Small World graphs.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishugaepov/MLBD/HEAD/nearest_neighbor_search/references/Efficient and robust approximate nearest neighbor search using Hierarchical Navigable Small World graphs.pdf -------------------------------------------------------------------------------- /nearest_neighbor_search/references/Hashing for Similarity Search: A Survey.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishugaepov/MLBD/HEAD/nearest_neighbor_search/references/Hashing for Similarity Search: A Survey.pdf -------------------------------------------------------------------------------- /nearest_neighbor_search/references/Mining Massive Datasets - Chapter 3.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishugaepov/MLBD/HEAD/nearest_neighbor_search/references/Mining Massive Datasets - Chapter 3.pdf -------------------------------------------------------------------------------- /nearest_neighbor_search/references/Survey of Nearest Neighbor Techniques.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishugaepov/MLBD/HEAD/nearest_neighbor_search/references/Survey of Nearest Neighbor Techniques.pdf -------------------------------------------------------------------------------- /nearest_neighbor_search/slides/nearest_neighbor_search.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishugaepov/MLBD/HEAD/nearest_neighbor_search/slides/nearest_neighbor_search.pdf -------------------------------------------------------------------------------- /nearest_neighbor_search/videos/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishugaepov/MLBD/HEAD/nearest_neighbor_search/videos/README.md -------------------------------------------------------------------------------- /recsys/notebooks/als_example.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishugaepov/MLBD/HEAD/recsys/notebooks/als_example.ipynb -------------------------------------------------------------------------------- /recsys/notebooks/recsys.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishugaepov/MLBD/HEAD/recsys/notebooks/recsys.ipynb -------------------------------------------------------------------------------- /recsys/references/CB2CF- A Neural Multiview Content-to-Collaborative Filtering Model for Completely Cold Item Recommendations.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishugaepov/MLBD/HEAD/recsys/references/CB2CF- A Neural Multiview Content-to-Collaborative Filtering Model for Completely Cold Item Recommendations.pdf -------------------------------------------------------------------------------- /recsys/references/Collaborative Filtering for Implicit Feedback Datasets.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishugaepov/MLBD/HEAD/recsys/references/Collaborative Filtering for Implicit Feedback Datasets.pdf -------------------------------------------------------------------------------- /recsys/references/Fast Matrix Factorization for Online Recommendation with Implicit Feedback.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishugaepov/MLBD/HEAD/recsys/references/Fast Matrix Factorization for Online Recommendation with Implicit Feedback.pdf -------------------------------------------------------------------------------- /recsys/references/Large-Scale Matrix Factorization with Distributed Stochastic Gradient Descent.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishugaepov/MLBD/HEAD/recsys/references/Large-Scale Matrix Factorization with Distributed Stochastic Gradient Descent.pdf -------------------------------------------------------------------------------- /recsys/references/Neural Collaborative Filtering.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishugaepov/MLBD/HEAD/recsys/references/Neural Collaborative Filtering.pdf -------------------------------------------------------------------------------- /recsys/references/Probabilistic Matrix Factorization.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishugaepov/MLBD/HEAD/recsys/references/Probabilistic Matrix Factorization.pdf -------------------------------------------------------------------------------- /recsys/references/PyTorch BigGraph.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishugaepov/MLBD/HEAD/recsys/references/PyTorch BigGraph.pdf -------------------------------------------------------------------------------- /recsys/references/deep-content-based-music-recommendation.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishugaepov/MLBD/HEAD/recsys/references/deep-content-based-music-recommendation.pdf -------------------------------------------------------------------------------- /recsys/slides/recsys.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishugaepov/MLBD/HEAD/recsys/slides/recsys.pdf -------------------------------------------------------------------------------- /recsys/videos/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishugaepov/MLBD/HEAD/recsys/videos/README.md -------------------------------------------------------------------------------- /sgd_logreg_nn/notebooks/criteo_prepare_dataset.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishugaepov/MLBD/HEAD/sgd_logreg_nn/notebooks/criteo_prepare_dataset.ipynb -------------------------------------------------------------------------------- /sgd_logreg_nn/notebooks/ctr_prediction_mllib.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishugaepov/MLBD/HEAD/sgd_logreg_nn/notebooks/ctr_prediction_mllib.ipynb -------------------------------------------------------------------------------- /sgd_logreg_nn/notebooks/ctr_prediction_mllib_practice.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishugaepov/MLBD/HEAD/sgd_logreg_nn/notebooks/ctr_prediction_mllib_practice.ipynb -------------------------------------------------------------------------------- /sgd_logreg_nn/references/A Sparse Deep Factorization Machine for Efficient CTR prediction.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishugaepov/MLBD/HEAD/sgd_logreg_nn/references/A Sparse Deep Factorization Machine for Efficient CTR prediction.pdf -------------------------------------------------------------------------------- /sgd_logreg_nn/references/Deep & Cross Network for Ad Click Predictions.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishugaepov/MLBD/HEAD/sgd_logreg_nn/references/Deep & Cross Network for Ad Click Predictions.pdf -------------------------------------------------------------------------------- /sgd_logreg_nn/references/DeepFM.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishugaepov/MLBD/HEAD/sgd_logreg_nn/references/DeepFM.pdf -------------------------------------------------------------------------------- /sgd_logreg_nn/references/FFM in a Real-world Online Advertising System 2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishugaepov/MLBD/HEAD/sgd_logreg_nn/references/FFM in a Real-world Online Advertising System 2.pdf -------------------------------------------------------------------------------- /sgd_logreg_nn/references/Factorization Machines.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishugaepov/MLBD/HEAD/sgd_logreg_nn/references/Factorization Machines.pdf -------------------------------------------------------------------------------- /sgd_logreg_nn/references/[AFM] Attentional Factorization Machines - Learning the Weight of Feature Interactions via Attention Networks (ZJU 2017).pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishugaepov/MLBD/HEAD/sgd_logreg_nn/references/[AFM] Attentional Factorization Machines - Learning the Weight of Feature Interactions via Attention Networks (ZJU 2017).pdf -------------------------------------------------------------------------------- /sgd_logreg_nn/references/[Deep Crossing] Deep Crossing - Web-Scale Modeling without Manually Crafted Combinatorial Features (Microsoft 2016).pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishugaepov/MLBD/HEAD/sgd_logreg_nn/references/[Deep Crossing] Deep Crossing - Web-Scale Modeling without Manually Crafted Combinatorial Features (Microsoft 2016).pdf -------------------------------------------------------------------------------- /sgd_logreg_nn/references/[FFM] Field-aware Factorization Machines for CTR Prediction (Criteo 2016).pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishugaepov/MLBD/HEAD/sgd_logreg_nn/references/[FFM] Field-aware Factorization Machines for CTR Prediction (Criteo 2016).pdf -------------------------------------------------------------------------------- /sgd_logreg_nn/slides/categorical_features.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishugaepov/MLBD/HEAD/sgd_logreg_nn/slides/categorical_features.pdf -------------------------------------------------------------------------------- /spark_sql/notebooks/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishugaepov/MLBD/HEAD/spark_sql/notebooks/README.md -------------------------------------------------------------------------------- /spark_sql/notebooks/spark_performance.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishugaepov/MLBD/HEAD/spark_sql/notebooks/spark_performance.ipynb -------------------------------------------------------------------------------- /spark_sql/notebooks/spark_sql_examples.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishugaepov/MLBD/HEAD/spark_sql/notebooks/spark_sql_examples.ipynb -------------------------------------------------------------------------------- /spark_sql/references/Spark SQL- Relational Data Processing in Spark.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishugaepov/MLBD/HEAD/spark_sql/references/Spark SQL- Relational Data Processing in Spark.pdf -------------------------------------------------------------------------------- /spark_sql/slides/spark_sql.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishugaepov/MLBD/HEAD/spark_sql/slides/spark_sql.pdf -------------------------------------------------------------------------------- /v1.1.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishugaepov/MLBD/HEAD/v1.1.md --------------------------------------------------------------------------------