├── similiar_pictures ├── conda-requirements.txt ├── Procfile ├── app.pyc ├── model.h5 ├── uploads │ ├── a.jpg │ ├── im21.jpg │ ├── im241.jpg │ ├── im603.jpg │ ├── image.png │ ├── photo.jpg │ ├── 1234_002.jpg │ ├── q8ZGbst8x6g.jpg │ ├── 2017-09-21_11.20.29.jpg │ ├── 0_534a9_c253987f_orig.jpeg │ └── 1476521012_maxresdefault.jpg ├── model │ ├── load.pyc │ ├── model.h5 │ ├── __pycache__ │ │ └── load.cpython-35.pyc │ ├── load.py │ └── model.json ├── static │ ├── upload_img.png │ ├── index.js │ └── style.css ├── requirements.txt ├── README.md ├── model.json ├── train.py ├── app.py ├── templates │ └── index.html └── instructions.ipynb ├── Goto november 2016 └── bag_of_word.pkl ├── Hands-on Reinforcement Learning with Python, Second Edition_new chapters ├── 02. A Guide to the Gym Toolkit │ ├── Images │ │ ├── 1.png │ │ ├── 2.png │ │ ├── 3.png │ │ ├── 5.png │ │ ├── 6.PNG │ │ ├── 7.png │ │ ├── 8.png │ │ ├── 9.png │ │ ├── 10.PNG │ │ ├── 11.png │ │ ├── 12.png │ │ ├── 13.PNG │ │ ├── 17.PNG │ │ ├── 18.PNG │ │ ├── 19.PNG │ │ └── 20.PNG │ ├── README.md │ └── 2.05. Cart Pole Balancing with Random Policy.ipynb ├── 06. Case Study: The MAB Problem │ ├── Images │ │ ├── 1.PNG │ │ ├── 2.PNG │ │ ├── 3.PNG │ │ ├── 4.PNG │ │ ├── 5.PNG │ │ └── 6.PNG │ ├── README.md │ └── 6.03. Epsilon-Greedy.ipynb ├── 07. Deep Q Network and its Variants │ ├── Images │ │ ├── 1.PNG │ │ ├── 2.PNG │ │ ├── 3.png │ │ └── 4.png │ └── READEME.md ├── 09. Actor Critic Methods - A2C and A3C │ ├── Images │ │ └── 1.png │ ├── logs │ │ └── events.out.tfevents.1587573275.Sudharsan │ ├── README.md │ └── 9.01. Overview of actor critic method.ipynb ├── 01. Fundamentals of Reinforcement Learning │ ├── Images │ │ ├── 1.png │ │ ├── 2.png │ │ ├── 3.png │ │ ├── 4.png │ │ ├── 5.PNG │ │ ├── 6.PNG │ │ ├── 7.PNG │ │ ├── 8.PNG │ │ ├── 9.png │ │ ├── a.png │ │ ├── 10.PNG │ │ ├── 11.png │ │ ├── 12.png │ │ ├── 13.png │ │ ├── 14.png │ │ ├── 15.png │ │ ├── 16.png │ │ ├── 17.png │ │ ├── 18.png │ │ ├── 19.png │ │ ├── 2-2.png │ │ ├── 20.png │ │ ├── 21.png │ │ ├── 22.png │ │ ├── 23.png │ │ ├── 24.png │ │ ├── 25.PNG │ │ ├── 26.PNG │ │ ├── 27.PNG │ │ ├── 28.png │ │ ├── 29.PNG │ │ ├── 30.PNG │ │ ├── 31.PNG │ │ ├── 32.PNG │ │ ├── 33.PNG │ │ ├── 34.PNG │ │ ├── 35.png │ │ ├── 36.png │ │ └── 5-8.png │ ├── README.md │ ├── 1.10. Model-Based and Model-Free Learning .ipynb │ ├── 1.03. Reinforcement Learning Algorithm.ipynb │ ├── 1.02. Key Elements of Reinforcement Learning .ipynb │ ├── 1.12. Applications of Reinforcement Learning.ipynb │ ├── 1.05. How RL differs from other ML paradigms?.ipynb │ ├── 1.11. Different Types of Environments.ipynb │ └── 1.01. Basic Idea of Reinforcement Learning .ipynb ├── 03. Bellman Equation and Dynamic Programming │ ├── Images │ │ ├── 1.PNG │ │ ├── 2.PNG │ │ ├── 3.png │ │ ├── 4.png │ │ ├── 5.png │ │ └── 6.png │ └── README.md ├── 05. Understanding Temporal Difference Learning │ ├── Images │ │ └── 1.png │ ├── README.md │ └── 5.01. TD Learning.ipynb ├── 08. Policy Gradient Method │ └── README.md ├── 10. Learning DDPG, TD3 and SAC │ └── README.md ├── 04. Monte Carlo Methods │ ├── README.md │ └── 4.01. Understanding the Monte Carlo Method.ipynb └── 11. TRPO, PPO and ACKTR Methods │ ├── README.md │ └── 11.01. 
Trust Region Policy Optimization.ipynb ├── santa_RL ├── config.py ├── data_loader_monte.py ├── reward_calc.py ├── give_reward_monte.py └── test.py ├── read-writecsv.py ├── salt └── Funct_wrapper.py ├── Programming Assignment: Оптимизация в Python: глобальная оптимизация и оптимизация негладкой функции.py ├── recursion-cellular-image-classification ├── predict.py ├── model_k.py ├── losses.py ├── predict_sites.py ├── scheduler.py ├── config.py ├── predict_multi.py ├── data_loader.py └── eda.py ├── Bayesian_AB ├── make_table (1).py ├── sequence (1).py └── calculate_posterior (1).py ├── nn_gradient_descent.py ├── textmining1.py ├── bot.py ├── selfmade libraries └── lib_val.py ├── carvana └── submit_fast.py ├── Huffman greedy.py ├── approximation.md ├── textmining1.md ├── poisson_bootstrap └── poisson_bootstrap.py ├── tips_tricks ├── 5_2_Regularizing model to avoid overfitting.ipynb ├── 5_3_Adversarial Validation.ipynb ├── 5_1_Validation dataset tuning.ipynb └── Video 1.1 Improving your models using Feature engineering.ipynb ├── big_data_for_engineers ├── Spark_bigramms.ipynb └── spark_shortest_path.ipynb ├── Mercari_0_3875_CV.py ├── mercari └── Mercari_0_3875_CV.py └── Keras starter with bagging (LB: 1120.596).py /similiar_pictures/conda-requirements.txt: -------------------------------------------------------------------------------- 1 | scipy 2 | PIL 3 | -------------------------------------------------------------------------------- /similiar_pictures/Procfile: -------------------------------------------------------------------------------- 1 | web: gunicorn -w 4 -b 0.0.0.0:$PORT -k gevent app:app 2 | -------------------------------------------------------------------------------- /similiar_pictures/app.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VENHEADs/python/HEAD/similiar_pictures/app.pyc -------------------------------------------------------------------------------- /similiar_pictures/model.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VENHEADs/python/HEAD/similiar_pictures/model.h5 -------------------------------------------------------------------------------- /similiar_pictures/uploads/a.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VENHEADs/python/HEAD/similiar_pictures/uploads/a.jpg -------------------------------------------------------------------------------- /similiar_pictures/model/load.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VENHEADs/python/HEAD/similiar_pictures/model/load.pyc -------------------------------------------------------------------------------- /similiar_pictures/model/model.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VENHEADs/python/HEAD/similiar_pictures/model/model.h5 -------------------------------------------------------------------------------- /Goto november 2016/bag_of_word.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VENHEADs/python/HEAD/Goto november 2016/bag_of_word.pkl -------------------------------------------------------------------------------- /similiar_pictures/uploads/im21.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/VENHEADs/python/HEAD/similiar_pictures/uploads/im21.jpg -------------------------------------------------------------------------------- /similiar_pictures/uploads/im241.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VENHEADs/python/HEAD/similiar_pictures/uploads/im241.jpg -------------------------------------------------------------------------------- /similiar_pictures/uploads/im603.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VENHEADs/python/HEAD/similiar_pictures/uploads/im603.jpg -------------------------------------------------------------------------------- /similiar_pictures/uploads/image.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VENHEADs/python/HEAD/similiar_pictures/uploads/image.png -------------------------------------------------------------------------------- /similiar_pictures/uploads/photo.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VENHEADs/python/HEAD/similiar_pictures/uploads/photo.jpg -------------------------------------------------------------------------------- /similiar_pictures/uploads/1234_002.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VENHEADs/python/HEAD/similiar_pictures/uploads/1234_002.jpg -------------------------------------------------------------------------------- /similiar_pictures/static/upload_img.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VENHEADs/python/HEAD/similiar_pictures/static/upload_img.png -------------------------------------------------------------------------------- /similiar_pictures/uploads/q8ZGbst8x6g.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VENHEADs/python/HEAD/similiar_pictures/uploads/q8ZGbst8x6g.jpg -------------------------------------------------------------------------------- /similiar_pictures/uploads/2017-09-21_11.20.29.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VENHEADs/python/HEAD/similiar_pictures/uploads/2017-09-21_11.20.29.jpg -------------------------------------------------------------------------------- /similiar_pictures/uploads/0_534a9_c253987f_orig.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VENHEADs/python/HEAD/similiar_pictures/uploads/0_534a9_c253987f_orig.jpeg -------------------------------------------------------------------------------- /similiar_pictures/model/__pycache__/load.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VENHEADs/python/HEAD/similiar_pictures/model/__pycache__/load.cpython-35.pyc -------------------------------------------------------------------------------- /similiar_pictures/uploads/1476521012_maxresdefault.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VENHEADs/python/HEAD/similiar_pictures/uploads/1476521012_maxresdefault.jpg -------------------------------------------------------------------------------- 
/similiar_pictures/requirements.txt: -------------------------------------------------------------------------------- 1 | Flask == 1.0 2 | gevent 3 | gunicorn 4 | keras 5 | numpy 6 | h5py 7 | pillow 8 | https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0-cp27-none-linux_x86_64.whl 9 | -------------------------------------------------------------------------------- /Hands-on Reinforcement Learning with Python, Second Edition_new chapters/02. A Guide to the Gym Toolkit/Images/1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VENHEADs/python/HEAD/Hands-on Reinforcement Learning with Python, Second Edition_new chapters/02. A Guide to the Gym Toolkit/Images/1.png -------------------------------------------------------------------------------- /Hands-on Reinforcement Learning with Python, Second Edition_new chapters/02. A Guide to the Gym Toolkit/Images/2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VENHEADs/python/HEAD/Hands-on Reinforcement Learning with Python, Second Edition_new chapters/02. A Guide to the Gym Toolkit/Images/2.png -------------------------------------------------------------------------------- /Hands-on Reinforcement Learning with Python, Second Edition_new chapters/02. A Guide to the Gym Toolkit/Images/3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VENHEADs/python/HEAD/Hands-on Reinforcement Learning with Python, Second Edition_new chapters/02. A Guide to the Gym Toolkit/Images/3.png -------------------------------------------------------------------------------- /Hands-on Reinforcement Learning with Python, Second Edition_new chapters/02. A Guide to the Gym Toolkit/Images/5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VENHEADs/python/HEAD/Hands-on Reinforcement Learning with Python, Second Edition_new chapters/02. A Guide to the Gym Toolkit/Images/5.png -------------------------------------------------------------------------------- /Hands-on Reinforcement Learning with Python, Second Edition_new chapters/02. A Guide to the Gym Toolkit/Images/6.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VENHEADs/python/HEAD/Hands-on Reinforcement Learning with Python, Second Edition_new chapters/02. A Guide to the Gym Toolkit/Images/6.PNG -------------------------------------------------------------------------------- /Hands-on Reinforcement Learning with Python, Second Edition_new chapters/02. A Guide to the Gym Toolkit/Images/7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VENHEADs/python/HEAD/Hands-on Reinforcement Learning with Python, Second Edition_new chapters/02. A Guide to the Gym Toolkit/Images/7.png -------------------------------------------------------------------------------- /Hands-on Reinforcement Learning with Python, Second Edition_new chapters/02. A Guide to the Gym Toolkit/Images/8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VENHEADs/python/HEAD/Hands-on Reinforcement Learning with Python, Second Edition_new chapters/02. 
A Guide to the Gym Toolkit/Images/8.png -------------------------------------------------------------------------------- /Hands-on Reinforcement Learning with Python, Second Edition_new chapters/02. A Guide to the Gym Toolkit/Images/9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VENHEADs/python/HEAD/Hands-on Reinforcement Learning with Python, Second Edition_new chapters/02. A Guide to the Gym Toolkit/Images/9.png -------------------------------------------------------------------------------- /Hands-on Reinforcement Learning with Python, Second Edition_new chapters/02. A Guide to the Gym Toolkit/Images/10.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VENHEADs/python/HEAD/Hands-on Reinforcement Learning with Python, Second Edition_new chapters/02. A Guide to the Gym Toolkit/Images/10.PNG -------------------------------------------------------------------------------- /Hands-on Reinforcement Learning with Python, Second Edition_new chapters/02. A Guide to the Gym Toolkit/Images/11.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VENHEADs/python/HEAD/Hands-on Reinforcement Learning with Python, Second Edition_new chapters/02. A Guide to the Gym Toolkit/Images/11.png -------------------------------------------------------------------------------- /Hands-on Reinforcement Learning with Python, Second Edition_new chapters/02. A Guide to the Gym Toolkit/Images/12.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VENHEADs/python/HEAD/Hands-on Reinforcement Learning with Python, Second Edition_new chapters/02. A Guide to the Gym Toolkit/Images/12.png -------------------------------------------------------------------------------- /Hands-on Reinforcement Learning with Python, Second Edition_new chapters/02. A Guide to the Gym Toolkit/Images/13.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VENHEADs/python/HEAD/Hands-on Reinforcement Learning with Python, Second Edition_new chapters/02. A Guide to the Gym Toolkit/Images/13.PNG -------------------------------------------------------------------------------- /Hands-on Reinforcement Learning with Python, Second Edition_new chapters/02. A Guide to the Gym Toolkit/Images/17.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VENHEADs/python/HEAD/Hands-on Reinforcement Learning with Python, Second Edition_new chapters/02. A Guide to the Gym Toolkit/Images/17.PNG -------------------------------------------------------------------------------- /Hands-on Reinforcement Learning with Python, Second Edition_new chapters/02. A Guide to the Gym Toolkit/Images/18.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VENHEADs/python/HEAD/Hands-on Reinforcement Learning with Python, Second Edition_new chapters/02. A Guide to the Gym Toolkit/Images/18.PNG -------------------------------------------------------------------------------- /Hands-on Reinforcement Learning with Python, Second Edition_new chapters/02. 
A Guide to the Gym Toolkit/Images/19.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VENHEADs/python/HEAD/Hands-on Reinforcement Learning with Python, Second Edition_new chapters/02. A Guide to the Gym Toolkit/Images/19.PNG -------------------------------------------------------------------------------- /Hands-on Reinforcement Learning with Python, Second Edition_new chapters/02. A Guide to the Gym Toolkit/Images/20.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VENHEADs/python/HEAD/Hands-on Reinforcement Learning with Python, Second Edition_new chapters/02. A Guide to the Gym Toolkit/Images/20.PNG -------------------------------------------------------------------------------- /Hands-on Reinforcement Learning with Python, Second Edition_new chapters/06. Case Study: The MAB Problem/Images/1.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VENHEADs/python/HEAD/Hands-on Reinforcement Learning with Python, Second Edition_new chapters/06. Case Study: The MAB Problem/Images/1.PNG -------------------------------------------------------------------------------- /Hands-on Reinforcement Learning with Python, Second Edition_new chapters/06. Case Study: The MAB Problem/Images/2.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VENHEADs/python/HEAD/Hands-on Reinforcement Learning with Python, Second Edition_new chapters/06. Case Study: The MAB Problem/Images/2.PNG -------------------------------------------------------------------------------- /Hands-on Reinforcement Learning with Python, Second Edition_new chapters/06. Case Study: The MAB Problem/Images/3.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VENHEADs/python/HEAD/Hands-on Reinforcement Learning with Python, Second Edition_new chapters/06. Case Study: The MAB Problem/Images/3.PNG -------------------------------------------------------------------------------- /Hands-on Reinforcement Learning with Python, Second Edition_new chapters/06. Case Study: The MAB Problem/Images/4.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VENHEADs/python/HEAD/Hands-on Reinforcement Learning with Python, Second Edition_new chapters/06. Case Study: The MAB Problem/Images/4.PNG -------------------------------------------------------------------------------- /Hands-on Reinforcement Learning with Python, Second Edition_new chapters/06. Case Study: The MAB Problem/Images/5.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VENHEADs/python/HEAD/Hands-on Reinforcement Learning with Python, Second Edition_new chapters/06. Case Study: The MAB Problem/Images/5.PNG -------------------------------------------------------------------------------- /Hands-on Reinforcement Learning with Python, Second Edition_new chapters/06. Case Study: The MAB Problem/Images/6.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VENHEADs/python/HEAD/Hands-on Reinforcement Learning with Python, Second Edition_new chapters/06. 
Case Study: The MAB Problem/Images/6.PNG -------------------------------------------------------------------------------- /Hands-on Reinforcement Learning with Python, Second Edition_new chapters/07. Deep Q Network and its Variants/Images/1.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VENHEADs/python/HEAD/Hands-on Reinforcement Learning with Python, Second Edition_new chapters/07. Deep Q Network and its Variants/Images/1.PNG -------------------------------------------------------------------------------- /Hands-on Reinforcement Learning with Python, Second Edition_new chapters/07. Deep Q Network and its Variants/Images/2.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VENHEADs/python/HEAD/Hands-on Reinforcement Learning with Python, Second Edition_new chapters/07. Deep Q Network and its Variants/Images/2.PNG -------------------------------------------------------------------------------- /Hands-on Reinforcement Learning with Python, Second Edition_new chapters/07. Deep Q Network and its Variants/Images/3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VENHEADs/python/HEAD/Hands-on Reinforcement Learning with Python, Second Edition_new chapters/07. Deep Q Network and its Variants/Images/3.png -------------------------------------------------------------------------------- /Hands-on Reinforcement Learning with Python, Second Edition_new chapters/07. Deep Q Network and its Variants/Images/4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VENHEADs/python/HEAD/Hands-on Reinforcement Learning with Python, Second Edition_new chapters/07. Deep Q Network and its Variants/Images/4.png -------------------------------------------------------------------------------- /Hands-on Reinforcement Learning with Python, Second Edition_new chapters/09. Actor Critic Methods - A2C and A3C/Images/1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VENHEADs/python/HEAD/Hands-on Reinforcement Learning with Python, Second Edition_new chapters/09. Actor Critic Methods - A2C and A3C/Images/1.png -------------------------------------------------------------------------------- /Hands-on Reinforcement Learning with Python, Second Edition_new chapters/01. Fundamentals of Reinforcement Learning/Images/1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VENHEADs/python/HEAD/Hands-on Reinforcement Learning with Python, Second Edition_new chapters/01. Fundamentals of Reinforcement Learning/Images/1.png -------------------------------------------------------------------------------- /Hands-on Reinforcement Learning with Python, Second Edition_new chapters/01. Fundamentals of Reinforcement Learning/Images/2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VENHEADs/python/HEAD/Hands-on Reinforcement Learning with Python, Second Edition_new chapters/01. Fundamentals of Reinforcement Learning/Images/2.png -------------------------------------------------------------------------------- /Hands-on Reinforcement Learning with Python, Second Edition_new chapters/01. 
Fundamentals of Reinforcement Learning/Images/3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VENHEADs/python/HEAD/Hands-on Reinforcement Learning with Python, Second Edition_new chapters/01. Fundamentals of Reinforcement Learning/Images/3.png -------------------------------------------------------------------------------- /Hands-on Reinforcement Learning with Python, Second Edition_new chapters/01. Fundamentals of Reinforcement Learning/Images/4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VENHEADs/python/HEAD/Hands-on Reinforcement Learning with Python, Second Edition_new chapters/01. Fundamentals of Reinforcement Learning/Images/4.png -------------------------------------------------------------------------------- /Hands-on Reinforcement Learning with Python, Second Edition_new chapters/01. Fundamentals of Reinforcement Learning/Images/5.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VENHEADs/python/HEAD/Hands-on Reinforcement Learning with Python, Second Edition_new chapters/01. Fundamentals of Reinforcement Learning/Images/5.PNG -------------------------------------------------------------------------------- /Hands-on Reinforcement Learning with Python, Second Edition_new chapters/01. Fundamentals of Reinforcement Learning/Images/6.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VENHEADs/python/HEAD/Hands-on Reinforcement Learning with Python, Second Edition_new chapters/01. Fundamentals of Reinforcement Learning/Images/6.PNG -------------------------------------------------------------------------------- /Hands-on Reinforcement Learning with Python, Second Edition_new chapters/01. Fundamentals of Reinforcement Learning/Images/7.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VENHEADs/python/HEAD/Hands-on Reinforcement Learning with Python, Second Edition_new chapters/01. Fundamentals of Reinforcement Learning/Images/7.PNG -------------------------------------------------------------------------------- /Hands-on Reinforcement Learning with Python, Second Edition_new chapters/01. Fundamentals of Reinforcement Learning/Images/8.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VENHEADs/python/HEAD/Hands-on Reinforcement Learning with Python, Second Edition_new chapters/01. Fundamentals of Reinforcement Learning/Images/8.PNG -------------------------------------------------------------------------------- /Hands-on Reinforcement Learning with Python, Second Edition_new chapters/01. Fundamentals of Reinforcement Learning/Images/9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VENHEADs/python/HEAD/Hands-on Reinforcement Learning with Python, Second Edition_new chapters/01. Fundamentals of Reinforcement Learning/Images/9.png -------------------------------------------------------------------------------- /Hands-on Reinforcement Learning with Python, Second Edition_new chapters/01. 
Fundamentals of Reinforcement Learning/Images/a.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VENHEADs/python/HEAD/Hands-on Reinforcement Learning with Python, Second Edition_new chapters/01. Fundamentals of Reinforcement Learning/Images/a.png -------------------------------------------------------------------------------- /Hands-on Reinforcement Learning with Python, Second Edition_new chapters/01. Fundamentals of Reinforcement Learning/Images/10.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VENHEADs/python/HEAD/Hands-on Reinforcement Learning with Python, Second Edition_new chapters/01. Fundamentals of Reinforcement Learning/Images/10.PNG -------------------------------------------------------------------------------- /Hands-on Reinforcement Learning with Python, Second Edition_new chapters/01. Fundamentals of Reinforcement Learning/Images/11.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VENHEADs/python/HEAD/Hands-on Reinforcement Learning with Python, Second Edition_new chapters/01. Fundamentals of Reinforcement Learning/Images/11.png -------------------------------------------------------------------------------- /Hands-on Reinforcement Learning with Python, Second Edition_new chapters/01. Fundamentals of Reinforcement Learning/Images/12.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VENHEADs/python/HEAD/Hands-on Reinforcement Learning with Python, Second Edition_new chapters/01. Fundamentals of Reinforcement Learning/Images/12.png -------------------------------------------------------------------------------- /Hands-on Reinforcement Learning with Python, Second Edition_new chapters/01. Fundamentals of Reinforcement Learning/Images/13.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VENHEADs/python/HEAD/Hands-on Reinforcement Learning with Python, Second Edition_new chapters/01. Fundamentals of Reinforcement Learning/Images/13.png -------------------------------------------------------------------------------- /Hands-on Reinforcement Learning with Python, Second Edition_new chapters/01. Fundamentals of Reinforcement Learning/Images/14.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VENHEADs/python/HEAD/Hands-on Reinforcement Learning with Python, Second Edition_new chapters/01. Fundamentals of Reinforcement Learning/Images/14.png -------------------------------------------------------------------------------- /Hands-on Reinforcement Learning with Python, Second Edition_new chapters/01. Fundamentals of Reinforcement Learning/Images/15.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VENHEADs/python/HEAD/Hands-on Reinforcement Learning with Python, Second Edition_new chapters/01. Fundamentals of Reinforcement Learning/Images/15.png -------------------------------------------------------------------------------- /Hands-on Reinforcement Learning with Python, Second Edition_new chapters/01. 
Fundamentals of Reinforcement Learning/Images/16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VENHEADs/python/HEAD/Hands-on Reinforcement Learning with Python, Second Edition_new chapters/01. Fundamentals of Reinforcement Learning/Images/16.png -------------------------------------------------------------------------------- /Hands-on Reinforcement Learning with Python, Second Edition_new chapters/01. Fundamentals of Reinforcement Learning/Images/17.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VENHEADs/python/HEAD/Hands-on Reinforcement Learning with Python, Second Edition_new chapters/01. Fundamentals of Reinforcement Learning/Images/17.png -------------------------------------------------------------------------------- /Hands-on Reinforcement Learning with Python, Second Edition_new chapters/01. Fundamentals of Reinforcement Learning/Images/18.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VENHEADs/python/HEAD/Hands-on Reinforcement Learning with Python, Second Edition_new chapters/01. Fundamentals of Reinforcement Learning/Images/18.png -------------------------------------------------------------------------------- /Hands-on Reinforcement Learning with Python, Second Edition_new chapters/01. Fundamentals of Reinforcement Learning/Images/19.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VENHEADs/python/HEAD/Hands-on Reinforcement Learning with Python, Second Edition_new chapters/01. Fundamentals of Reinforcement Learning/Images/19.png -------------------------------------------------------------------------------- /Hands-on Reinforcement Learning with Python, Second Edition_new chapters/01. Fundamentals of Reinforcement Learning/Images/2-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VENHEADs/python/HEAD/Hands-on Reinforcement Learning with Python, Second Edition_new chapters/01. Fundamentals of Reinforcement Learning/Images/2-2.png -------------------------------------------------------------------------------- /Hands-on Reinforcement Learning with Python, Second Edition_new chapters/01. Fundamentals of Reinforcement Learning/Images/20.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VENHEADs/python/HEAD/Hands-on Reinforcement Learning with Python, Second Edition_new chapters/01. Fundamentals of Reinforcement Learning/Images/20.png -------------------------------------------------------------------------------- /Hands-on Reinforcement Learning with Python, Second Edition_new chapters/01. Fundamentals of Reinforcement Learning/Images/21.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VENHEADs/python/HEAD/Hands-on Reinforcement Learning with Python, Second Edition_new chapters/01. Fundamentals of Reinforcement Learning/Images/21.png -------------------------------------------------------------------------------- /Hands-on Reinforcement Learning with Python, Second Edition_new chapters/01. 
Fundamentals of Reinforcement Learning/Images/22.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VENHEADs/python/HEAD/Hands-on Reinforcement Learning with Python, Second Edition_new chapters/01. Fundamentals of Reinforcement Learning/Images/22.png -------------------------------------------------------------------------------- /Hands-on Reinforcement Learning with Python, Second Edition_new chapters/01. Fundamentals of Reinforcement Learning/Images/23.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VENHEADs/python/HEAD/Hands-on Reinforcement Learning with Python, Second Edition_new chapters/01. Fundamentals of Reinforcement Learning/Images/23.png -------------------------------------------------------------------------------- /Hands-on Reinforcement Learning with Python, Second Edition_new chapters/01. Fundamentals of Reinforcement Learning/Images/24.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VENHEADs/python/HEAD/Hands-on Reinforcement Learning with Python, Second Edition_new chapters/01. Fundamentals of Reinforcement Learning/Images/24.png -------------------------------------------------------------------------------- /Hands-on Reinforcement Learning with Python, Second Edition_new chapters/01. Fundamentals of Reinforcement Learning/Images/25.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VENHEADs/python/HEAD/Hands-on Reinforcement Learning with Python, Second Edition_new chapters/01. Fundamentals of Reinforcement Learning/Images/25.PNG -------------------------------------------------------------------------------- /Hands-on Reinforcement Learning with Python, Second Edition_new chapters/01. Fundamentals of Reinforcement Learning/Images/26.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VENHEADs/python/HEAD/Hands-on Reinforcement Learning with Python, Second Edition_new chapters/01. Fundamentals of Reinforcement Learning/Images/26.PNG -------------------------------------------------------------------------------- /Hands-on Reinforcement Learning with Python, Second Edition_new chapters/01. Fundamentals of Reinforcement Learning/Images/27.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VENHEADs/python/HEAD/Hands-on Reinforcement Learning with Python, Second Edition_new chapters/01. Fundamentals of Reinforcement Learning/Images/27.PNG -------------------------------------------------------------------------------- /Hands-on Reinforcement Learning with Python, Second Edition_new chapters/01. Fundamentals of Reinforcement Learning/Images/28.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VENHEADs/python/HEAD/Hands-on Reinforcement Learning with Python, Second Edition_new chapters/01. Fundamentals of Reinforcement Learning/Images/28.png -------------------------------------------------------------------------------- /Hands-on Reinforcement Learning with Python, Second Edition_new chapters/01. 
Fundamentals of Reinforcement Learning/Images/29.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VENHEADs/python/HEAD/Hands-on Reinforcement Learning with Python, Second Edition_new chapters/01. Fundamentals of Reinforcement Learning/Images/29.PNG -------------------------------------------------------------------------------- /Hands-on Reinforcement Learning with Python, Second Edition_new chapters/01. Fundamentals of Reinforcement Learning/Images/30.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VENHEADs/python/HEAD/Hands-on Reinforcement Learning with Python, Second Edition_new chapters/01. Fundamentals of Reinforcement Learning/Images/30.PNG -------------------------------------------------------------------------------- /Hands-on Reinforcement Learning with Python, Second Edition_new chapters/01. Fundamentals of Reinforcement Learning/Images/31.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VENHEADs/python/HEAD/Hands-on Reinforcement Learning with Python, Second Edition_new chapters/01. Fundamentals of Reinforcement Learning/Images/31.PNG -------------------------------------------------------------------------------- /Hands-on Reinforcement Learning with Python, Second Edition_new chapters/01. Fundamentals of Reinforcement Learning/Images/32.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VENHEADs/python/HEAD/Hands-on Reinforcement Learning with Python, Second Edition_new chapters/01. Fundamentals of Reinforcement Learning/Images/32.PNG -------------------------------------------------------------------------------- /Hands-on Reinforcement Learning with Python, Second Edition_new chapters/01. Fundamentals of Reinforcement Learning/Images/33.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VENHEADs/python/HEAD/Hands-on Reinforcement Learning with Python, Second Edition_new chapters/01. Fundamentals of Reinforcement Learning/Images/33.PNG -------------------------------------------------------------------------------- /Hands-on Reinforcement Learning with Python, Second Edition_new chapters/01. Fundamentals of Reinforcement Learning/Images/34.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VENHEADs/python/HEAD/Hands-on Reinforcement Learning with Python, Second Edition_new chapters/01. Fundamentals of Reinforcement Learning/Images/34.PNG -------------------------------------------------------------------------------- /Hands-on Reinforcement Learning with Python, Second Edition_new chapters/01. Fundamentals of Reinforcement Learning/Images/35.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VENHEADs/python/HEAD/Hands-on Reinforcement Learning with Python, Second Edition_new chapters/01. Fundamentals of Reinforcement Learning/Images/35.png -------------------------------------------------------------------------------- /Hands-on Reinforcement Learning with Python, Second Edition_new chapters/01. 
Fundamentals of Reinforcement Learning/Images/36.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VENHEADs/python/HEAD/Hands-on Reinforcement Learning with Python, Second Edition_new chapters/01. Fundamentals of Reinforcement Learning/Images/36.png -------------------------------------------------------------------------------- /Hands-on Reinforcement Learning with Python, Second Edition_new chapters/01. Fundamentals of Reinforcement Learning/Images/5-8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VENHEADs/python/HEAD/Hands-on Reinforcement Learning with Python, Second Edition_new chapters/01. Fundamentals of Reinforcement Learning/Images/5-8.png -------------------------------------------------------------------------------- /Hands-on Reinforcement Learning with Python, Second Edition_new chapters/03. Bellman Equation and Dynamic Programming/Images/1.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VENHEADs/python/HEAD/Hands-on Reinforcement Learning with Python, Second Edition_new chapters/03. Bellman Equation and Dynamic Programming/Images/1.PNG -------------------------------------------------------------------------------- /Hands-on Reinforcement Learning with Python, Second Edition_new chapters/03. Bellman Equation and Dynamic Programming/Images/2.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VENHEADs/python/HEAD/Hands-on Reinforcement Learning with Python, Second Edition_new chapters/03. Bellman Equation and Dynamic Programming/Images/2.PNG -------------------------------------------------------------------------------- /Hands-on Reinforcement Learning with Python, Second Edition_new chapters/03. Bellman Equation and Dynamic Programming/Images/3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VENHEADs/python/HEAD/Hands-on Reinforcement Learning with Python, Second Edition_new chapters/03. Bellman Equation and Dynamic Programming/Images/3.png -------------------------------------------------------------------------------- /Hands-on Reinforcement Learning with Python, Second Edition_new chapters/03. Bellman Equation and Dynamic Programming/Images/4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VENHEADs/python/HEAD/Hands-on Reinforcement Learning with Python, Second Edition_new chapters/03. Bellman Equation and Dynamic Programming/Images/4.png -------------------------------------------------------------------------------- /Hands-on Reinforcement Learning with Python, Second Edition_new chapters/03. Bellman Equation and Dynamic Programming/Images/5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VENHEADs/python/HEAD/Hands-on Reinforcement Learning with Python, Second Edition_new chapters/03. Bellman Equation and Dynamic Programming/Images/5.png -------------------------------------------------------------------------------- /Hands-on Reinforcement Learning with Python, Second Edition_new chapters/03. 
Bellman Equation and Dynamic Programming/Images/6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VENHEADs/python/HEAD/Hands-on Reinforcement Learning with Python, Second Edition_new chapters/03. Bellman Equation and Dynamic Programming/Images/6.png -------------------------------------------------------------------------------- /Hands-on Reinforcement Learning with Python, Second Edition_new chapters/05. Understanding Temporal Difference Learning/Images/1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VENHEADs/python/HEAD/Hands-on Reinforcement Learning with Python, Second Edition_new chapters/05. Understanding Temporal Difference Learning/Images/1.png -------------------------------------------------------------------------------- /Hands-on Reinforcement Learning with Python, Second Edition_new chapters/09. Actor Critic Methods - A2C and A3C/logs/events.out.tfevents.1587573275.Sudharsan: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VENHEADs/python/HEAD/Hands-on Reinforcement Learning with Python, Second Edition_new chapters/09. Actor Critic Methods - A2C and A3C/logs/events.out.tfevents.1587573275.Sudharsan -------------------------------------------------------------------------------- /Hands-on Reinforcement Learning with Python, Second Edition_new chapters/09. Actor Critic Methods - A2C and A3C/README.md: -------------------------------------------------------------------------------- 1 | # 9. Actor Critic Methods - A2C and A3C 2 | * 9.1. Overview of Actor Critic Method 3 | * 9.2. Understanding the Actor Critic Method 4 | * 9.2.1. Algorithm - Actor Critic 5 | * 9.3. Advantage Actor Critic 6 | * 9.4. Asynchronous Advantage Actor Critic 7 | * 9.4.1. The Three As 8 | * 9.4.2. The Architecture of A3C 9 | * 9.5. Mountain Car Climbing using A3C 10 | * 9.6. A2C Revisited -------------------------------------------------------------------------------- /santa_RL/config.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | from easydict import EasyDict as edict 5 | 6 | 7 | def _get_default_config(): 8 | config = edict() 9 | config.N_ACTIONS = 10 10 | config.MAX_CAPACITY = 280. # max number of people per day 11 | config.DAYS_OF_MAX_CAPACITY = 25 12 | config.ADDITIONAL_REWARD = 0 13 | config.REWARD_SCALE = 1. 14 | config.n_neurons = 8192 15 | config.episdodes_monte = 50 16 | config.batch_size = 128 17 | config.gamma = 0.99 18 | return config 19 | -------------------------------------------------------------------------------- /Hands-on Reinforcement Learning with Python, Second Edition_new chapters/06. Case Study: The MAB Problem/README.md: -------------------------------------------------------------------------------- 1 | # 6. Case Study: The MAB Problem 2 | * 6.1. The MAB Problem 3 | * 6.2. Creating Bandit in the Gym 4 | * 6.3. Epsilon-Greedy 5 | * 6.4. Implementing Epsilon-Greedy 6 | * 6.5. Softmax Exploration 7 | * 6.6. Implementing Softmax Exploration 8 | * 6.7. Upper Confidence Bound 9 | * 6.8. Implementing UCB 10 | * 6.9. Thompson Sampling 11 | * 6.10. Implementing Thompson Sampling 12 | * 6.11. Applications of MAB 13 | * 6.12. Finding the Best Advertisement Banner using Bandits 14 | * 6.13. 
Contextual Bandits 15 | -------------------------------------------------------------------------------- /Hands-on Reinforcement Learning with Python, Second Edition_new chapters/08. Policy Gradient Method/README.md: -------------------------------------------------------------------------------- 1 | # 8. Policy Gradient Method 2 | * 8.1. Why Policy Based Methods? 3 | * 8.2. Policy Gradient Intuition 4 | * 8.3. Understanding the Policy Gradient 5 | * 8.4. Deriving Policy Gradient 6 | * 8.4.1. Algorithm - Policy Gradient 7 | * 8.5. Variance Reduction Methods 8 | * 8.6. Policy Gradient with Reward-to-go 9 | * 8.6.1. Algorithm - Reward-to-go Policy Gradient 10 | * 8.7. Cart Pole Balancing with Policy Gradient 11 | * 8.8. Policy Gradient with Baseline 12 | * 8.8.1. Algorithm - Reinforce with Baseline 13 | -------------------------------------------------------------------------------- /Hands-on Reinforcement Learning with Python, Second Edition_new chapters/05. Understanding Temporal Difference Learning/README.md: -------------------------------------------------------------------------------- 1 | ### 5. Understanding Temporal Difference Learning 2 | * 5.1. TD Learning 3 | * 5.2. TD Prediction 4 | * 5.2.1. TD Prediction Algorithm 5 | * 5.3. Predicting the Value of States in a Frozen Lake Environment 6 | * 5.4. TD Control 7 | * 5.5. On-Policy TD Control - SARSA 8 | * 5.6. Computing Optimal Policy using SARSA 9 | * 5.7. Off-Policy TD Control - Q Learning 10 | * 5.8. Computing the Optimal Policy using Q Learning 11 | * 5.9. The Difference Between Q Learning and SARSA 12 | * 5.10. Comparing DP, MC, and TD Methods -------------------------------------------------------------------------------- /Hands-on Reinforcement Learning with Python, Second Edition_new chapters/03. Bellman Equation and Dynamic Programming/README.md: -------------------------------------------------------------------------------- 1 | # 3. Bellman Equation and Dynamic Programming 2 | * 3.1. The Bellman Equation 3 | * 3.1.1. Bellman Equation of the Value Function 4 | * 3.1.2. Bellman Equation of the Q Function 5 | * 3.2. Bellman Optimality Equation 6 | * 3.3. Relation Between Value and Q Function 7 | * 3.4. Dynamic Programming 8 | * 3.5. Value Iteration 9 | * 3.5.1. Algorithm - Value Iteration 10 | * 3.6. Solving the Frozen Lake Problem with Value Iteration 11 | * 3.7. Policy iteration 12 | * 3.7.1. Algorithm - Policy iteration 13 | * 3.8. Solving the Frozen Lake Problem with Policy Iteration 14 | * 3.9. Is DP Applicable to all Environments? 
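As a quick illustration of the value iteration algorithm listed in sections 3.5 and 3.6, here is a minimal sketch (not the book's implementation), assuming the classic Gym FrozenLake API where `env.unwrapped.P[s][a]` holds `(transition_prob, next_state, reward, done)` tuples:

```python
import numpy as np
import gym

# Minimal value iteration sketch for FrozenLake (illustrative only).
# Assumes the classic Gym API where env.unwrapped.P[s][a] is a list of
# (transition_prob, next_state, reward, done) tuples.
env = gym.make('FrozenLake-v0')
P = env.unwrapped.P
n_states = env.observation_space.n
n_actions = env.action_space.n

gamma, theta = 0.99, 1e-8          # discount factor and convergence threshold
V = np.zeros(n_states)

while True:
    delta = 0.0
    for s in range(n_states):
        # Bellman optimality backup: V(s) = max_a sum_s' p(s'|s,a) [r + gamma * V(s')]
        q_values = [sum(p * (r + gamma * V[s2]) for p, s2, r, _ in P[s][a])
                    for a in range(n_actions)]
        best = max(q_values)
        delta = max(delta, abs(best - V[s]))
        V[s] = best
    if delta < theta:
        break

# Extract the greedy policy from the converged value function
policy = [int(np.argmax([sum(p * (r + gamma * V[s2]) for p, s2, r, _ in P[s][a])
                         for a in range(n_actions)]))
          for s in range(n_states)]
print(np.round(V.reshape(4, 4), 3))
print(np.array(policy).reshape(4, 4))
```

The same backup, alternated with an explicit policy-evaluation loop, gives the policy iteration of sections 3.7 and 3.8.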
-------------------------------------------------------------------------------- /santa_RL/data_loader_monte.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from torch.utils.data import Dataset, DataLoader 3 | import torch 4 | 5 | path_data = '' 6 | BATCH_SIZE = 1 7 | 8 | 9 | class FamilyDataset(Dataset): 10 | def __init__(self, root): 11 | self.root = root 12 | self.df = pd.read_csv(self.root + 'family_data_standard_scaled.csv',) 13 | self.len = self.df.shape[0] 14 | 15 | def __getitem__(self, index): 16 | return torch.Tensor(self.df.iloc[index].values[1:]), self.df.iloc[index].values[0]#.unsqueeze(0) 17 | 18 | def __len__(self): 19 | return self.len 20 | 21 | 22 | train_data = FamilyDataset(path_data) 23 | train_loader = DataLoader(train_data, batch_size=BATCH_SIZE, shuffle=True) 24 | imgs = next(iter(train_loader)) 25 | 26 | -------------------------------------------------------------------------------- /read-writecsv.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | from datetime import datetime 4 | from sklearn.preprocessing import StandardScaler 5 | from sklearn.model_selection import KFold 6 | from sklearn.metrics import mean_absolute_error 7 | from scipy.stats import skew, boxcox 8 | from math import exp, log 9 | import os 10 | from os import listdir 11 | import csv 12 | x = pd.read_csv('x111.csv', header=0) # base8_nr900_p10_cleaned 13 | y = pd.read_csv('x222.csv', header=0) #submission_5fold-average-xgb_1146.10852_2016-10-13-02-40 14 | z = pd.read_csv('x333.csv', header=0) #genetic gpsubmission 15 | 16 | x1 = np.asarray(x) 17 | y1 = np.asarray(y) 18 | z1 = np.asarray(z) 19 | res = x1[:,1]*0.46+0.46*y1[:,1]+0.08*z1[:,1] # weighted blend of the three submissions (weights sum to 1.0) 20 | 21 | x1[:,1]=res 22 | x.iloc[:, 1] = res # assign the blended column back into the DataFrame (chained indexing would not persist) 23 | x.to_csv('av0.46_0.46_0.08.csv', index=None) 24 | v = pd.DataFrame(x1) 25 | -------------------------------------------------------------------------------- /Hands-on Reinforcement Learning with Python, Second Edition_new chapters/07. Deep Q Network and its Variants/READEME.md: -------------------------------------------------------------------------------- 1 | # 7. Deep Q Network and its Variants 2 | 3 | * 7.1. What is Deep Q Network? 4 | * 7.2. Understanding DQN 5 | * 7.2.1. Replay Buffer 6 | * 7.2.2. Loss Function 7 | * 7.2.3. Target Network 8 | * 7.2.4. Putting it All Together 9 | * 7.2.5. Algorithm - DQN 10 | * 7.3. Playing Atari Games using DQN 11 | * 7.3.1. Architecture of DQN 12 | * 7.3.2. Getting Hands-on with the DQN 13 | * 7.4. Double DQN 14 | * 7.4.1. Algorithm - Double DQN 15 | * 7.5. DQN with Prioritized Experience Replay 16 | * 7.5.1. Types of Prioritization 17 | * 7.5.2. Correcting the Bias 18 | * 7.6. Dueling DQN 19 | * 7.6.1. Understanding Dueling DQN 20 | * 7.6.2. Architecture of Dueling DQN 21 | * 7.7. Deep Recurrent Q Network 22 | * 7.7.1. 
Architecture of DRQN -------------------------------------------------------------------------------- /salt/Funct_wrapper.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | 4 | def h_flip(x, apply): 5 | return tf.image.flip_left_right(x) if apply else x 6 | 7 | 8 | def v_flip(x, apply): 9 | return tf.image.flip_up_down(x) if apply else x 10 | 11 | 12 | def rotate(x, angle): 13 | k = angle // 90 if angle >= 0 else (angle + 360) // 90 14 | return tf.image.rot90(x, k) 15 | 16 | 17 | def h_shift(x, distance): 18 | return tf.manip.roll(x, distance, axis=0) 19 | 20 | 21 | def v_shift(x, distance): 22 | return tf.manip.roll(x, distance, axis=1) 23 | 24 | 25 | def gmean(x): 26 | g_pow = 1 / x.get_shape().as_list()[0] 27 | x = tf.reduce_prod(x, axis=0, keepdims=True) 28 | x = tf.pow(x, g_pow) 29 | return x 30 | 31 | 32 | def mean(x): 33 | return tf.reduce_mean(x, axis=0, keepdims=True) 34 | 35 | 36 | def max(x): 37 | return tf.reduce_max(x, axis=0, keepdims=True) -------------------------------------------------------------------------------- /similiar_pictures/README.md: -------------------------------------------------------------------------------- 1 | 2 | The main problem for me was the lack of any prior experience with web development and Flask/Django. 3 | The process itself is quite simple. 4 | 5 | Since I have no GPU, I was limited to my laptop. 6 | I took a ResNet pretrained for 1000-class classification. 7 | The output of its last layer was taken as a 2000-vector representation of each picture. 8 | With it I built a dictionary mapping each image name to its 2000-vector representation. 9 | Then, given a new picture, it is easy to compute its 2000-vector representation and find the 5 closest 10 | pictures out of the 25,000 based on cosine distance. 11 | 12 | The results are presented below. 13 | 14 | To use it, add the Flickr 25K image folder inside the static folder. 15 | Then just run python app.py in the project folder. 16 | https://www.youtube.com/watch?v=gymnASuGCkc&feature=youtu.be - demo 17 | 18 | https://yadi.sk/d/pHEgVJkf3VjvN2 - all files, but without the picture set (the mirflickr folder inside static, which is the 25K Flickr dataset) 19 | -------------------------------------------------------------------------------- /Hands-on Reinforcement Learning with Python, Second Edition_new chapters/02. A Guide to the Gym Toolkit/README.md: -------------------------------------------------------------------------------- 1 | # 2. A Guide to the Gym Toolkit 2 | * 2.1. Setting Up our Machine 3 | * 2.1.1. Installing Anaconda 4 | * 2.1.2. Installing the Gym Toolkit 5 | * 2.1.3. Common Error Fixes 6 | * 2.2. Creating our First Gym Environment 7 | * 2.2.1. Exploring the Environment 8 | * 2.2.2. States 9 | * 2.2.3. Actions 10 | * 2.2.4. Transition Probability and Reward Function 11 | * 2.3. Generating an episode 12 | * 2.4. Classic Control Environments 13 | * 2.4.1. State Space 14 | * 2.4.2. Action Space 15 | * 2.5. Cart Pole Balancing with Random Policy 16 | * 2.6. Atari Game Environments 17 | * 2.6.1. General Environment 18 | * 2.6.2. Deterministic Environment 19 | * 2.7. Agent Playing the Tennis Game 20 | * 2.8. Recording the Game 21 | * 2.9. Other environments 22 | * 2.9.1. Box 2D 23 | * 2.9.2. Mujoco 24 | * 2.9.3. Robotics 25 | * 2.9.4. Toy text 26 | * 2.9.5. Algorithms 27 | * 2.10. Environment Synopsis -------------------------------------------------------------------------------- /Hands-on Reinforcement Learning with Python, Second Edition_new chapters/10. 
Learning DDPG, TD3 and SAC/README.md: -------------------------------------------------------------------------------- 1 | # 10. Learning DDPG, TD3 and SAC 2 | * 10.1. Deep Deterministic Policy Gradient 3 | * 10.1.1. An Overview of DDPG 4 | * 10.1.2. Components of DDPG 5 | * 10.1.2.1. Critic network 6 | * 10.1.2.2. Actor Network 7 | * 10.1.3. Putting it all Together 8 | * 10.1.4. Algorithm - DDPG 9 | * 10.2. Swinging Up the Pendulum using DDPG 10 | * 10.3. Twin Delayed DDPG 11 | * 10.3.1. Key Features of TD3 12 | * 10.3.2. Clipped Double Q Learning 13 | * 10.3.3. Delayed Policy Updates 14 | * 10.3.4. Target Policy Smoothing 15 | * 10.3.5. Putting it all Together 16 | * 10.3.6. Algorithm - TD3 17 | * 10.4. Soft Actor Critic 18 | * 10.4.1. Understanding Soft Actor Critic 19 | * 10.4.2. V and Q Function with the Entropy Term 20 | * 10.4.3. Critic Network 21 | * 10.4.3.1. Value Network 22 | * 10.4.3.2. Q Network 23 | * 10.5.4. Actor Network 24 | * 10.5.5. Putting it all Together -------------------------------------------------------------------------------- /Programming Assignment: Оптимизация в Python: глобальная оптимизация и оптимизация негладкой функции.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import scipy 3 | from scipy import sin,cos,exp 4 | import scipy.linalg 5 | from scipy import interpolate 6 | from matplotlib import pylab as plt 7 | from scipy import optimize 8 | fx = lambda x: sin(x / 5.0) * exp(x / 10.0) + 5.0 * exp(-x / 2.0) # define the function 9 | apx = scipy.optimize.minimize (fx,30,method="BFGS") # minimize with initial guess 30; compare with initial guess 2 10 | x = np.arange(0, 30, 0.1) # evaluate the function on a grid for plotting 11 | plt.plot(x, fx(x)) # plot the graph 12 | plt.show() # display it 13 | bounds = [(1,30)] 14 | scipy.optimize.differential_evolution(fx,bounds) # differential evolution algorithm 15 | def h1(x): 16 | return int(fx(x)) # define a new, integer-valued (non-smooth) function 17 | hx = np.vectorize(h1) # vectorize it so it can be applied element-wise to arrays 18 | 19 | hx1 = np.vectorize(lambda x: int(sin(x / 5.0) * exp(x / 10.0) + 5.0 * exp(-x / 2.0))) # variant 3 20 | fx(x).astype(int) # variant 3 21 | def z(x): # variant 4 22 | return fx(x).astype(int) 23 | -------------------------------------------------------------------------------- /Hands-on Reinforcement Learning with Python, Second Edition_new chapters/04. Monte Carlo Methods/README.md: -------------------------------------------------------------------------------- 1 | ### 4. Monte Carlo Methods 2 | * 4.1. Understanding the Monte Carlo Method 3 | * 4.2. Prediction and Control Tasks 4 | * 4.2.1. Prediction Task 5 | * 4.2.2. Control Task 6 | * 4.3. Monte Carlo Prediction 7 | * 4.3.1. MC Prediction Algorithm 8 | * 4.3.2. Types of MC prediction 9 | * 4.3.3. First-visit Monte Carlo 10 | * 4.3.4. Every visit Monte Carlo 11 | * 4.4. Understanding the BlackJack Game 12 | * 4.4.1. Blackjack Environment in the Gym 13 | * 4.5. Every-visit MC Prediction with Blackjack Game 14 | * 4.6. First-visit MC Prediction with Blackjack Game 15 | * 4.7. Incremental Mean Updates 16 | * 4.8. MC Prediction (Q Function) 17 | * 4.9. Monte Carlo Control 18 | * 4.10. On-Policy Monte Carlo Control 19 | * 4.11. Monte Carlo Exploring Starts 20 | * 4.12. Monte Carlo with Epsilon-Greedy Policy 21 | * 4.7.5. Algorithm MC Control with Epsilon-Greedy Policy 22 | * 4.13. Implementing On-Policy MC Control 23 | * 4.14. Off-Policy Monte Carlo Control 24 | * 4.15. Is MC Method Applicable to all Tasks?
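To make first-visit MC prediction (sections 4.3 and 4.6) concrete, here is a minimal sketch, not the book's code, assuming the classic Gym Blackjack API where `env.reset()` returns a `(player_sum, dealer_card, usable_ace)` tuple, `env.step()` returns `(state, reward, done, info)`, and the actions are 0 = stand and 1 = hit. The evaluated policy is a simple threshold policy:

```python
import gym
from collections import defaultdict

env = gym.make('Blackjack-v0')

def policy(state):
    # Fixed policy to evaluate: stand on 20 or 21, otherwise hit
    player_sum, dealer_card, usable_ace = state
    return 0 if player_sum >= 20 else 1

def generate_episode(env, policy):
    episode, state, done = [], env.reset(), False
    while not done:
        action = policy(state)
        next_state, reward, done, _ = env.step(action)
        episode.append((state, action, reward))
        state = next_state
    return episode

returns_sum, returns_count = defaultdict(float), defaultdict(int)
V, gamma = defaultdict(float), 1.0

for _ in range(50000):
    episode = generate_episode(env, policy)
    G = 0.0
    # Walk the episode backwards, accumulating the return G
    for t in reversed(range(len(episode))):
        state, _, reward = episode[t]
        G = gamma * G + reward
        # First-visit check: only update on the earliest occurrence of the state
        if state not in [episode[i][0] for i in range(t)]:
            returns_count[state] += 1
            returns_sum[state] += G
            V[state] = returns_sum[state] / returns_count[state]

print(len(V), "states evaluated")
print(sorted(V.items())[:3])   # a few example (state, value) estimates
```

Dropping the first-visit check turns this into the every-visit MC prediction of section 4.5.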
-------------------------------------------------------------------------------- /recursion-cellular-image-classification/predict.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from data_loader import tloader 3 | from tqdm import tqdm 4 | import pandas as pd 5 | import numpy as np 6 | from model_k import model_resnet_18 7 | import os 8 | 9 | os.environ["CUDA_VISIBLE_DEVICES"] = "0,1" 10 | path = '/mnt/ssd1/datasets/Recursion_class/recurs_proj/checkpoint' 11 | path_data = '/mnt/ssd1/datasets/Recursion_class/' 12 | device = 'cuda' 13 | model = model_resnet_18 14 | checkpoint_name = 'resnet18_general_site_1_107_val_accuracy=0.237779.pth' 15 | 16 | checkpoint = torch.load(path + '/' + checkpoint_name) 17 | model.load_state_dict(checkpoint) 18 | model.to(device) 19 | with torch.no_grad(): 20 | preds = np.empty(0) 21 | for x, _ in tqdm(tloader): 22 | x = x.to(device) 23 | output = model(x) 24 | idx = output.max(dim=-1)[1].cpu().numpy() 25 | preds = np.append(preds, idx, axis=0) 26 | 27 | 28 | submission = pd.read_csv(path_data + '/test.csv') 29 | submission['sirna'] = preds.astype(int) 30 | submission.to_csv('submission_{}.csv'.format(checkpoint_name), index=False, columns=['id_code', 'sirna']) 31 | 32 | 33 | -------------------------------------------------------------------------------- /Bayesian_AB/make_table (1).py: -------------------------------------------------------------------------------- 1 | 2 | import yt.wrapper as yt 3 | from nile.api.v1 import clusters 4 | from nile.api.v1 import aggregators as na 5 | from qb2.api.v1 import filters as qf 6 | from nile.api.v1 import filters as nf 7 | from nile.api.v1 import extractors as ne 8 | import pandas as pd 9 | import numpy as np 10 | import timeit 11 | import datetime 12 | import os 13 | from nile.api.v1 import statface as ns 14 | from nile.api.v1 import cli 15 | import ast 16 | from nile.api.v1 import Record 17 | import re 18 | import calendar 19 | 20 | 21 | # In[ ]: 22 | 23 | 24 | from yql.api.v1.client import YqlClient 25 | 26 | def create_ab_table(date_start,date_end,experiment_name): 27 | client = YqlClient(db='',token='') 28 | 29 | request = client.query( 30 | """ 31 | 32 | 33 | 34 | 35 | 36 | """.format(date_start,date_end,experiment_name,experiment_name), syntax_version=1 37 | ) 38 | request.run() 39 | for table in request.get_results(): # access to results blocks until they are ready 40 | while(table.fetch_full_data() == False): 41 | time.sleep(1) 42 | 43 | table = '//tmp/sovetnik/valeriy/{}'.format(experiment_name) 44 | return table 45 | 46 | 47 | -------------------------------------------------------------------------------- /Hands-on Reinforcement Learning with Python, Second Edition_new chapters/01. Fundamentals of Reinforcement Learning/README.md: -------------------------------------------------------------------------------- 1 | 2 | # 1. Fundamentals of Reinforcement Learning 3 | 4 | * 1.1. Basic Idea of Reinforcement Learning 5 | * 1.2. Key Elements of Reinforcement Learning 6 | * 1.2.1. Agent 7 | * 1.2.2. Environment 8 | * 1.2.3. State and Action 9 | * 1.2.4. Reward 10 | * 1.3. Reinforcement Learning Algorithm 11 | * 1.4. RL Agent in the Grid World 12 | * 1.5. How RL differs from other ML paradigms? 13 | * 1.6. Markov Decision Processes 14 | * 1.6.1. Markov Property and Markov Chain 15 | * 1.6.2. Markov Reward Process 16 | * 1.6.3. Markov Decision Process 17 | * 1.7. Action Space, Policy, Episode and Horizon 18 | * 1.8. 
Return, Discount Factor and Math Essentials 19 | * 1.9. Value Function and Q Function 20 | * 1.10. Model-Based and Model-Free Learning 21 | * 1.11. Different Types of Environments 22 | * 1.11.1. Deterministic and Stochastic Environment 23 | * 1.11.2. Discrete and Continuous Environment 24 | * 1.11.3. Episodic and Non-Episodic Environment 25 | * 1.11.4. Single and Multi-Agent Environment 26 | * 1.12. Applications of Reinforcement Learning 27 | * 1.13. Reinforcement Learning Glossary 28 | -------------------------------------------------------------------------------- /similiar_pictures/model/load.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import keras.models 3 | from keras.models import model_from_json 4 | from scipy.misc import imread, imresize,imshow 5 | import tensorflow as tf 6 | from keras.models import load_model 7 | import pickle 8 | 9 | def init(): 10 | # json_file = open('model.json','r') 11 | # loaded_model_json = json_file.read() 12 | # json_file.close() 13 | # loaded_model = model_from_json(loaded_model_json) 14 | # #load woeights into new model 15 | # loaded_model.load_weights("model.h5") 16 | # print("Loaded Model from disk") 17 | # 18 | # #compile and evaluate loaded model 19 | # loaded_model.compile(loss='categorical_crossentropy',optimizer='adam',metrics=['accuracy']) 20 | # #loss,accuracy = model.evaluate(X_test,y_test) 21 | # #print('loss:', loss) 22 | # #print('accuracy:', accuracy) 23 | # graph = tf.get_default_graph() 24 | # return loaded_model,graph 25 | import os 26 | print(os.getcwd()) 27 | loaded_model = load_model("/Users/prune/Documents/similiar_pictures/model/model_to_predict_vector.h5") 28 | with open('/Users/prune/Documents/similiar_pictures/model/dict_representation.pickle', 'rb') as handle: 29 | dict_vector = pickle.load(handle) 30 | 31 | return loaded_model,dict_vector 32 | 33 | -------------------------------------------------------------------------------- /Hands-on Reinforcement Learning with Python, Second Edition_new chapters/11. TRPO, PPO and ACKTR Methods/README.md: -------------------------------------------------------------------------------- 1 | # 11. TRPO, PPO and ACKTR Methods 2 | * 11.1 Trust Region Policy Optimization 3 | * 11.2. Math Essentials 4 | * 11.2.1. Taylor series 5 | * 11.2.2. Trust Region method 6 | * 11.2.3. Conjugate Gradient Method 7 | * 11.2.4. Lagrange Multiplier 8 | * 11.2.5. Importance Sampling 9 | * 11.3. Designing the TRPO Objective Function 10 | * 11.3.1. Parameterizing the Policy 11 | * 11.3.2. Sample Based Estimation 12 | * 11.4. Solving the TRPO Objective Function 13 | * 11.4.1. Computing the Search Direction 14 | * 11.4.2. Perform Line Search in the Search Direction 15 | * 11.5. Algorithm - TRPO 16 | * 11.6. Proximal Policy Optimization 17 | * 11.7. PPO with Clipped Objective 18 | * 11.8. Algorithm - PPO-Clipped 19 | * 11.9. Implementing PPO-Clipped Method 20 | * 11.10. PPO with Penalized Objective 21 | * 11.10.1. Algorithm - PPO-Penalty 22 | * 11.11. Actor Critic using Kronecker Factored Trust Region 23 | * 11.12. Math Essentials 24 | * 11.12.1. Block Matrix 25 | * 11.12.2. Block Diagonal Matrix 26 | * 11.12.3. Kronecker Product 27 | * 11.12.4. Vec Operator 28 | * 11.12.5. Properties of Kronecker Product 29 | * 11.13. Kronecker-Factored Approximate Curvature (K-FAC) 30 | * 11.14. 
K-FAC in Actor Critic 31 | * 11.14.1 Incorporating Trust Region -------------------------------------------------------------------------------- /nn_gradient_descent.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from sklearn.preprocessing import scale 3 | 4 | X = np.array([[1,4,6,3],[5,3,2,5],[2,5,3,-2],[4,5,2,-8]]) 5 | y = np.array([0,0,1,1]).reshape((-1,1)) 6 | X = scale(X) 7 | 8 | def loss(true,predicts, deriv = False): 9 | if deriv: 10 | return -true/predicts + (1-true)/(1-predicts) 11 | return -1*(true *np.log(predicts) + (1-true)*np.log(1-predicts)) 12 | 13 | def sigmoid(x, deriv = False): 14 | if deriv: 15 | return x*(1-x) 16 | return 1/(1+np.exp(-x)) 17 | 18 | neurons = 64 19 | weight_1 = np.random.normal(size = (4,neurons)) 20 | weight_2 = np.random.normal(size = (neurons,1)) 21 | 22 | bias_1 = np.mean(weight_1,axis=0) 23 | bias_2 = np.mean(weight_2,axis=0) 24 | for e in range(2000): 25 | alpha = 0.01 26 | 27 | layer_1 = sigmoid(np.dot(X,weight_1)+ bias_1) 28 | layer_2 = sigmoid(np.dot(layer_1,weight_2)+ bias_2) 29 | 30 | log_loss = np.mean(loss(y, layer_2)) 31 | 32 | error_2_1 = loss(y,layer_2,True) 33 | error_2_2 = error_2_1*sigmoid(layer_2,True) 34 | 35 | error_1_1 = np.dot(error_2_2,weight_2.transpose()) 36 | error_1_2 = error_1_1*sigmoid(layer_1,True) 37 | 38 | weight_2 -= alpha*np.dot(layer_1.transpose(),error_2_2) 39 | weight_1 -= alpha*np.dot(X.transpose(),error_1_2) 40 | 41 | bias_2 -= np.mean(error_2_2,axis = 0) 42 | bias_1 -= np.mean(error_1_2,axis = 0) 43 | 44 | # if e % 1000 == 0: 45 | # print(log_loss) 46 | -------------------------------------------------------------------------------- /recursion-cellular-image-classification/model_k.py: -------------------------------------------------------------------------------- 1 | from torchvision import models 2 | import torch 3 | from config import _get_default_config 4 | import torch.nn as nn 5 | 6 | config = _get_default_config() 7 | 8 | 9 | if config.model == 'resnet18': 10 | classes = 1108 11 | model_resnet_18 = models.resnet18(pretrained=True) 12 | 13 | num_ftrs = model_resnet_18.fc.in_features 14 | model_resnet_18.fc = torch.nn.Linear(num_ftrs, classes) 15 | 16 | # let's make our model work with 6 channels 17 | trained_kernel = model_resnet_18.conv1.weight 18 | new_conv = nn.Conv2d(6, 64, kernel_size=7, stride=2, padding=3, bias=False) 19 | with torch.no_grad(): 20 | new_conv.weight[:, :] = torch.stack([torch.mean(trained_kernel, 1)]*6, dim=1) 21 | model_resnet_18.conv1 = new_conv 22 | 23 | if config.model == 'densenet121': 24 | classes = 1108 25 | model_resnet_18 = models.densenet121(pretrained=True) 26 | 27 | num_ftrs = model_resnet_18.classifier.in_features 28 | model_resnet_18.classifier = torch.nn.Linear(num_ftrs, classes) 29 | # 30 | # # let's make our model work with 6 channels 31 | trained_kernel = model_resnet_18.features.conv0.weight 32 | new_conv = nn.Conv2d(6, 64, kernel_size=7, stride=2, padding=3, bias=False) 33 | with torch.no_grad(): 34 | new_conv.weight[:, :] = torch.stack([torch.mean(trained_kernel, 1)]*6, dim=1) 35 | model_resnet_18.features.conv0 = new_conv 36 | 37 | -------------------------------------------------------------------------------- /recursion-cellular-image-classification/losses.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from torch.autograd import Variable 5 | 6 | 7 | class FocalLoss(nn.Module): 
8 | def __init__(self, gamma=0, alpha=None, size_average=True): 9 | super(FocalLoss, self).__init__() 10 | self.gamma = gamma 11 | self.alpha = alpha 12 | if isinstance(alpha, (float, int)): self.alpha = torch.Tensor([alpha,1-alpha]) 13 | if isinstance(alpha, list): self.alpha = torch.Tensor(alpha) 14 | self.size_average = size_average 15 | 16 | def forward(self, input, target): 17 | if input.dim()>2: 18 | input = input.view(input.size(0),input.size(1),-1) # N,C,H,W => N,C,H*W 19 | input = input.transpose(1,2) # N,C,H*W => N,H*W,C 20 | input = input.contiguous().view(-1,input.size(2)) # N,H*W,C => N*H*W,C 21 | target = target.view(-1,1) 22 | 23 | logpt = F.log_softmax(input) 24 | logpt = logpt.gather(1,target) 25 | logpt = logpt.view(-1) 26 | pt = Variable(logpt.data.exp()) 27 | 28 | if self.alpha is not None: 29 | if self.alpha.type()!=input.data.type(): 30 | self.alpha = self.alpha.type_as(input.data) 31 | at = self.alpha.gather(0,target.data.view(-1)) 32 | logpt = logpt * Variable(at) 33 | 34 | loss = -1 * (1-pt)**self.gamma * logpt 35 | if self.size_average: return loss.mean() 36 | else: return loss.sum() -------------------------------------------------------------------------------- /textmining1.py: -------------------------------------------------------------------------------- 1 | import scipy.spatial 2 | import numpy as np 3 | import re 4 | 5 | data = open("senten.txt", "r") # открываем 6 | sent = data.readlines() #считываем все строки 7 | print(sent) 8 | 9 | i = 0 10 | for sentence in sent: # берем первую строку 11 | sentence = re.split('[^a-z]', sentence.lower()) # разбиваем на слова 12 | sent[i] = filter(None, sentence) # убираем пустые слова из первой строки и сохраняем 13 | i += 1 14 | 15 | word_index = dict() # создаем пустой словарь 16 | i = 0 17 | for sentence in sent: # берем первую строку 18 | for word in sentence: # берем первое слово в первой строке 19 | if word not in word_index: # если слова нету в словаре то 20 | word_index[word] = i # добавляем его по индексом 0 в словарь 21 | i += 1 # следующий индекс будет на единицу больше 22 | 23 | 24 | m = np.zeros((len(sent), len(word_index))) # создаем массив размерность строки*слова 25 | m.shape 26 | 27 | for sent_i in xrange(0, len(sent)): # смотрим первую строку 28 | for word in sent[sent_i]: # смотрим слова в первой строке 29 | word_i = word_index[word] # находим индекс этого слова 30 | m[sent_i][word_i] += 1 # вносим это слово в массив, изначально там нули но как только в стркое нахоидм слово с этим индексов 31 | 32 | distances = list() # смотрим теперь каждую строку 33 | for i in xrange(0, len(sent)): 34 | distance = scipy.spatial.distance.cosine(m[0,:],m[i,:]) # считаем косинусную дистанцию 35 | distances.append((i,distance)) 36 | 37 | sort = sorted(distances,key=lambda tup: tup[1]) # сортируем 38 | print(sort[1],sort[2]) 39 | -------------------------------------------------------------------------------- /recursion-cellular-image-classification/predict_sites.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from data_loader import tloader 3 | from tqdm import tqdm 4 | import pandas as pd 5 | import numpy as np 6 | from model_k import model_resnet_18 7 | import os 8 | from config import _get_default_config 9 | 10 | config = _get_default_config() 11 | MODEL_NAME = config.model 12 | 13 | checkpoint_name_1 = 'densenet121_general_site_1_77_val_accuracy=0.4011501.pth' 14 | checkpoint_name_2 = 'densenet121_general_site_2_74_val_accuracy=0.4164841.pth' 15 | 16 | 
os.environ["CUDA_VISIBLE_DEVICES"] = "1" 17 | path = '/mnt/ssd1/datasets/Recursion_class/recurs_proj/checkpoint_{}'.format(MODEL_NAME) 18 | path_data = '/mnt/ssd1/datasets/Recursion_class/' 19 | device = 'cuda' 20 | model_1 = model_resnet_18 21 | model_2 = model_resnet_18 22 | 23 | 24 | checkpoint = torch.load(path + '/' + checkpoint_name_1) 25 | model_1.load_state_dict(checkpoint) 26 | model_1.to(device) 27 | 28 | checkpoint = torch.load(path + '/' + checkpoint_name_2) 29 | model_2.load_state_dict(checkpoint) 30 | model_2.to(device) 31 | 32 | 33 | with torch.no_grad(): 34 | preds = np.empty(0) 35 | for x, _ in tqdm(tloader): 36 | x = x.to(device) 37 | output_1 = model_1(x) 38 | output_2 = model_2(x) 39 | output = output_1 + output_2 40 | idx = output.max(dim=-1)[1].cpu().numpy() 41 | preds = np.append(preds, idx, axis=0) 42 | 43 | 44 | submission = pd.read_csv(path_data + '/test.csv') 45 | submission['sirna'] = preds.astype(int) 46 | submission.to_csv('submission_{}.csv'.format('dense_mix_2_2_sites'), index=False, columns=['id_code', 'sirna']) 47 | 48 | 49 | -------------------------------------------------------------------------------- /recursion-cellular-image-classification/scheduler.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | from torch.optim.optimizer import Optimizer 4 | from functools import partial 5 | 6 | 7 | class ParamScheduler(object): 8 | 9 | def __init__(self, optimizer, scale_fn, step_size): 10 | if not isinstance(optimizer, Optimizer): 11 | raise TypeError('{} is not an Optimizer'.format( 12 | type(optimizer).__name__)) 13 | 14 | self.optimizer = optimizer 15 | self.scale_fn = scale_fn 16 | self.step_size = step_size 17 | self.last_batch_iteration = 0 18 | 19 | def step(self): 20 | for param_group in self.optimizer.param_groups: 21 | param_group['lr'] = self.scale_fn(self.last_batch_iteration / self.step_size) 22 | 23 | self.last_batch_iteration += 1 24 | 25 | 26 | def combine_scale_functions(scale_fns, phases=None): 27 | if phases is None: 28 | phases = [1. 
/ len(scale_fns)] * len(scale_fns) 29 | phases = [phase / sum(phases) for phase in phases] 30 | phases = torch.tensor([0] + phases) 31 | phases = torch.cumsum(phases, 0) 32 | 33 | def _inner(x): 34 | idx = (x >= phases).nonzero().max() 35 | actual_x = (x - phases[idx]) / (phases[idx + 1] - phases[idx]) 36 | return scale_fns[idx](actual_x) 37 | 38 | return _inner 39 | 40 | 41 | def scale_cos(start, end, x): 42 | return start + (1 + np.cos(np.pi * (1 - x))) * (end - start) / 2 43 | 44 | 45 | def return_scale_fn(): 46 | return combine_scale_functions( 47 | [partial(scale_cos, 1e-5, 5e-4), partial(scale_cos, 5e-4, 1e-4)], [0.2, 0.8]) -------------------------------------------------------------------------------- /bot.py: -------------------------------------------------------------------------------- 1 | # Настройки 2 | from telegram.ext import Updater, CommandHandler, MessageHandler, Filters 3 | import apiai, json 4 | updater = Updater(token='484814346:AAFXtoYPhZTBqSRzBZiKRbkO5JeyKBsnqlg') # Токен API к Telegram 5 | dispatcher = updater.dispatcher 6 | # Обработка команд 7 | def startCommand(bot, update): 8 | bot.send_message(chat_id=update.message.chat_id, text='Привет, давай пообщаемся?') 9 | def textMessage(bot, update): 10 | request = apiai.ApiAI('dca6e3be475a4d7c94e70a7052f3c0a8').text_request() # Токен API к Dialogflow 11 | request.lang = 'ru' # На каком языке будет послан запрос 12 | request.session_id = 'BatlabAIBot' # ID Сессии диалога (нужно, чтобы потом учить бота) 13 | request.query = update.message.text # Посылаем запрос к ИИ с сообщением от юзера 14 | responseJson = json.loads(request.getresponse().read().decode('utf-8')) 15 | response = responseJson['result']['fulfillment']['speech'] # Разбираем JSON и вытаскиваем ответ 16 | # Если есть ответ от бота - присылаем юзеру, если нет - бот его не понял 17 | if response: 18 | bot.send_message(chat_id=update.message.chat_id, text=response) 19 | else: 20 | bot.send_message(chat_id=update.message.chat_id, text='Я Вас не совсем понял!') 21 | # Хендлеры 22 | start_command_handler = CommandHandler('start', startCommand) 23 | text_message_handler = MessageHandler(Filters.text, textMessage) 24 | # Добавляем хендлеры в диспетчер 25 | dispatcher.add_handler(start_command_handler) 26 | dispatcher.add_handler(text_message_handler) 27 | # Начинаем поиск обновлений 28 | updater.start_polling(clean=True) 29 | # Останавливаем бота, если были нажаты Ctrl + C 30 | updater.idle() -------------------------------------------------------------------------------- /selfmade libraries/lib_val.py: -------------------------------------------------------------------------------- 1 | 2 | # coding: utf-8 3 | 4 | # In[2]: 5 | 6 | import numpy as np 7 | import matplotlib.pyplot as plt 8 | 9 | def perf_measure(y_actual,y_hat): 10 | TP = 0 11 | FP = 0 12 | TN = 0 13 | FN = 0 14 | 15 | for i in range(len(y_hat)): 16 | if np.all(y_actual[i]==1 and y_hat[i]==1): 17 | TP += 1 18 | for i in range(len(y_hat)): 19 | if np.all(y_hat[i]==1 and y_actual[i]==0): 20 | FP += 1 21 | for i in range(len(y_hat)): 22 | if np.all(y_actual[i]==0 and y_hat[i]==0): 23 | TN += 1 24 | for i in range(len(y_hat)): 25 | if np.all(y_hat[i]==0 and y_actual[i]==1): 26 | FN += 1 27 | 28 | return(TP, FP, TN, FN) 29 | def evaluate_prediction(predictions, target, title="Confusion matrix"): 30 | print('accuracy %s' % accuracy_score(target, predictions)) 31 | cm = confusion_matrix(target, predictions) 32 | print('confusion matrix\n %s' % cm) 33 | print('(row=expected, col=predicted)') 34 | 35 | 
cm_normalized = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis] 36 | plot_confusion_matrix(cm_normalized, title + ' Normalized') 37 | 38 | from sklearn.metrics import accuracy_score, confusion_matrix 39 | def plot_confusion_matrix(cm, title='Confusion matrix', cmap="cool"): 40 | 41 | plt.figure(figsize=(5,5)) 42 | plt.imshow(cm, interpolation='nearest', cmap=cmap) 43 | plt.title(title) 44 | plt.colorbar() 45 | tick_marks = np.arange(2) 46 | target_names ="01" 47 | plt.xticks(tick_marks, target_names, rotation=45) 48 | plt.yticks(tick_marks, target_names) 49 | plt.tight_layout() 50 | plt.ylabel('True label') 51 | plt.xlabel('Predicted label') 52 | 53 | -------------------------------------------------------------------------------- /carvana/submit_fast.py: -------------------------------------------------------------------------------- 1 | 2 | # coding: utf-8 3 | 4 | # In[1]: 5 | 6 | import pandas as pd 7 | 8 | ################# 9 | #weg = /media/n01z3/storage3/dataset/carvana/linknet_18_new/fold4/test/ # путь к картинкам 10 | #weg_csv = #путь к csv 11 | ########пу 12 | import numpy as np 13 | from multiprocessing import Pool 14 | 15 | 16 | import cv2 17 | from tqdm import tqdm 18 | load_img_5 = lambda im: cv2.imread(join(weg, '{}.png'.format(im))) 19 | 20 | threshold = 0.5 21 | 22 | 23 | 24 | df_test = pd.read_csv(weg_csv+'sample_submission.csv') 25 | ids_test = df_test['img'].map(lambda s: s.split('.')[0]) 26 | 27 | 28 | orig_width = 1918 29 | orig_height = 1280 30 | 31 | def _mask_to_rle_string(mask): 32 | """Convert boolean/`binary uint` mask to RLE string.""" 33 | # Mask to RLE 34 | pixels = mask.flatten() 35 | pixels[0] = 0 36 | pixels[-1] = 0 37 | runs = np.where(pixels[1:] != pixels[:-1])[0] + 2 38 | runs[1::2] = runs[1::2] - runs[:-1:2] 39 | 40 | # RLE to string 41 | return ' '.join(str(x) for x in runs) 42 | 43 | names = [] 44 | for id in ids_test: 45 | names.append('{}.jpg'.format(id)) 46 | 47 | 48 | rles = [] 49 | 50 | def get_rle(id_1): 51 | 52 | 53 | z0 = (load_img_5(id_1)/255.)[:,:,0:1] 54 | 55 | 56 | 57 | 58 | 59 | 60 | loaded = z0 61 | 62 | 63 | #prob = cv2.resize(loaded, (orig_width, orig_height)) 64 | mask = loaded > threshold 65 | rle = _mask_to_rle_string(mask) 66 | return rle 67 | 68 | 69 | names_2 = list(ids_test) 70 | 71 | chunk = 50 72 | rles = [] 73 | for i in tqdm(range(0, len(names_2), chunk), total=len(names_2) // chunk): 74 | p = Pool(processes=25) 75 | rles += p.map(get_rle, names_2[i:i + chunk]) 76 | p.terminate() 77 | #print('vasya') 78 | 79 | print("Generating submission file...") 80 | df = pd.DataFrame({'img': names, 'rle_mask': rles}) 81 | df.to_csv('nizh.csv.gz', index=False, compression='gzip') 82 | print('finished_fold_') 83 | 84 | 85 | 86 | 87 | 88 | -------------------------------------------------------------------------------- /Huffman greedy.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | # In[126]: 5 | 6 | 7 | import numpy as np 8 | 9 | 10 | # In[127]: 11 | 12 | 13 | class Node(): 14 | 15 | def __init__(self,parent,value): 16 | self.value = value 17 | self.parent = parent 18 | self.l_child = None 19 | self.r_child = None 20 | 21 | 22 | # In[128]: 23 | 24 | 25 | list_pr = [] 26 | with open('input_random_48_10000.txt') as f: 27 | for line in f: 28 | list_pr.append(int(line.strip().split('\t')[0])) 29 | list_pr = list_pr[1:] 30 | # list_pr = np.unique(list_pr) 31 | 32 | 33 | # In[129]: 34 | 35 | 36 | values = np.sort(list_pr) 37 | # values = values* 
0.00000000001 38 | # values[0] = 99993321**2 39 | q_1 = [Node(None,value) for value in values] 40 | q_2 = [] 41 | 42 | depth = 0 43 | while q_1 or len(q_2) > 1: 44 | 45 | if not q_1: 46 | node_1 = q_2.pop(0) 47 | elif not q_2: 48 | node_1 = q_1.pop(0) 49 | else: 50 | if q_1[0].value < q_2[0].value: 51 | node_1 = q_1.pop(0) 52 | else: 53 | node_1 = q_2.pop(0) 54 | 55 | if not q_1: 56 | node_2 = q_2.pop(0) 57 | elif not q_2: 58 | node_2 = q_1.pop(0) 59 | else: 60 | if q_1[0].value < q_2[0].value: 61 | node_2 = q_1.pop(0) 62 | else: 63 | node_2 = q_2.pop(0) 64 | merge = node_1.value + node_2.value 65 | new_node = Node(None,merge) 66 | new_node.r_child = node_2 67 | new_node.l_child = node_1 68 | q_2.append(new_node) 69 | 70 | 71 | # In[130]: 72 | 73 | 74 | results = {} 75 | def traverse(node,path): 76 | global results 77 | if node.l_child != None: 78 | traverse(node.l_child,path + '1') 79 | if not node.r_child and not node.l_child: 80 | results[node.value] = path 81 | if node.r_child != None: 82 | traverse(node.r_child,path + '0') 83 | 84 | 85 | # In[131]: 86 | 87 | 88 | traverse(q_2[0],'') 89 | 90 | 91 | # In[ ]: 92 | 93 | 94 | 95 | 96 | -------------------------------------------------------------------------------- /Hands-on Reinforcement Learning with Python, Second Edition_new chapters/01. Fundamentals of Reinforcement Learning/1.10. Model-Based and Model-Free Learning .ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Model-Based and Model-free learning\n", 8 | "\n", 9 | "**Model-based learning** - In model-based learning, an agent will have a complete description of the environment. That is, we learned that the transition probability tells the probability of moving from a state to the next state by performing an action and the reward function tells the reward we would obtain while moving from a state to the next state by performing an action . When the agent knows the model dynamics of the environment, that is, when the agent knows the transition probability of the environment then it is called model-based learning. Thus, in model-based learning the agent uses the model dynamics for finding the optimal policy. \n", 10 | "\n", 11 | "**Model-free learning** - When the agent does not know the model dynamics of the environment then it is called model-free learning. That is, In model-free learning, an agent tries to find the optimal policy without the model dynamics. \n", 12 | "\n", 13 | "Thus, to summarize, in a model-free setting, the agent learns optimal policy without the model dynamics of the environment whereas, in a model-based setting, the agent learns the optimal policy with the model dynamics of the environment." 
14 | ] 15 | } 16 | ], 17 | "metadata": { 18 | "kernelspec": { 19 | "display_name": "Python 3", 20 | "language": "python", 21 | "name": "python3" 22 | }, 23 | "language_info": { 24 | "codemirror_mode": { 25 | "name": "ipython", 26 | "version": 3 27 | }, 28 | "file_extension": ".py", 29 | "mimetype": "text/x-python", 30 | "name": "python", 31 | "nbconvert_exporter": "python", 32 | "pygments_lexer": "ipython3", 33 | "version": "3.6.9" 34 | } 35 | }, 36 | "nbformat": 4, 37 | "nbformat_minor": 2 38 | } 39 | -------------------------------------------------------------------------------- /similiar_pictures/static/index.js: -------------------------------------------------------------------------------- 1 | (function() 2 | { 3 | var canvas = document.querySelector( "#canvas" ); 4 | var context = canvas.getContext( "2d" ); 5 | canvas.width = 1; 6 | canvas.height = 1; 7 | 8 | var Mouse = { x: 0, y: 0 }; 9 | var lastMouse = { x: 0, y: 0 }; 10 | context.fillStyle="gray"; 11 | context.fillRect(0,0,canvas.width,canvas.height); 12 | context.color = "black"; 13 | context.lineWidth = 10; 14 | context.lineJoin = context.lineCap = 'round'; 15 | 16 | debug(); 17 | 18 | canvas.addEventListener( "mousemove", function( e ) 19 | { 20 | lastMouse.x = Mouse.x; 21 | lastMouse.y = Mouse.y; 22 | 23 | Mouse.x = e.pageX - this.offsetLeft; 24 | Mouse.y = e.pageY - this.offsetTop; 25 | 26 | }, false ); 27 | 28 | canvas.addEventListener( "mousedown", function( e ) 29 | { 30 | canvas.addEventListener( "mousemove", onPaint, false ); 31 | 32 | }, false ); 33 | 34 | canvas.addEventListener( "mouseup", function() 35 | { 36 | canvas.removeEventListener( "mousemove", onPaint, false ); 37 | 38 | }, false ); 39 | 40 | var onPaint = function() 41 | { 42 | context.lineWidth = context.lineWidth; 43 | context.lineJoin = "round"; 44 | context.lineCap = "round"; 45 | context.strokeStyle = context.color; 46 | 47 | context.beginPath(); 48 | context.moveTo( lastMouse.x, lastMouse.y ); 49 | context.lineTo( Mouse.x, Mouse.y ); 50 | context.closePath(); 51 | context.stroke(); 52 | }; 53 | 54 | function debug() 55 | { 56 | /* CLEAR BUTTON */ 57 | var clearButton = $( "#clearButton" ); 58 | 59 | clearButton.on( "click", function() 60 | { 61 | 62 | context.clearRect( 0, 0, 0, 0 ); 63 | context.fillStyle="white"; 64 | context.fillRect(0,0,canvas.width,canvas.height); 65 | 66 | }); 67 | 68 | /* COLOR SELECTOR */ 69 | 70 | $( "#colors" ).change(function() 71 | { 72 | var color = $( "#colors" ).val(); 73 | context.color = color; 74 | }); 75 | 76 | /* LINE WIDTH */ 77 | 78 | $( "#lineWidth" ).change(function() 79 | { 80 | context.lineWidth = $( this ).val(); 81 | }); 82 | } 83 | }()); -------------------------------------------------------------------------------- /Bayesian_AB/sequence (1).py: -------------------------------------------------------------------------------- 1 | import yt.wrapper as yt 2 | import pandas as pd 3 | import numpy as np 4 | import timeit 5 | import datetime 6 | import os 7 | from scipy.stats import beta, norm, uniform 8 | from nile.api.v1 import statface as ns 9 | from nile.api.v1 import cli 10 | import ast 11 | import re 12 | from multiprocessing import Pool 13 | from scipy.stats import ttest_ind,mannwhitneyu,kstest,normaltest 14 | from yql.api.v1.client import YqlClient 15 | from make_table import create_ab_table 16 | from calculate_posterior import make_decision 17 | import subprocess 18 | 19 | def analyze_sequential_test(ab_name, date_start, date_end): 20 | 21 | try: 22 | print subprocess.check_output(['rm',ab_name]) 23 
| print subprocess.check_output(['rm','{}.xlsx'.format(ab_name)]) 24 | except: 25 | pass 26 | 27 | client = yt.YtClient("",token='') 28 | # create_ab_table(date_start,date_end,ab_name) 29 | table = create_ab_table(date_start,date_end,ab_name) 30 | 31 | x = client.read_table(table) 32 | results = [] 33 | for row in x: 34 | results.append(row) 35 | df = pd.DataFrame(results) 36 | df_agg = df.groupby('ab').sum() 37 | df_agg['ctr'] = df_agg.clicks/df_agg.shows 38 | df_agg['convert'] = df_agg.buys/df_agg.clicks 39 | 40 | list_of_variants = df_agg.index.tolist() # posteriors list 41 | posterior_dict_convert = {} 42 | posterior_dict_ctr = {} 43 | for var in list_of_variants: 44 | posterior_dict_convert[var] = beta(249 + df_agg.loc[var].buys, \ 45 | 14269 - df_agg.loc[var].buys + df_agg.loc[var].clicks) 46 | 47 | posterior_dict_ctr[var] = beta(110 + df_agg.loc[var].clicks, \ 48 | 5550 - df_agg.loc[var].clicks + df_agg.loc[var].shows) 49 | 50 | for metric in ['ctr','convert']: 51 | for variant in list_of_variants: 52 | make_decision(variant,'control',metric,posterior_dict_ctr, posterior_dict_convert, df_agg, \ 53 | ab_name) 54 | df_agg.to_excel('{}.xlsx'.format(ab_name)) 55 | 56 | -------------------------------------------------------------------------------- /santa_RL/reward_calc.py: -------------------------------------------------------------------------------- 1 | def calc_reward(array_actions, n_members): 2 | 3 | # if array_actions[0] == 0: 4 | # reward = 0 5 | # elif array_actions[0] == 1: 6 | # reward = -1 * 50 7 | # elif array_actions[0] == 2: 8 | # reward = -1 * (50 + n_members * 9) 9 | # elif array_actions[0] == 3: 10 | # reward = -1 * (100 + n_members * 9) 11 | # elif array_actions[0] == 4: 12 | # reward = -1 * (200 + n_members * 9) 13 | # elif array_actions[0] == 5: 14 | # reward = -1 * (200 + n_members * 18) 15 | # elif array_actions[0] == 6: 16 | # reward = -1 * (300 + n_members * 18) 17 | # elif array_actions[0] == 7: 18 | # reward = -1 * (300 + n_members * 36) 19 | # elif array_actions[0] == 8: 20 | # reward = -1 * (400 + n_members * 36) 21 | # elif array_actions[0] == 9: 22 | # reward = -1 * (500 + n_members * 36 + n_members * 199) 23 | 24 | if array_actions[0] == 999: 25 | reward = -1 * 100000 26 | penalty = 1 * (500 + 36 * n_members + n_members * 398) 27 | elif array_actions[0] == 0: 28 | reward = 10000 29 | penalty = 0 30 | elif array_actions[0] == 1: 31 | reward = 500 32 | penalty = 1 * 50 33 | elif array_actions[0] == 2: 34 | reward = 250 35 | penalty = 1 * (50 + n_members * 9) 36 | elif array_actions[0] == 3: 37 | reward = 125 38 | penalty = 1 * (100 + n_members * 9) 39 | elif array_actions[0] == 4: 40 | reward = 62 41 | penalty = 1 * (200 + n_members * 9) 42 | elif array_actions[0] == 5: 43 | reward = 31 44 | penalty = 1 * (200 + n_members * 18) 45 | elif array_actions[0] == 6: 46 | reward = 16 47 | penalty = 1 * (300 + n_members * 18) 48 | elif array_actions[0] == 7: 49 | reward = 8 50 | penalty = 1 * (300 + n_members * 36) 51 | elif array_actions[0] == 8: 52 | reward = 4 53 | penalty = 1 * (400 + n_members * 36) 54 | elif array_actions[0] == 9: 55 | reward = 2 56 | penalty = (500 + n_members * 36 + n_members * 199) 57 | 58 | return -1*penalty, penalty 59 | -------------------------------------------------------------------------------- /approximation.md: -------------------------------------------------------------------------------- 1 | Задача 2: аппроксимация функции 2 | 3 | Рассмотрим сложную математическую функцию на отрезке [1, 15]: 4 | 5 | f(x) = sin(x / 5) * exp(x 
/ 10) + 5 * exp(-x / 2) 6 | 7 | 8 | It could describe, for example, how the scores that experts give to a particular variety of wine depend on the age of that wine. In essence, the machine learning task is to approximate a complex dependency with a function from a particular family. In this assignment we will approximate the function above with polynomials. 9 | 10 | As is well known, a polynomial of degree n (that is, w_0 + w_1 x + w_2 x^2 + ... + w_n x^n) is uniquely determined by any n + 1 distinct points it passes through. This means its coefficients w_0, ..., w_n can be found from the following system of linear equations: 11 | w_0 + w_1 x_1 + w_2 x_1^2 + ... + w_n x_1^n = f(x_1), ..., w_0 + w_1 x_{n+1} + w_2 x_{n+1}^2 + ... + w_n x_{n+1}^n = f(x_{n+1}) 12 | 13 | where x_1, ..., x_n, x_{n+1} denote the points the polynomial passes through, and f(x_1), ..., f(x_n), f(x_{n+1}) are the values it must take at those points. 14 | 15 | We will use this property and approximate the function with a polynomial by solving a system of linear equations. 16 | 17 | Form the system of linear equations (that is, define the coefficient matrix A and the right-hand-side vector b) for a first-degree polynomial that must coincide with the function f at the points 1 and 15. Solve this system with scipy.linalg.solve. Plot the function f and the resulting polynomial. Does it approximate the original function well? 18 | Repeat the same steps for a second-degree polynomial that coincides with f at the points 1, 8 and 15. Did the quality of the approximation improve? 19 | Repeat the same steps for a third-degree polynomial that coincides with f at the points 1, 4, 10 and 15. Does it approximate the function well? The coefficients of this polynomial (four numbers in the order w_0, w_1, w_2, w_3) are the answer to the task. Rounding the coefficients is not required, but if you wish you may round them to two decimal places (i.e. to a number like 0.42). 20 | Write the resulting numbers to a file, separated by spaces. Note that the file must consist of a single line with no trailing newline. An example solution file can be found at the end of the assignment (submission-2.txt). 
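A minimal sketch of the degree-3 case described in the assignment above, assuming the same f(x), the interpolation points 1, 4, 10 and 15, and the submission-2.txt output format; the use of numpy.vander to build the coefficient matrix A is an illustrative choice, not something prescribed by the assignment.

import numpy as np
from scipy import linalg

f = lambda x: np.sin(x / 5.0) * np.exp(x / 10.0) + 5.0 * np.exp(-x / 2.0)  # the function defined above

points = np.array([1.0, 4.0, 10.0, 15.0])        # nodes for the third-degree polynomial
A = np.vander(points, N=4, increasing=True)      # row i is [1, x_i, x_i^2, x_i^3]
b = f(points)                                    # values the polynomial must reproduce
w = linalg.solve(A, b)                           # coefficients w_0, w_1, w_2, w_3

with open('submission-2.txt', 'w') as out:       # a single line, space-separated, no trailing newline
    out.write(' '.join(str(round(c, 2)) for c in w))

The same pattern covers the degree-1 and degree-2 cases by changing points and N.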
21 | -------------------------------------------------------------------------------- /similiar_pictures/model/model.json: -------------------------------------------------------------------------------- 1 | {"class_name": "Sequential", "keras_version": "1.1.0", "config": [{"class_name": "Convolution2D", "config": {"b_regularizer": null, "W_constraint": null, "b_constraint": null, "name": "convolution2d_1", "activity_regularizer": null, "trainable": true, "dim_ordering": "tf", "nb_col": 3, "subsample": [1, 1], "init": "glorot_uniform", "bias": true, "nb_filter": 32, "input_dtype": "float32", "border_mode": "valid", "batch_input_shape": [null, 28, 28, 1], "W_regularizer": null, "activation": "linear", "nb_row": 3}}, {"class_name": "Activation", "config": {"activation": "relu", "trainable": true, "name": "activation_1"}}, {"class_name": "Convolution2D", "config": {"W_constraint": null, "b_constraint": null, "name": "convolution2d_2", "activity_regularizer": null, "trainable": true, "dim_ordering": "tf", "nb_col": 3, "subsample": [1, 1], "init": "glorot_uniform", "bias": true, "nb_filter": 32, "border_mode": "valid", "b_regularizer": null, "W_regularizer": null, "activation": "linear", "nb_row": 3}}, {"class_name": "Activation", "config": {"activation": "relu", "trainable": true, "name": "activation_2"}}, {"class_name": "MaxPooling2D", "config": {"name": "maxpooling2d_1", "trainable": true, "dim_ordering": "tf", "pool_size": [2, 2], "strides": [2, 2], "border_mode": "valid"}}, {"class_name": "Dropout", "config": {"p": 0.25, "trainable": true, "name": "dropout_1"}}, {"class_name": "Flatten", "config": {"trainable": true, "name": "flatten_1"}}, {"class_name": "Dense", "config": {"W_constraint": null, "b_constraint": null, "name": "dense_1", "activity_regularizer": null, "trainable": true, "init": "glorot_uniform", "bias": true, "input_dim": null, "b_regularizer": null, "W_regularizer": null, "activation": "linear", "output_dim": 128}}, {"class_name": "Activation", "config": {"activation": "relu", "trainable": true, "name": "activation_3"}}, {"class_name": "Dropout", "config": {"p": 0.5, "trainable": true, "name": "dropout_2"}}, {"class_name": "Dense", "config": {"W_constraint": null, "b_constraint": null, "name": "dense_2", "activity_regularizer": null, "trainable": true, "init": "glorot_uniform", "bias": true, "input_dim": null, "b_regularizer": null, "W_regularizer": null, "activation": "linear", "output_dim": 10}}, {"class_name": "Activation", "config": {"activation": "softmax", "trainable": true, "name": "activation_4"}}]} -------------------------------------------------------------------------------- /Hands-on Reinforcement Learning with Python, Second Edition_new chapters/01. Fundamentals of Reinforcement Learning/1.03. Reinforcement Learning Algorithm.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Reinforcement Learning Algorithm\n", 8 | "\n", 9 | "The steps involved in a typical reinforcement learning algorithm are given as follows:\n", 10 | "\n", 11 | "1. First, the agent interacts with the environment by performing an action\n", 12 | "2. The agent performs an action and moves from one state to another\n", 13 | "3. Then the agent will receive a reward based on the action it performed\n", 14 | "4. Based on the reward, the agent will understand that whether the action is good or bad\n", 15 | "5. 
If the action was good, that is, if the agent received a positive reward, then the agent will prefer performing that action else the agent will try performing other actions which can result in a positive reward. So reinforcement learning is basically a trial and error learning process.\n", 16 | "\n", 17 | "\n", 18 | "Now, let's revisit our chess game example. The agent(software program) is basically the chess player. So, the agent interacts with the environment(chessboard) by performing some action(moves). If the agent gets a positive reward for an action, then it will prefer performing that action else it will find other action which gives a positive reward. \n", 19 | "\n", 20 | "Ultimately, the goal of the agent is to maximize the reward it gets. If the agent receives a good reward then it means that it performs a good action. If the agent performs a good action then it implies that it can win the game. Thus, the agent learns to win the game by maximizing the reward. " 21 | ] 22 | } 23 | ], 24 | "metadata": { 25 | "kernelspec": { 26 | "display_name": "Python 3", 27 | "language": "python", 28 | "name": "python3" 29 | }, 30 | "language_info": { 31 | "codemirror_mode": { 32 | "name": "ipython", 33 | "version": 3 34 | }, 35 | "file_extension": ".py", 36 | "mimetype": "text/x-python", 37 | "name": "python", 38 | "nbconvert_exporter": "python", 39 | "pygments_lexer": "ipython3", 40 | "version": "3.6.9" 41 | } 42 | }, 43 | "nbformat": 4, 44 | "nbformat_minor": 2 45 | } 46 | -------------------------------------------------------------------------------- /Hands-on Reinforcement Learning with Python, Second Edition_new chapters/04. Monte Carlo Methods/4.01. Understanding the Monte Carlo Method.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Understanding the Monte Carlo method\n", 8 | "\n", 9 | "Before understanding how the Monte Carlo method is useful in reinforcement learning, first, let's understand what is Monte Carlo method and how does it work. The Monte Carlo method is a statistical technique used to find an approximate solution through sampling. That is, the Monte Carlo method approximates the probability of an outcome by running multiple trails. \n", 10 | "\n", 11 | "For instance, the Monte Carlo method approximates the expectation of a random variable by sampling and when the sample size is greater the approximation will be better. Let's suppose we have a random variable X and say we need to compute the expected value of X, that is E[X], then we can compute it by taking the sum of values of X multiplied by their respective probabilities as shown below:\n", 12 | "\n", 13 | "$$ E(X) = \\sum_{i=1}^N x_i p(x_i) $$\n", 14 | "\n", 15 | "But instead of computing the expectation like this, can we approximate them with the Monte Carlo method? Yes! We can estimate the expected value of X by just sampling the values of X for some N times and compute the average value of X as the expected value of X as shown below:\n", 16 | "\n", 17 | "$$ \\mathbb{E}_{x \\sim p(x)}[X] \\approx \\frac{1}{N} \\sum_i x_i $$\n", 18 | "\n", 19 | "\n", 20 | "When N is larger our approximation will be better. 
Thus, with the Monte Carlo method, we can approximate the solution through sampling, and our approximation will be better when the sample size is large.\n", 21 | "\n", 22 | "In the upcoming sections, we will learn how exactly the Monte Carlo method is used in reinforcement learning. " 23 | ] 24 | } 25 | ], 26 | "metadata": { 27 | "kernelspec": { 28 | "display_name": "Python 3", 29 | "language": "python", 30 | "name": "python3" 31 | }, 32 | "language_info": { 33 | "codemirror_mode": { 34 | "name": "ipython", 35 | "version": 3 36 | }, 37 | "file_extension": ".py", 38 | "mimetype": "text/x-python", 39 | "name": "python", 40 | "nbconvert_exporter": "python", 41 | "pygments_lexer": "ipython3", 42 | "version": "3.6.9" 43 | } 44 | }, 45 | "nbformat": 4, 46 | "nbformat_minor": 2 47 | } 48 | -------------------------------------------------------------------------------- /textmining1.md: -------------------------------------------------------------------------------- 1 | Task 1: comparing sentences 2 | 3 | You are given a set of sentences copied from Wikipedia. Each of them has a "cat theme" in one of three senses: 4 | 5 | cats (the animals) 6 | the UNIX utility cat for printing the contents of files 7 | versions of the OS X operating system named after members of the cat family 8 | Your task is to find the two sentences that are closest in meaning to the one in the very first line. As the measure of semantic closeness we will use cosine distance. 9 | 10 | sentences.txt 11 | Perform the following steps: 12 | 13 | Download the file with the sentences (sentences.txt). 14 | Each line in the file corresponds to one sentence. Read them in and convert each one to lower case using the string function lower(). 15 | Tokenize the texts, that is, split them into words. For this you can use a regular expression that treats any non-letter character as a separator: re.split('[^a-z]', t). Do not forget to remove the empty words after splitting. 16 | Build the list of all words that occur in the sentences. Assign each word an index from zero to (d - 1), where d is the number of distinct words in the sentences. A dict is a convenient structure for this. 17 | Create a matrix of size n * d, where n is the number of sentences. Fill it in: the element with index (i, j) in this matrix must equal the number of occurrences of the j-th word in the i-th sentence. You should end up with a matrix of size 22 * 254. 18 | Find the cosine distance from the sentence in the very first line (In comparison to dogs, cats have not undergone...) to all the others using scipy.spatial.distance.cosine. What are the indices of the two sentences closest to it by this distance (lines are numbered from zero)? These two numbers are the answers to the task. 19 | Write the resulting numbers to a file, separated by a space. Note that the file must consist of a single line with no trailing newline. An example solution file can be found at the end of the assignment (submission-1.txt). 20 | Do the two closest sentences match the first one in topic? Do the topics of the next-closest sentences match? 21 | Of course, the method you used is extremely simple. For example, it does not take word forms into account (so it treats cat and cats as different words even though they essentially mean the same thing) and does not remove articles and other unneeded words from the texts. Later we will study text analysis in detail and see how to achieve high quality in the task of finding similar sentences. 
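A compact Python 3 sketch of the pipeline described in the assignment above (lowercasing, re.split tokenization, a word-to-index dict, an occurrence-count matrix, and cosine distances to sentence 0). It assumes the sentences.txt file from the assignment; the repository's textmining1.py implements the same steps in Python 2.

import re
import numpy as np
from scipy.spatial.distance import cosine

with open('sentences.txt') as f:                        # one sentence per line
    sentences = [[w for w in re.split('[^a-z]', line.lower()) if w] for line in f]

word_index = {}                                         # word -> column index 0..d-1
for sentence in sentences:
    for word in sentence:
        word_index.setdefault(word, len(word_index))

counts = np.zeros((len(sentences), len(word_index)))    # expected shape: 22 x 254
for i, sentence in enumerate(sentences):
    for word in sentence:
        counts[i, word_index[word]] += 1

distances = [(i, cosine(counts[0], counts[i])) for i in range(1, len(sentences))]
nearest = sorted(distances, key=lambda pair: pair[1])[:2]
print(' '.join(str(i) for i, _ in nearest))             # the two answers, space-separated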
22 | -------------------------------------------------------------------------------- /Bayesian_AB/calculate_posterior (1).py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | import timeit 4 | import datetime 5 | import os 6 | from scipy.stats import beta, norm, uniform 7 | 8 | 9 | 10 | def write_to_file(ab_name,text,metric_name): 11 | with open(str(ab_name), "a") as text_file: 12 | text_file.write(str(text) + ' for metric ' + metric_name+ '\n') 13 | 14 | 15 | def make_decision(var_1,var_2,metric_name, posterior_dict_ctr, posterior_dict_convert, df_agg, \ 16 | ab_name = 'test',print_anyway = True): 17 | 18 | if metric_name == 'ctr': 19 | posterior_dict = posterior_dict_ctr 20 | if metric_name == 'convert': 21 | posterior_dict = posterior_dict_convert 22 | 23 | threshold_of_caring = 0.0001 24 | xgrid_size = 20000 25 | 26 | empirical_var_1_mean = df_agg.loc[var_1][metric_name] 27 | empirical_var_2_mean = df_agg.loc[var_2][metric_name] 28 | B_greater_A = empirical_var_2_mean > empirical_var_1_mean 29 | 30 | x = np.mgrid[0:xgrid_size,0:xgrid_size] / float(20*xgrid_size) 31 | pdf_arr = posterior_dict[var_1].pdf(x[0]) * posterior_dict[var_2].pdf(x[1]) 32 | pdf_arr /= pdf_arr.sum() # normalization 33 | 34 | prob_error = np.zeros(shape=x[0].shape) 35 | if B_greater_A: 36 | prob_error[np.where(x[0] > x[1])] = 1.0 37 | else: 38 | prob_error[np.where(x[1] > x[0])] = 1.0 39 | 40 | expected_error = np.abs(x[0]-x[1]) 41 | 42 | expected_err_scalar = (expected_error * prob_error * pdf_arr).sum() 43 | 44 | if (expected_err_scalar < threshold_of_caring) or print_anyway: 45 | if B_greater_A: 46 | 47 | line_1 = "Probability that version {} is larger than {} is ".format(var_2,var_1) \ 48 | + str(((1-prob_error)*pdf_arr).sum()) 49 | line_2 = " Expected error is " + str(expected_err_scalar) 50 | 51 | write_to_file(ab_name,line_1,metric_name) 52 | write_to_file(ab_name,line_2,metric_name) 53 | else: 54 | line_1 = "Probability that version {} is larger than {} is _ ".format(var_1,var_2) \ 55 | + str(((1-prob_error)*pdf_arr).sum()) 56 | line_2 = " Expected error is " + str(expected_err_scalar) 57 | 58 | write_to_file(ab_name,line_1,metric_name) 59 | write_to_file(ab_name,line_2,metric_name) 60 | else: 61 | print "Continue test. 
Expected error was " + str(expected_err_scalar) + " > " + str(threshold_of_caring) 62 | 63 | 64 | -------------------------------------------------------------------------------- /similiar_pictures/model.json: -------------------------------------------------------------------------------- 1 | {"backend": "tensorflow", "config": [{"config": {"bias_constraint": null, "use_bias": true, "name": "conv2d_1", "batch_input_shape": [null, 28, 28, 1], "kernel_initializer": {"config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}, "class_name": "VarianceScaling"}, "padding": "valid", "kernel_regularizer": null, "kernel_size": [3, 3], "dtype": "float32", "kernel_constraint": null, "activity_regularizer": null, "trainable": true, "strides": [1, 1], "bias_initializer": {"config": {}, "class_name": "Zeros"}, "bias_regularizer": null, "dilation_rate": [1, 1], "activation": "relu", "data_format": "channels_last", "filters": 32}, "class_name": "Conv2D"}, {"config": {"bias_constraint": null, "use_bias": true, "name": "conv2d_2", "kernel_regularizer": null, "padding": "valid", "kernel_initializer": {"config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}, "class_name": "VarianceScaling"}, "kernel_size": [3, 3], "strides": [1, 1], "kernel_constraint": null, "activity_regularizer": null, "trainable": true, "bias_initializer": {"config": {}, "class_name": "Zeros"}, "bias_regularizer": null, "dilation_rate": [1, 1], "activation": "relu", "data_format": "channels_last", "filters": 64}, "class_name": "Conv2D"}, {"config": {"pool_size": [2, 2], "name": "max_pooling2d_1", "trainable": true, "padding": "valid", "data_format": "channels_last", "strides": [2, 2]}, "class_name": "MaxPooling2D"}, {"config": {"name": "dropout_1", "trainable": true, "rate": 0.25}, "class_name": "Dropout"}, {"config": {"name": "flatten_1", "trainable": true}, "class_name": "Flatten"}, {"config": {"bias_constraint": null, "use_bias": true, "name": "dense_1", "units": 128, "kernel_initializer": {"config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}, "class_name": "VarianceScaling"}, "kernel_regularizer": null, "kernel_constraint": null, "activity_regularizer": null, "trainable": true, "bias_initializer": {"config": {}, "class_name": "Zeros"}, "bias_regularizer": null, "activation": "relu"}, "class_name": "Dense"}, {"config": {"name": "dropout_2", "trainable": true, "rate": 0.5}, "class_name": "Dropout"}, {"config": {"bias_constraint": null, "use_bias": true, "name": "dense_2", "units": 10, "kernel_initializer": {"config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}, "class_name": "VarianceScaling"}, "kernel_regularizer": null, "kernel_constraint": null, "activity_regularizer": null, "trainable": true, "bias_initializer": {"config": {}, "class_name": "Zeros"}, "bias_regularizer": null, "activation": "softmax"}, "class_name": "Dense"}], "class_name": "Sequential", "keras_version": "2.0.8"} -------------------------------------------------------------------------------- /poisson_bootstrap/poisson_bootstrap.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | # In[1]: 5 | 6 | 7 | from typing import Tuple, List 8 | import numpy as np 9 | import multiprocessing as mp 10 | import os 11 | from tqdm import tqdm_notebook 12 | from sklearn.preprocessing import scale 13 | 14 | 15 | # In[2]: 16 | 17 | 18 | N_bootstraps: int = 10000 19 | 
mp.set_start_method('fork', force=True) 20 | def poisson_bootstrap_tp_fp_fn_tn( 21 | bundle: Tuple[float, List[Tuple[float, float, float, int]]], 22 | ) ->List[np.ndarray]: 23 | threshold, data = bundle 24 | TP = np.zeros((N_bootstraps)) 25 | FP = np.zeros((N_bootstraps)) 26 | FN = np.zeros((N_bootstraps)) 27 | TN = np.zeros((N_bootstraps)) 28 | for current_label, current_predict, weight, index in data: 29 | np.random.seed(index) 30 | current_predict += np.random.normal(0,0.0125,1) # this can be replaced with precalc noise 31 | current_predict = int(np.clip(current_predict,0,1) >= threshold) 32 | p_sample = np.random.poisson(1, N_bootstraps) * weight # this can be replaced with precalc poisson 33 | 34 | if current_label == 1 and current_predict == 1: 35 | TP += p_sample 36 | if current_label == 1 and current_predict == 0: 37 | FN += p_sample 38 | if current_label == 0 and current_predict == 1: 39 | FP += p_sample 40 | if current_label == 0 and current_predict == 0: 41 | TN += p_sample 42 | 43 | return [TP, FP, FN, TN] 44 | 45 | 46 | 47 | # In[3]: 48 | 49 | 50 | N = 10**6 51 | labels = np.random.randint(0,2,N) 52 | predicts = np.clip(np.random.normal(0.5,1,N),0,1) 53 | weights = np.array([1 for _ in range(N)]) 54 | 55 | print(labels[:10]) 56 | print(predicts[:10]) 57 | print(weights[:10]) 58 | 59 | 60 | # In[5]: 61 | 62 | 63 | chunk_size = 1000 64 | threshold = 0.81 65 | generator = ( 66 | ( 67 | threshold, 68 | [ 69 | (labels[x + y], 70 | predicts[x + y], 71 | weights[x + y], 72 | x + y, 73 | ) 74 | 75 | for x in range(chunk_size) 76 | if x+y < N 77 | ], 78 | 79 | ) 80 | for y in range(0,N,chunk_size) 81 | 82 | ) 83 | 84 | 85 | # In[6]: 86 | 87 | 88 | cpu_to_use = np.max([os.cpu_count() - 3,1]) 89 | print(cpu_to_use) 90 | 91 | with mp.Pool(processes=cpu_to_use) as pool: 92 | stat_list = list(tqdm_notebook(pool.imap(poisson_bootstrap_tp_fp_fn_tn,generator), 93 | total = N//chunk_size)) 94 | 95 | TP, FP, FN, TN = np.sum(stat_list,0) 96 | print(TP[:10]) 97 | 98 | -------------------------------------------------------------------------------- /recursion-cellular-image-classification/config.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | from easydict import EasyDict as edict 5 | 6 | 7 | # def _get_default_config(): 8 | # checkpoint_dict = dict() 9 | # checkpoint_dict[1] = '/mnt/ssd1/datasets/Recursion_class/recurs_proj/checkpoint/' + \ 10 | # 'resnet18_general_site_1_117_val_accuracy=0.3217415.pth' 11 | # checkpoint_dict[2] = '/mnt/ssd1/datasets/Recursion_class/recurs_proj/checkpoint_densenet121/' + \ 12 | # 'densenet121_general_site_1_77_val_accuracy=0.4011501.pth' 13 | # checkpoint_dict[3] = '/mnt/ssd1/datasets/Recursion_class/recurs_proj/checkpoint_densenet121_focal/' + \ 14 | # 'densenet121_general_site_1_66_val_accuracy=0.4329135.pth' 15 | # 16 | # model_dict = dict() 17 | # model_dict[1] = 'resnet18' 18 | # model_dict[2] = 'densenet121' 19 | # 20 | # exp_dict = dict() 21 | # exp_dict[1] = 'HEPG2' 22 | # exp_dict[2] = 'HUVEC' 23 | # exp_dict[3] = 'RPE' 24 | # exp_dict[4] = 'U2OS' 25 | # 26 | # config = edict() 27 | # config.checkpoint_folder = 'densenet121_focal' 28 | # 29 | # config.model = model_dict[2] 30 | # config.site = 1 # or 2 31 | # config.all = False # or False 32 | # config.experiment = exp_dict[4] 33 | # config.checkpoint_name = checkpoint_dict[3] 34 | # config.warm_start = True 35 | # config.random_seed = 42 36 | # 
config.batch_size = 12 37 | # return config 38 | 39 | 40 | def _get_default_config(): 41 | checkpoint_dict = dict() 42 | checkpoint_dict[1] = '/mnt/ssd1/datasets/Recursion_class/recurs_proj/checkpoint/' + \ 43 | 'resnet18_general_site_1_117_val_accuracy=0.3217415.pth' 44 | checkpoint_dict[2] = '/mnt/ssd1/datasets/Recursion_class/recurs_proj/checkpoint_densenet121/' + \ 45 | 'densenet121_general_site_2_74_val_accuracy=0.4164841.pth' 46 | checkpoint_dict[3] = '/mnt/ssd1/datasets/Recursion_class/recurs_proj/checkpoint_densenet121_focal/' + \ 47 | 'densenet121_general_site_2_67_val_accuracy=0.4394852.pth' 48 | 49 | model_dict = dict() 50 | model_dict[1] = 'resnet18' 51 | model_dict[2] = 'densenet121' 52 | 53 | exp_dict = dict() 54 | exp_dict[1] = 'HEPG2' 55 | exp_dict[2] = 'HUVEC' 56 | exp_dict[3] = 'RPE' 57 | exp_dict[4] = 'U2OS' 58 | 59 | config = edict() 60 | config.checkpoint_folder = 'densenet121_focal' 61 | config.model = model_dict[2] 62 | config.site = 2 # or 2k 63 | config.all = False # or False 64 | config.experiment = exp_dict[4] 65 | config.checkpoint_name = checkpoint_dict[3] 66 | config.warm_start = True 67 | config.random_seed = 24 68 | config.batch_size = 12 69 | return config 70 | -------------------------------------------------------------------------------- /similiar_pictures/train.py: -------------------------------------------------------------------------------- 1 | 2 | # coding: utf-8 3 | 4 | # In[43]: 5 | 6 | 7 | from keras.applications.resnet50 import ResNet50 8 | from keras.preprocessing import image 9 | from keras.applications.resnet50 import preprocess_input, decode_predictions 10 | import numpy as np 11 | from keras.models import Model 12 | from keras.layers.merge import Concatenate 13 | from keras.layers import Input, merge, Conv2D, MaxPooling2D, UpSampling2D, Dropout, Cropping2D 14 | import os 15 | import tqdm 16 | import pandas as pd 17 | import pickle 18 | from scipy.spatial.distance import cosine 19 | 20 | 21 | # In[2]: 22 | 23 | 24 | model = ResNet50(weights='imagenet') 25 | 26 | 27 | # In[47]: 28 | 29 | 30 | def preproc_image(img_path): 31 | img = image.load_img('mirflickr/{}'.format(img_path), target_size=(224, 224)) 32 | x = image.img_to_array(img) 33 | x = np.expand_dims(x, axis=0) 34 | x = preprocess_input(x) 35 | return x 36 | test_img = preproc_image('im3.jpg') 37 | 38 | 39 | # In[4]: 40 | 41 | 42 | model.layers.pop() 43 | model2 = Model(model.input, model.layers[-1].output) 44 | if np.sum(model2.get_weights()[0] - model.get_weights()[0]) == 0: 45 | print('model is ok') 46 | 47 | 48 | # In[5]: 49 | 50 | 51 | preds = model.predict(test_img) 52 | # decode the results into a list of tuples (class, description, probability) 53 | # (one such list for each sample in the batch) 54 | print('Predicted:', decode_predictions(preds, top=3)[0]) 55 | 56 | 57 | # In[13]: 58 | 59 | 60 | list_names = os.listdir('mirflickr') 61 | list_names.pop(0) 62 | list_names = list_names[:-1] 63 | 64 | 65 | # In[ ]: 66 | 67 | 68 | vector_representation = [] 69 | for name in tqdm.tqdm(list_names,miniters=10000): 70 | img = preproc_image(name) 71 | vector_representation.append(model2.predict(img)) 72 | 73 | 74 | # In[45]: 75 | 76 | 77 | d = dict((key, value) for (key, value) in zip(list_names,vector_representation)) 78 | 79 | 80 | # In[23]: 81 | 82 | 83 | with open('dict_representation.pickle', 'wb') as handle: 84 | pickle.dump(d, handle, protocol=pickle.HIGHEST_PROTOCOL) 85 | 86 | 87 | # In[78]: 88 | 89 | 90 | img = preproc_image('im50.jpg') 91 | test_vector = model2.predict(img) 
92 | 93 | def find_distance(vector): 94 | return cosine(test_vector,vector) 95 | 96 | distance = map(find_distance,d.values()) 97 | 98 | distances = pd.DataFrame(distance) 99 | distances['img'] = pd.DataFrame(d.keys()) 100 | distances.rename(columns = {0:'distance'},inplace=True) 101 | 102 | print(distances.sort_values(by='distance').img.values[0]) 103 | 104 | 105 | # In[121]: 106 | 107 | 108 | test = distances.sort_values(by='distance').img.values[0:5] 109 | 110 | 111 | # In[83]: 112 | 113 | 114 | model2.save('model_to_predict_vector.h5') 115 | 116 | -------------------------------------------------------------------------------- /santa_RL/give_reward_monte.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from reward_calc import calc_reward 4 | from config import _get_default_config 5 | import os 6 | 7 | os.environ["CUDA_VISIBLE_DEVICES"] = "2" 8 | config = _get_default_config() 9 | N_ACTIONS = config.N_ACTIONS 10 | MAX_CAPACITY = config.MAX_CAPACITY 11 | DAYS_OF_MAX_CAPACITY = config.DAYS_OF_MAX_CAPACITY 12 | ADDITIONAL_REWARD = config.ADDITIONAL_REWARD 13 | REWARD_SCALE = config.REWARD_SCALE 14 | GAMMA = config.gamma 15 | 16 | 17 | def multiply(value): 18 | number, power = value 19 | return number * GAMMA ** (power - 1) 20 | 21 | 22 | def summa_(value): 23 | number, power = value 24 | return number 25 | 26 | 27 | def give_reward(state, dqn, population_dict, df, df_standard, final_reward, episodes=100): 28 | pop_dict_local = population_dict.copy() 29 | state_local = torch.empty_like(state).copy_(state) 30 | indexes_zero = np.where(state_local.numpy() == 0)[1] 31 | indexes_zero = indexes_zero[indexes_zero < 5000] 32 | episode_counter = 0 33 | number_episodes = min(episodes, len(indexes_zero)) 34 | episodes_indexes = list(np.random.choice(indexes_zero, size=number_episodes, replace=False)) 35 | seq_rewards = [] # to store Gt and index t 36 | reward_list = [[final_reward, 1]] # to store reward Rt and index t 37 | for position in episodes_indexes: # not allocated families 38 | mask_local = torch.zeros((1, N_ACTIONS)) 39 | current_row = df[position] # take not allocated family 40 | current_row = np.array(current_row[1:N_ACTIONS + 1].tolist() + [current_row[-1]]) 41 | days = current_row[:-1] 42 | n_members = current_row[-1] 43 | for n_pos, day_var in enumerate(days[:-1]): 44 | if pop_dict_local[day_var] + n_members > MAX_CAPACITY: 45 | mask_local[0, n_pos] = -1 * np.inf 46 | # blocked = (mask_local == -np.inf).sum().numpy() 47 | data = torch.Tensor(df_standard[position][1:]).unsqueeze(0) 48 | nn_state = torch.cat((data, state_local), dim=1) 49 | # if blocked != N_ACTIONS: 50 | action, model_output = dqn.select_max_action(nn_state, mask_local) 51 | array_actions = action.numpy() 52 | action = action.numpy()[0] 53 | 54 | if action != N_ACTIONS - 1: 55 | day = current_row[:-1][action] 56 | else: 57 | valid_days = np.array(list(map(int, pop_dict_local.values()))) + n_members <= MAX_CAPACITY 58 | valid_days = np.array(list(range(1, 101)))[valid_days] 59 | day = np.random.choice(valid_days) 60 | array_actions = [999] 61 | 62 | pop_dict_local[day] += n_members 63 | state_local[0, position + day - 1] += n_members / MAX_CAPACITY # update state 64 | state_local[0, position] = (day - 50.5) / 29.8 65 | 66 | g_t = np.sum(list(map(multiply, reward_list))) + GAMMA ** (episode_counter + 1) * model_output 67 | seq_rewards.append([g_t, episode_counter + 1]) 68 | reward, penalty = calc_reward(array_actions, n_members) 69 | reward /= 
REWARD_SCALE 70 | episode_counter += 1 71 | reward_list.append([reward, 1 + episode_counter]) 72 | 73 | return (1 - GAMMA) * np.sum(list(map(multiply, seq_rewards))) 74 | -------------------------------------------------------------------------------- /tips_tricks/5_2_Regularizing model to avoid overfitting.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import pandas as pd\n", 12 | "import numpy as np\n", 13 | "from itertools import combinations\n", 14 | "from catboost import CatBoostClassifier\n", 15 | "from sklearn.model_selection import train_test_split, KFold\n", 16 | "from sklearn.metrics import roc_auc_score\n", 17 | "import warnings\n", 18 | "warnings.filterwarnings(\"ignore\")\n", 19 | "np.random.seed(42)" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 2, 25 | "metadata": { 26 | "collapsed": true 27 | }, 28 | "outputs": [], 29 | "source": [ 30 | "df = pd.read_csv('train.csv')\n", 31 | "y = df.target\n", 32 | "\n", 33 | "df.drop(['ID', 'target'], axis=1, inplace=True)\n", 34 | "df.fillna(-9999, inplace=True)\n", 35 | "cat_features_ids = np.where(df.apply(pd.Series.nunique) < 30000)[0].tolist()" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": 3, 41 | "metadata": { 42 | "collapsed": true 43 | }, 44 | "outputs": [], 45 | "source": [ 46 | "train, test, y_train, y_test = train_test_split(df, y, test_size = 0.1)" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": 4, 52 | "metadata": {}, 53 | "outputs": [ 54 | { 55 | "name": "stdout", 56 | "output_type": "stream", 57 | "text": [ 58 | "Roc-auc score with Catboost without regularization: 0.7939610054617733\n", 59 | "Roc-auc score with Catboost with regularization: 0.7961023589633582\n" 60 | ] 61 | } 62 | ], 63 | "source": [ 64 | "clf = CatBoostClassifier(learning_rate=0.1, iterations=100, random_seed=42, eval_metric='AUC', logging_level='Silent')\n", 65 | "clf.fit(train, y_train, cat_features=cat_features_ids)\n", 66 | "prediction = clf.predict_proba(test)\n", 67 | "print('Roc-auc score with Catboost without regularization:',roc_auc_score(y_test, prediction[:, 1]))\n", 68 | "\n", 69 | "clf = CatBoostClassifier(learning_rate=0.1, iterations=100, random_seed=42, \n", 70 | " eval_metric='AUC', logging_level='Silent', l2_leaf_reg=3, \n", 71 | " model_size_reg = 3)\n", 72 | "clf.fit(train, y_train, cat_features=cat_features_ids)\n", 73 | "prediction = clf.predict_proba(test)\n", 74 | "print('Roc-auc score with Catboost with regularization:',roc_auc_score(y_test, prediction[:, 1]))" 75 | ] 76 | } 77 | ], 78 | "metadata": { 79 | "kernelspec": { 80 | "display_name": "Python 3", 81 | "language": "python", 82 | "name": "python3" 83 | }, 84 | "language_info": { 85 | "codemirror_mode": { 86 | "name": "ipython", 87 | "version": 3 88 | }, 89 | "file_extension": ".py", 90 | "mimetype": "text/x-python", 91 | "name": "python", 92 | "nbconvert_exporter": "python", 93 | "pygments_lexer": "ipython3", 94 | "version": "3.6.1" 95 | } 96 | }, 97 | "nbformat": 4, 98 | "nbformat_minor": 2 99 | } 100 | -------------------------------------------------------------------------------- /tips_tricks/5_3_Adversarial Validation.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | 
"metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import pandas as pd\n", 12 | "import numpy as np\n", 13 | "from itertools import combinations\n", 14 | "from catboost import CatBoostClassifier\n", 15 | "from sklearn.model_selection import train_test_split, KFold\n", 16 | "from sklearn.metrics import roc_auc_score\n", 17 | "import warnings\n", 18 | "warnings.filterwarnings(\"ignore\")\n", 19 | "np.random.seed(42)" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 2, 25 | "metadata": { 26 | "collapsed": true 27 | }, 28 | "outputs": [], 29 | "source": [ 30 | "train = pd.read_csv('train.csv')\n", 31 | "y = train.target\n", 32 | "test = pd.read_csv('./test.csv')\n", 33 | "train.drop(['ID', 'target'], axis=1, inplace=True)\n", 34 | "test.drop(['ID'], axis=1, inplace=True)\n", 35 | "train.fillna(-9999, inplace=True)\n", 36 | "test.fillna(-9999, inplace=True)\n", 37 | "cat_features_ids = np.where(train.apply(pd.Series.nunique) < 30000)[0].tolist()" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": 3, 43 | "metadata": {}, 44 | "outputs": [ 45 | { 46 | "name": "stdout", 47 | "output_type": "stream", 48 | "text": [ 49 | "Number of train samples from test distribution: 49142\n" 50 | ] 51 | } 52 | ], 53 | "source": [ 54 | "y1 = np.ones_like(y)\n", 55 | "y2 = np.zeros((test.shape[0],))\n", 56 | "y_all = np.hstack([y1, y2])\n", 57 | "all_ = pd.concat([train, test])\n", 58 | "clf = CatBoostClassifier(learning_rate=0.1, iterations=100, random_seed=42, eval_metric='AUC', logging_level='Silent')\n", 59 | "clf.fit(all_, y_all, cat_features=cat_features_ids)\n", 60 | "prediction = clf.predict(train)\n", 61 | "best_val = train[prediction == 0]\n", 62 | "print('Number of train samples from test distribution:', best_val.shape[0])" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": 4, 68 | "metadata": {}, 69 | "outputs": [ 70 | { 71 | "name": "stdout", 72 | "output_type": "stream", 73 | "text": [ 74 | "Validation score: 0.7470119528903851\n" 75 | ] 76 | } 77 | ], 78 | "source": [ 79 | "clf = CatBoostClassifier(learning_rate=0.1, iterations=100, random_seed=42, eval_metric='AUC', logging_level='Silent')\n", 80 | "clf.fit(train.loc[prediction != 0, :], y[prediction != 0], cat_features=cat_features_ids)\n", 81 | "prediction_val = clf.predict_proba(best_val)\n", 82 | "print('Validation score:', roc_auc_score(y[prediction == 0], prediction_val[:, 1]))" 83 | ] 84 | } 85 | ], 86 | "metadata": { 87 | "kernelspec": { 88 | "display_name": "Python 3", 89 | "language": "python", 90 | "name": "python3" 91 | }, 92 | "language_info": { 93 | "codemirror_mode": { 94 | "name": "ipython", 95 | "version": 3 96 | }, 97 | "file_extension": ".py", 98 | "mimetype": "text/x-python", 99 | "name": "python", 100 | "nbconvert_exporter": "python", 101 | "pygments_lexer": "ipython3", 102 | "version": "3.6.1" 103 | } 104 | }, 105 | "nbformat": 4, 106 | "nbformat_minor": 2 107 | } 108 | -------------------------------------------------------------------------------- /similiar_pictures/app.py: -------------------------------------------------------------------------------- 1 | 2 | from flask import Flask, render_template, request 3 | from scipy.misc import imsave, imread, imresize 4 | import numpy as np 5 | import keras.models 6 | from keras.applications.resnet50 import ResNet50 7 | from keras.preprocessing import image 8 | from keras.applications.resnet50 import preprocess_input, decode_predictions 9 | import numpy as np 10 | from flask import 
Flask, request, render_template, send_from_directory 11 | from keras.models import Model 12 | from keras.layers.merge import Concatenate 13 | from keras.layers import Input, merge, Conv2D, MaxPooling2D, UpSampling2D, Dropout, Cropping2D 14 | import os 15 | import tqdm 16 | import pandas as pd 17 | import pickle 18 | from scipy.spatial.distance import cosine 19 | import re 20 | import base64 21 | import sys 22 | import os 23 | 24 | sys.path.append(os.path.abspath("./model")) 25 | from load import * 26 | 27 | app = Flask(__name__) 28 | global model 29 | model, dict_vector = init() 30 | ################################# 31 | import os 32 | from flask import Flask, request, redirect, url_for 33 | from werkzeug.utils import secure_filename 34 | 35 | UPLOAD_FOLDER = './uploads' 36 | ALLOWED_EXTENSIONS = set(['txt', 'pdf', 'png', 'jpg', 'jpeg', 'gif']) 37 | 38 | app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER 39 | 40 | 41 | def allowed_file(filename): 42 | return '.' in filename and \ 43 | filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS 44 | 45 | 46 | @app.route('/', methods=['GET', 'POST']) 47 | def upload_file(): 48 | global name_for_file 49 | if request.method == 'POST': 50 | if 'file' not in request.files: 51 | flash('No file part') 52 | return redirect(request.url) 53 | file = request.files['file'] 54 | 55 | if file.filename == '': 56 | flash('No selected file') 57 | return redirect(request.url) 58 | if file and allowed_file(file.filename): 59 | filename = secure_filename(file.filename) 60 | name_for_file = filename 61 | file.save(os.path.join(app.config['UPLOAD_FOLDER'], filename)) 62 | 63 | return render_template("index.html") 64 | 65 | 66 | 67 | def preproc_image(img_path): 68 | img = image.load_img(img_path, target_size=(224, 224)) 69 | x = image.img_to_array(img) 70 | x = np.expand_dims(x, axis=0) 71 | x = preprocess_input(x) 72 | return x 73 | 74 | 75 | @app.route('/') 76 | def index(): 77 | 78 | return render_template("index.html") 79 | 80 | 81 | @app.route('/predict/', methods=['GET', 'POST']) 82 | def predict(): 83 | 84 | print("debug") 85 | 86 | x = preproc_image('uploads/' + name_for_file) 87 | test_vector = model.predict(x) 88 | 89 | def find_distance(vector): 90 | return cosine(test_vector, vector) 91 | 92 | distance = map(find_distance, dict_vector.values()) 93 | 94 | distances = pd.DataFrame(distance) 95 | distances['img'] = pd.DataFrame(dict_vector.keys()) 96 | distances.rename(columns={0: 'distance'}, inplace=True) 97 | 98 | result = distances.sort_values(by='distance').img.values[0:5] 99 | 100 | string_output = "" 101 | for i in result: 102 | string_output+=i+" " 103 | return string_output 104 | 105 | 106 | 107 | if __name__ == "__main__": 108 | port = int(os.environ.get('PORT', 5000)) 109 | app.run(host='0.0.0.0', port=port) 110 | 111 | -------------------------------------------------------------------------------- /similiar_pictures/static/style.css: -------------------------------------------------------------------------------- 1 | @import url('http://fonts.googleapis.com/css?family=Open+Sans:400,300'); 2 | 3 | * { margin: 0; padding: 0; } 4 | 5 | html 6 | { 7 | background-image: -webkit-radial-gradient(top center, circle cover, #E7E7E8 30%, #C6C6C6); 8 | background-image: -moz-radial-gradient(top center, circle cover, #E7E7E8 30%, #C6C6C6); 9 | background-image: -ms-radial-gradient(top center, circle cover, #E7E7E8 30%, #C6C6C6); 10 | background-image: -o-radial-gradient(top center, circle cover, #E7E7E8 30%, #C6C6C6); 11 | background-image: radial-gradient(top 
center, circle cover, #E7E7E8 30%, #C6C6C6); 12 | height: 100%; 13 | -webkit-tap-highlight-color: rgba(0, 0, 0, 0); 14 | -webkit-user-select: none; 15 | -moz-user-select: none; 16 | -ms-user-select: none; 17 | -o-user-select: none; 18 | user-select: none; 19 | overflow: hidden; 20 | 21 | 22 | } 23 | 24 | h1 25 | { 26 | 27 | top: 20px; 28 | left: 0; 29 | right: 0; 30 | text-align: center; 31 | font-family: 'Open Sans', Helvetica; 32 | color: #807F7D; 33 | font-size: 25px; 34 | font-weight: 300; 35 | z-index: -1; 36 | } 37 | 38 | h1 span { color: #4DAF7C; font-weight: 400; } 39 | 40 | h2 41 | { 42 | 43 | margin-top: 80px; 44 | left: 0; 45 | right: 0; 46 | text-align: center; 47 | font-family: 'Open Sans', Helvetica; 48 | color: #807F7D; 49 | font-size: 25px; 50 | font-weight: 300; 51 | z-index: -1; 52 | } 53 | 54 | h2 span { color: #4DAF7C; font-weight: 400; } 55 | 56 | 57 | /* Debug */ 58 | 59 | #debug 60 | { 61 | position: absolute; 62 | top: 0; 63 | left: 0; 64 | padding: 5px; 65 | } 66 | .myButton { 67 | -moz-box-shadow: 0px 10px 14px -7px #3e7327; 68 | -webkit-box-shadow: 0px 10px 14px -7px #3e7327; 69 | box-shadow: 0px 10px 14px -7px #3e7327; 70 | background:-webkit-gradient(linear, left top, left bottom, color-stop(0.05, #4DAF7C), color-stop(1, #72b352)); 71 | background:-moz-linear-gradient(top, #4DAF7C 5%, #72b352 100%); 72 | background:-webkit-linear-gradient(top, #4DAF7C 5%, #72b352 100%); 73 | background:-o-linear-gradient(top, #4DAF7C 5%, #72b352 100%); 74 | background:-ms-linear-gradient(top, #4DAF7C 5%, #72b352 100%); 75 | background:linear-gradient(to bottom, #4DAF7C 5%, #72b352 100%); 76 | filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#4DAF7C', endColorstr='#72b352',GradientType=0); 77 | background-color:#4DAF7C; 78 | -moz-border-radius:4px; 79 | -webkit-border-radius:4px; 80 | border-radius:4px; 81 | border:1px solid #4b8f29; 82 | display:inline-block; 83 | cursor:pointer; 84 | color:#ffffff; 85 | font-family:Arial; 86 | font-size:15px; 87 | padding:6px 13px; 88 | text-decoration:none; 89 | text-shadow:0px 1px 0px #5b8a3c; 90 | } 91 | .myButton:hover { 92 | background:-webkit-gradient(linear, left top, left bottom, color-stop(0.05, #72b352), color-stop(1, #4DAF7C)); 93 | background:-moz-linear-gradient(top, #72b352 5%, #4DAF7C 100%); 94 | background:-webkit-linear-gradient(top, #72b352 5%, #4DAF7C 100%); 95 | background:-o-linear-gradient(top, #72b352 5%, #4DAF7C 100%); 96 | background:-ms-linear-gradient(top, #72b352 5%, #4DAF7C 100%); 97 | background:linear-gradient(to bottom, #72b352 5%, #4DAF7C 100%); 98 | filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#72b352', endColorstr='#4DAF7C',GradientType=0); 99 | background-color:#4DAF7C; 100 | } 101 | .myButton:active { 102 | position:relative; 103 | top:1px; 104 | } 105 | 106 | 107 | -------------------------------------------------------------------------------- /Hands-on Reinforcement Learning with Python, Second Edition_new chapters/01. Fundamentals of Reinforcement Learning/1.02. Key Elements of Reinforcement Learning .ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Key Elements of Reinforcement Learning \n", 8 | "In this section, we will learn about some of the key elements of reinforcement learning:\n", 9 | "\n", 10 | "## Agent \n", 11 | "An agent is a software program that learns to make intelligent decisions. 
We can say that an agent is a learner in the reinforcement learning setting. For instance, a chess player can be considered as an agent as the player learns to make the best moves(decisions) in order to win the game. Similarly, a Super Mario in a video game can be considered as an agent since Mario explores the game and learns to make the best move in the game.\n", 12 | "\n", 13 | "## Environment \n", 14 | "The environment is the world of the agent. The agent stays within the environment. For instance, coming back to our chess game, a chessboard is called the environment. Since the chessplayer(agent) learns to play the game of chess within the chessboard(environment). Similarly in the super Mario game, the world of super Mario game is called the environment. \n", 15 | "\n", 16 | "## State and action\n", 17 | "A state is a position or a moment in the environment where the agent can be in. We learned that the agent stays within the environment and there can be many positions in the environment where the agent can stay in and those positions are called state. For instance, in our chess game example, each position in the chessboard is called the state. The state is usually denoted by . \n", 18 | "\n", 19 | "The agent interacts with the environment and moves from one state to another by performing an action. In the chess game environment, the action is the move performed by the player(agent). The action is usually denoted by . \n", 20 | "\n", 21 | "## Reward\n", 22 | "We learned that the agent interacts with an environment by performing an action and moves from one state to another. Based on the action, the agent receives a reward. A reward is nothing but a numerical value, say, +1 for good action and -1 for a bad action. How do we decide if an action is good or bad?\n", 23 | "\n", 24 | "In our chess game example, if the agent makes the move such that if it defeats the opponent's chess piece then it can be considered as a good action and the agent receives the positive reward. Similarly, if the agent makes a move such that if the opponent defeats the agent's chess piece then it will be considered as bad action and the agent receives a negative reward. The reward is denoted by . \n", 25 | "\n", 26 | "Now that we have understood several key elements of reinforcement learning. In the next section, we will take a closer look at the reinforcement learning algorithm and learn how exactly it works. 
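As a rough, minimal sketch of how these elements fit together (not part of this chapter's text; it assumes the Gym toolkit introduced later in the book is installed, and uses 'CartPole-v0' with a random action choice purely for illustration), an agent-environment loop could look like this:

import gym

env = gym.make('CartPole-v0')        # the environment
state = env.reset()                  # the initial state
done = False
total_reward = 0
while not done:
    action = env.action_space.sample()             # the agent picks an action (here: at random)
    state, reward, done, info = env.step(action)   # the environment returns the next state and a reward
    total_reward += reward
print('Return obtained by the random agent:', total_reward)

Each pass through the loop is one interaction: the agent acts, the environment moves to a new state, and the reward tells the agent how good that action was.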
" 27 | ] 28 | } 29 | ], 30 | "metadata": { 31 | "kernelspec": { 32 | "display_name": "Python 3", 33 | "language": "python", 34 | "name": "python3" 35 | }, 36 | "language_info": { 37 | "codemirror_mode": { 38 | "name": "ipython", 39 | "version": 3 40 | }, 41 | "file_extension": ".py", 42 | "mimetype": "text/x-python", 43 | "name": "python", 44 | "nbconvert_exporter": "python", 45 | "pygments_lexer": "ipython3", 46 | "version": "3.6.9" 47 | } 48 | }, 49 | "nbformat": 4, 50 | "nbformat_minor": 2 51 | } 52 | -------------------------------------------------------------------------------- /big_data_for_engineers/Spark_bigramms.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "from pyspark import SparkConf, SparkContext\n", 10 | "from itertools import combinations\n", 11 | "from collections import Counter\n", 12 | "sc = SparkContext(conf=SparkConf().setAppName(\"MyApp\").setMaster(\"local\"))\n", 13 | "pair_counter = Counter()\n", 14 | "import re\n", 15 | "\n", 16 | "def parse_article(line):\n", 17 | " try:\n", 18 | " article_id, text = unicode(line.rstrip()).split('\\t', 1)\n", 19 | " text = text.lower()\n", 20 | " text = re.sub(\"^\\W+|\\W+$\", \"\", text, flags=re.UNICODE)\n", 21 | " words = re.split(\"\\W*\\s+\\W*\", text, flags=re.UNICODE)\n", 22 | " return words\n", 23 | " except ValueError as e:\n", 24 | " return []\n", 25 | "\n", 26 | "wiki = sc.textFile(\"/data/wiki/en_articles_part/articles-part\", 16).map(parse_article)\n", 27 | "#wiki = wiki.map(lambda word :(word ,1)).reduceByKey(lambda x,y: x+y)\n", 28 | "result = wiki.take(1)[0]" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": 2, 34 | "metadata": {}, 35 | "outputs": [], 36 | "source": [ 37 | "def collect_pairs(lines):\n", 38 | " pair_counter = Counter()\n", 39 | " unique_tokens = sorted(set(lines)) # exclude duplicates in same line and sort to ensure one word is always before other\n", 40 | " combos = combinations(unique_tokens, 2)\n", 41 | " pair_counter += Counter(combos)\n", 42 | " return pair_counter" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": 3, 48 | "metadata": {}, 49 | "outputs": [], 50 | "source": [ 51 | "def find_bigrams(input_list):\n", 52 | " bigram_dict = {}\n", 53 | " for i in range(len(input_list)-1):\n", 54 | " if 'narodnaya' in (input_list[i], input_list[i+1])[0]:\n", 55 | " return ((str(input_list[i]) +\"_\" + str(input_list[i+1])),1)" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": 4, 61 | "metadata": {}, 62 | "outputs": [], 63 | "source": [ 64 | "test = wiki.map(find_bigrams)\n", 65 | "test_2 = test.filter(lambda x: x != None)\n", 66 | "test_3 = test_2.reduceByKey(lambda v1,v2:v1 +v2)" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": 5, 72 | "metadata": {}, 73 | "outputs": [ 74 | { 75 | "name": "stdout", 76 | "output_type": "stream", 77 | "text": [ 78 | "narodnaya_gazeta 1\n", 79 | "narodnaya_volya 3\n" 80 | ] 81 | } 82 | ], 83 | "source": [ 84 | "for data in test_3.take(10000):\n", 85 | " print(data[0]+\" \"+ str(data[1]))\n", 86 | " " 87 | ] 88 | } 89 | ], 90 | "metadata": { 91 | "kernelspec": { 92 | "display_name": "Python 2", 93 | "language": "python", 94 | "name": "python2" 95 | }, 96 | "language_info": { 97 | "codemirror_mode": { 98 | "name": "ipython", 99 | "version": 2 100 | }, 101 | "file_extension": ".py", 102 | "mimetype": 
"text/x-python", 103 | "name": "python", 104 | "nbconvert_exporter": "python", 105 | "pygments_lexer": "ipython2", 106 | "version": "2.7.12" 107 | } 108 | }, 109 | "nbformat": 4, 110 | "nbformat_minor": 2 111 | } 112 | -------------------------------------------------------------------------------- /recursion-cellular-image-classification/predict_multi.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from data_loader import tloader 3 | from tqdm import tqdm 4 | import pandas as pd 5 | import numpy as np 6 | from model_k import model_resnet_18 7 | import os 8 | from config import _get_default_config 9 | 10 | config = _get_default_config() 11 | MODEL_NAME = config.model 12 | 13 | # models_name = \ 14 | # [['resnet18_HEPG2_site_1_49_val_accuracy=0.1797468.pth', 'resnet18_HEPG2_site_2_74_val_accuracy=0.1959545.pth'], 15 | # ['resnet18_HUVEC_site_1_86_val_accuracy=0.575.pth', 'resnet18_HUVEC_site_2_91_val_accuracy=0.5804118.pth'], 16 | # ['resnet18_RPE_site_1_150_val_accuracy=0.2769424.pth', 'resnet18_RPE_site_2_47_val_accuracy=0.2866579.pth'], 17 | # ['resnet18_U2OS_site_1_64_val_accuracy=0.07565789.pth', 'resnet18_U2OS_site_2_50_val_accuracy=0.09120521.pth']] 18 | 19 | # models_name = \ 20 | # [['densenet121_HEPG2_site_1_103_val_accuracy=0.3316456.pth', 'densenet121_HEPG2_site_2_27_val_accuracy=0.3198483.pth'], 21 | # ['densenet121_HUVEC_site_1_72_val_accuracy=0.6909091.pth', 'densenet121_HUVEC_site_2_120_val_accuracy=0.7067334.pth'], 22 | # ['densenet121_RPE_site_1_66_val_accuracy=0.4235589.pth', 'densenet121_RPE_site_2_92_val_accuracy=0.4504624.pth'], 23 | # ['densenet121_U2OS_site_1_75_val_accuracy=0.1151316.pth', 'densenet121_U2OS_site_2_75_val_accuracy=0.1433225.pth']] 24 | 25 | 26 | models_name = \ 27 | [['densenet121_HEPG2_site_1_22_val_accuracy=0.3101266.pth', 'densenet121_HEPG2_site_2_93_val_accuracy=0.335019.pth'], 28 | ['densenet121_HUVEC_site_1_49_val_accuracy=0.6846591.pth', 'densenet121_HUVEC_site_2_87_val_accuracy=0.6956038.pth'], 29 | ['densenet121_RPE_site_1_47_val_accuracy=0.3934837.pth', 'densenet121_RPE_site_2_85_val_accuracy=0.4319683.pth'], 30 | ['densenet121_U2OS_site_1_54_val_accuracy=0.1085526.pth', 'densenet121_U2OS_site_2_55_val_accuracy=0.1465798.pth']] 31 | os.environ["CUDA_VISIBLE_DEVICES"] = "1" 32 | 33 | path = '/mnt/ssd1/datasets/Recursion_class/recurs_proj/checkpoint_{}'.format(config.checkpoint_folder) 34 | path_data = '/mnt/ssd1/datasets/Recursion_class/' 35 | device = 'cuda' 36 | model_1 = model_resnet_18 37 | model_2 = model_resnet_18 38 | 39 | preds_list = [] 40 | for checkpoint_name in models_name: 41 | checkpoint_1 = torch.load(path + '/' + checkpoint_name[0]) 42 | checkpoint_2 = torch.load(path + '/' + checkpoint_name[1]) 43 | model_1.load_state_dict(checkpoint_1) 44 | model_1.to(device) 45 | model_2.load_state_dict(checkpoint_2) 46 | model_2.to(device) 47 | with torch.no_grad(): 48 | preds = np.empty(0) 49 | for x, _ in tqdm(tloader): 50 | x = x.to(device) 51 | output_1 = model_1(x) 52 | output_2 = model_2(x) 53 | output = output_1 + output_2 54 | idx = output.max(dim=-1)[1].cpu().numpy() 55 | preds = np.append(preds, idx, axis=0) 56 | 57 | preds_list.append(preds) 58 | 59 | 60 | submission = pd.read_csv(path_data + '/test.csv') 61 | final_results = [] 62 | 63 | for n, experiment in enumerate(submission.experiment.values): 64 | if 'HEPG2' in experiment: 65 | final_results.append(preds_list[0][n]) 66 | elif 'HUVEC' in experiment: 67 | final_results.append(preds_list[1][n]) 68 | elif 'RPE' in 
experiment: 69 | final_results.append(preds_list[2][n]) 70 | elif 'U2OS' in experiment: 71 | final_results.append(preds_list[3][n]) 72 | 73 | 74 | submission['sirna'] = np.array(final_results).astype(int) 75 | submission.to_csv('submission_{}.csv'.format('multi_8_{}'.format(config.model)), index=False, columns=['id_code', 'sirna']) 76 | 77 | 78 | -------------------------------------------------------------------------------- /Hands-on Reinforcement Learning with Python, Second Edition_new chapters/01. Fundamentals of Reinforcement Learning/1.12. Applications of Reinforcement Learning.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Applications of Reinforcement Learning\n", 8 | "\n", 9 | "Reinforcement learning has evolved rapidly over the past couple of years with a wide range of applications ranging from playing games to self-driving cars. One of the major reasons for this evolution is due to deep reinforcement learning (DRL) which is a combination of reinforcement learning and deep learning. We will learn about the various state of the art deep reinforcement learning algorithms in the upcoming chapters so be excited. In this section, we will look into some of the real-life applications of reinforcement learning.\n", 10 | "\n", 11 | "__Manufacturing__ - In manufacturing, intelligent robots are trained using reinforcement learning to place objects in the right position. The use of intelligent robots reduces labor costs and increases productivity. \n", 12 | "\n", 13 | "Dynamic Pricing - One of the popular applications of reinforcement learning includes dynamic pricing. Dynamic pricing implies that we change the price of the products based on demand and supply. We can train the RL agent for the dynamic pricing of the products with the goal of maximizing the revenue.\n", 14 | "\n", 15 | "__Inventory management__ - Reinforcement learning is extensively used in inventory management which is a crucial business activity. Some of these activities include supply chain management, demand forecasting, and handling several warehouse operations (such as placing products in warehouses for managing space efficiently).\n", 16 | "\n", 17 | "__Recommendation System__ - Reinforcement learning is widely used in building a recommendation system where the behavior of the user constantly changes. For instance, in the music recommendation system, the behavior or the music preference of the user changes from time to time. So in those cases using an RL agent can be very useful as the agent constantly learn by interacting with the environment. \n", 18 | "\n", 19 | "__Neural Architecture search__ - In order for the neural networks to perform a given task with good accuracy, the architecture of the network is very important and it has to properly designed. 
With reinforcement learning, we can automate the process of complex neural architecture search by training the agent to find the best neural architecture for a given task with the goal of maximizing the accuracy.\n", 20 | "\n", 21 | "__Natural Language Processing__ - With the increase in popularity of the deep reinforcement algorithms, RL has been widely used in several NLP tasks such as abstractive text summarization, chatbots and more.\n", 22 | "\n", 23 | "__Finance__ - Reinforcement learning is widely used in financial portfolio management which is the process of constant redistribution of a fund into different financial products. RL is also used in predicting and trading in commercial transaction markets. JP Morgan has successfully used RL to provide better trade execution results for large orders." 24 | ] 25 | } 26 | ], 27 | "metadata": { 28 | "kernelspec": { 29 | "display_name": "Python 3", 30 | "language": "python", 31 | "name": "python3" 32 | }, 33 | "language_info": { 34 | "codemirror_mode": { 35 | "name": "ipython", 36 | "version": 3 37 | }, 38 | "file_extension": ".py", 39 | "mimetype": "text/x-python", 40 | "name": "python", 41 | "nbconvert_exporter": "python", 42 | "pygments_lexer": "ipython3", 43 | "version": "3.6.9" 44 | } 45 | }, 46 | "nbformat": 4, 47 | "nbformat_minor": 2 48 | } 49 | -------------------------------------------------------------------------------- /Hands-on Reinforcement Learning with Python, Second Edition_new chapters/09. Actor Critic Methods - A2C and A3C/9.01. Overview of actor critic method.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Overview of actor critic method\n", 8 | "\n", 9 | "The actor critic method is one of the most popular algorithms in deep reinforcement learning. Several modern deep reinforcement learning algorithms are designed based on the actor critic methods. The actor critic method lies in the intersection of value based and policy based methods. That is, it takes advantage of both value based and policy based methods.\n", 10 | "\n", 11 | "In this section, without getting into more details, first, let's get a basic understanding of how actor critic method works and in the next section, we will get into more details and understand the math behind the actor critic methods. \n", 12 | "\n", 13 | "Actor critic, as the name suggests consists of two types of networks called actor network and critic network. The role of the actor network is to find an optimal policy while the role of the critic network is to evaluate the policy produced by the actor network. So, we can think of, critic network as a form of feedback network which evaluates and guides the actor network in finding the optimal policy as shown below:\n", 14 | "\n", 15 | "![title](Images/1.png)\n", 16 | "\n", 17 | "Okay, what's really actor and critic network? how it works together and improve the policy? The actor network is basically the policy network and it finds the optimal policy using a policy gradient method. The critic network is basically the value network and it estimates the state value. Thus using its state value, the critic network evaluates the action produced by actor network and sends its feedback to the actor. 
Based on the critic's feedback, actor network updates its parameter.\n", 18 | "\n", 19 | "Thus, in the actor critic method, we use two networks - actor network (policy network) which computes the policy and the critic network (value network) which evaluates the policy produced by actor network by computing the value function (state values). Isn't this similar to something we just learned in the previous chapter?\n", 20 | "\n", 21 | "Yes! If you could recollect it is similar to the policy gradient method with the baseline (reinforce with baseline) we learned in the previous chapter. Similar to reinforce with baseline, here also we have an actor (policy network) and a critic (value network) network. However, actor critic is NOT exactly similar to reinforce with baseline. In the reinforce with baseline method, we learned that we use value network as the baseline and it helps to reduce the variance in the gradient updates. In the actor-critic method as well, we use the critic to reduce variance in the gradient updates of the actor but also critic helps to improve the policy iteratively in an online fashion. The distinction between these two will be made clear in the next section.\n", 22 | "\n", 23 | "Now that we have a basic understanding of what is actor critic method, in the next section we will learn how exactly the actor critic method works in detail. " 24 | ] 25 | } 26 | ], 27 | "metadata": { 28 | "kernelspec": { 29 | "display_name": "Python 3", 30 | "language": "python", 31 | "name": "python3" 32 | }, 33 | "language_info": { 34 | "codemirror_mode": { 35 | "name": "ipython", 36 | "version": 3 37 | }, 38 | "file_extension": ".py", 39 | "mimetype": "text/x-python", 40 | "name": "python", 41 | "nbconvert_exporter": "python", 42 | "pygments_lexer": "ipython3", 43 | "version": "3.6.9" 44 | } 45 | }, 46 | "nbformat": 4, 47 | "nbformat_minor": 2 48 | } 49 | -------------------------------------------------------------------------------- /santa_RL/test.py: -------------------------------------------------------------------------------- 1 | from data_loader_monte import train_loader 2 | from neural_network_monte_santa import Dqn 3 | import pandas as pd 4 | import numpy as np 5 | import torch 6 | from tqdm import tqdm 7 | import os 8 | from reward_calc import calc_reward 9 | from give_reward_monte import give_reward 10 | from config import _get_default_config 11 | 12 | config = _get_default_config() 13 | device = 'cuda' 14 | os.environ["CUDA_VISIBLE_DEVICES"] = "2" 15 | N_ACTIONS = config.N_ACTIONS 16 | MAX_CAPACITY = config.MAX_CAPACITY 17 | DAYS_OF_MAX_CAPACITY = config.DAYS_OF_MAX_CAPACITY 18 | ADDITIONAL_REWARD = config.ADDITIONAL_REWARD 19 | REWARD_SCALE = config.REWARD_SCALE 20 | 21 | try: 22 | os.remove('rewards.txt') 23 | except: 24 | pass 25 | 26 | 27 | def write_to_txt(value: object, name: object) -> object: 28 | with open(f"{name}.txt", "a") as myfile: 29 | myfile.write(f"{value}" + '\n') 30 | 31 | 32 | path_data = '' 33 | dqn = Dqn(config.n_neurons, N_ACTIONS, 0.9) 34 | # dqn.load() 35 | df_standard = np.array(pd.read_csv(path_data + 'family_data_standard_scaled.csv')) 36 | df = np.array(pd.read_csv(path_data + 'family_data.csv')) 37 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 38 | gamma = 0.9 39 | sub = pd.read_csv('sample_submission.csv') 40 | results = {day: [] for day in range(100)} 41 | min_penalty = 999999 42 | for epoch in tqdm(range(1000)): 43 | empty_days = [] 44 | c = 0 45 | total_reward = 0 46 | total_penalty = 0 47 | reward = 0 48 | 
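# Per-epoch bookkeeping for the allocation episode that follows: population_dict
# tracks how many people are already assigned to each of the 100 days, and
# family_states is the occupancy/assignment vector that is concatenated with each
# family's features before being fed to the DQN.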
population_dict = {day: 0 for day in range(1, 101)} 49 | family_states = torch.zeros((1, 5100)) 50 | for id_n, (data, n) in enumerate(train_loader): 51 | mask = torch.zeros((1, N_ACTIONS)) # mask if day is full - you can't choose it . zero in the begining 52 | current_row = df[int(n.numpy())] 53 | current_row = np.array(current_row[1:N_ACTIONS + 1].tolist() + [current_row[-1]]) 54 | n_members = current_row[-1] 55 | days = current_row[:-1] 56 | for n_pos, day_var in enumerate(days[:-1]): # fill mask with -inf if you can't choose it 57 | if population_dict[day_var] + n_members > MAX_CAPACITY: 58 | mask[0, n_pos] = -1 * np.inf 59 | # blocked = (mask == -np.inf).sum().numpy() 60 | data_state = torch.cat((data, family_states), dim=1) 61 | # if blocked != N_ACTIONS: 62 | action = dqn.update(data_state, reward, mask, 'train').detach().cpu() 63 | array_actions = action.numpy() 64 | selected_action = array_actions[0] 65 | if selected_action != N_ACTIONS - 1: # if it is not last pick - take a day 66 | day = current_row[:-1][selected_action] 67 | else: # if it is last pick - it means we choose last random free day 68 | valid_days = np.array(list(map(int, population_dict.values()))) + n_members <= MAX_CAPACITY 69 | valid_days = np.array(list(range(1, 101)))[valid_days] 70 | day = np.random.choice(valid_days) 71 | array_actions = [999] 72 | 73 | population_dict[day] += n_members # fill the dict with people who chosen specific day 74 | reward, penalty = calc_reward(array_actions, n_members) 75 | 76 | family_states[0, int(n[0])] = (day - 50.5) / 29.8 77 | family_states[0, int(n[0]) + day - 1] += n_members/MAX_CAPACITY 78 | reward = give_reward(family_states, dqn, population_dict, df, df_standard, reward, 79 | config.episdodes_monte) 80 | total_reward += reward 81 | total_penalty += penalty 82 | 83 | sub.at[int(n[0]), 'assigned_day'] = day 84 | print(total_reward, 'reward', total_penalty, 'penalty') 85 | write_to_txt(str(total_reward) + ' ' + str(epoch) + ' ' + str(total_penalty), 'rewards') 86 | 87 | if abs(total_penalty) < min_penalty: 88 | dqn.save() 89 | min_penalty = abs(total_penalty) 90 | sub.to_csv('test.csv', index=None) 91 | -------------------------------------------------------------------------------- /similiar_pictures/templates/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Pictures Similiarity | Valeriy Babushkin 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 |

[index.html body: the HTML markup did not survive extraction; the recoverable content is the page title "Pictures similiarity", the credit "By: Valeriy Babushkin", a file-upload form with a "Predict" button, "Upload new File" controls, and five result image placeholders ("pic1").]
109 | 110 | 111 | -------------------------------------------------------------------------------- /Mercari_0_3875_CV.py: -------------------------------------------------------------------------------- 1 | import os; os.environ['OMP_NUM_THREADS'] = '1' 2 | from contextlib import contextmanager 3 | from functools import partial 4 | from operator import itemgetter 5 | from multiprocessing.pool import ThreadPool 6 | import time 7 | from typing import List, Dict 8 | 9 | import keras as ks 10 | import pandas as pd 11 | import numpy as np 12 | import tensorflow as tf 13 | from sklearn.feature_extraction import DictVectorizer 14 | from sklearn.feature_extraction.text import TfidfVectorizer as Tfidf 15 | from sklearn.pipeline import make_pipeline, make_union, Pipeline 16 | from sklearn.preprocessing import FunctionTransformer, StandardScaler 17 | from sklearn.metrics import mean_squared_log_error 18 | from sklearn.model_selection import KFold 19 | 20 | @contextmanager 21 | def timer(name): 22 | t0 = time.time() 23 | yield 24 | print(f'[{name}] done in {time.time() - t0:.0f} s') 25 | 26 | def preprocess(df: pd.DataFrame) -> pd.DataFrame: 27 | df['name'] = df['name'].fillna('') + ' ' + df['brand_name'].fillna('') 28 | df['text'] = (df['item_description'].fillna('') + ' ' + df['name'] + ' ' + df['category_name'].fillna('')) 29 | return df[['name', 'text', 'shipping', 'item_condition_id']] 30 | 31 | def on_field(f: str, *vec) -> Pipeline: 32 | return make_pipeline(FunctionTransformer(itemgetter(f), validate=False), *vec) 33 | 34 | def to_records(df: pd.DataFrame) -> List[Dict]: 35 | return df.to_dict(orient='records') 36 | 37 | def fit_predict(xs, y_train) -> np.ndarray: 38 | X_train, X_test = xs 39 | config = tf.ConfigProto( 40 | intra_op_parallelism_threads=1, use_per_session_threads=1, inter_op_parallelism_threads=1) 41 | with tf.Session(graph=tf.Graph(), config=config) as sess, timer('fit_predict'): 42 | ks.backend.set_session(sess) 43 | model_in = ks.Input(shape=(X_train.shape[1],), dtype='float32', sparse=True) 44 | out = ks.layers.Dense(192, activation='relu')(model_in) 45 | out = ks.layers.Dense(64, activation='relu')(out) 46 | out = ks.layers.Dense(64, activation='relu')(out) 47 | out = ks.layers.Dense(1)(out) 48 | model = ks.Model(model_in, out) 49 | model.compile(loss='mean_squared_error', optimizer=ks.optimizers.Adam(lr=3e-3)) 50 | for i in range(3): 51 | with timer(f'epoch {i + 1}'): 52 | model.fit(x=X_train, y=y_train, batch_size=2**(11 + i), epochs=1, verbose=0) 53 | return model.predict(X_test)[:, 0] 54 | 55 | def main(): 56 | vectorizer = make_union( 57 | on_field('name', Tfidf(max_features=100000, token_pattern='\w+')), 58 | on_field('text', Tfidf(max_features=100000, token_pattern='\w+', ngram_range=(1, 2))), 59 | on_field(['shipping', 'item_condition_id'], 60 | FunctionTransformer(to_records, validate=False), DictVectorizer()), 61 | n_jobs=4) 62 | y_scaler = StandardScaler() 63 | with timer('process train'): 64 | train = pd.read_table('../input/train.tsv') 65 | train = train[train['price'] > 0].reset_index(drop=True) 66 | cv = KFold(n_splits=20, shuffle=True, random_state=42) 67 | train_ids, valid_ids = next(cv.split(train)) 68 | train, valid = train.iloc[train_ids], train.iloc[valid_ids] 69 | y_train = y_scaler.fit_transform(np.log1p(train['price'].values.reshape(-1, 1))) 70 | X_train = vectorizer.fit_transform(preprocess(train)).astype(np.float32) 71 | print(f'X_train: {X_train.shape} of {X_train.dtype}') 72 | del train 73 | with timer('process valid'): 74 | X_valid = 
vectorizer.transform(preprocess(valid)).astype(np.float32) 75 | with ThreadPool(processes=4) as pool: 76 | Xb_train, Xb_valid = [x.astype(np.bool).astype(np.float32) for x in [X_train, X_valid]] 77 | xs = [[Xb_train, Xb_valid], [X_train, X_valid]] * 2 78 | y_pred = np.mean(pool.map(partial(fit_predict, y_train=y_train), xs), axis=0) 79 | y_pred = np.expm1(y_scaler.inverse_transform(y_pred.reshape(-1, 1))[:, 0]) 80 | print('Valid RMSLE: {:.4f}'.format(np.sqrt(mean_squared_log_error(valid['price'], y_pred)))) 81 | 82 | if __name__ == '__main__': 83 | main() 84 | -------------------------------------------------------------------------------- /similiar_pictures/instructions.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# How to Deploy Keras Models to Production\n", 8 | "\n", 9 | "## Our demo steps\n", 10 | "We'll build a simple Flask app to predict the character that you've drawn in the browser!\n", 11 | "\n", 12 | "- Step 1 - Train our model with Keras\n", 13 | "- Step 2 - Save our model \n", 14 | "- Step 3 - Write our Flask backend to serve our saved model (then look at dep classes (load.py, index.js, index.html)\n", 15 | "- Step 4 - Deploy our code to Google Cloud\n", 16 | "\n", 17 | "![alt text](http://i.imgur.com/UNHn2Xf.png \"Logo Title Text 1\")\n", 18 | "\n", 19 | "## Whats our stack look like\n", 20 | "\n", 21 | "- We use Keras with a Tensorflow backend to train a small 8 layer Convolutional Network to \n", 22 | "recognize handwritten character digits\n", 23 | "- We use Flask as our backend for serving these pretrained models. You could also use Node.js, or any number of JS web frameworks but i wouldn't want to write a complex backend in JS. I <3 python\n", 24 | "\n", 25 | "![alt text](https://image.slidesharecdn.com/flaskpython-130201154928-phpapp01/95/flask-python-10-638.jpg?cb=1359733858 \"Logo Title Text 1\")\n", 26 | "\n", 27 | "## What else is out there? Why use this?\n", 28 | "\n", 29 | "- Because doing this natively is dead simple. However, if you want GPU accelerated inference, these libraries are good options\n", 30 | "- Keras.js https://github.com/transcranial/keras-js Run Keras models in the browser (python backend, JS front-end) with GPU support. Only performs forward-pass inference (so no training in the browser). Uss ndarray (like js version of numpY) and weblas (GPU acceleration) to perform matrix ops\n", 31 | "- WebDNN https://github.com/mil-tokyo/webdnn (but converting the model to its required format requires OS X (Specifically the xcode dependency 'metal' for running GPU operations). Not everyone uses OS X! Although Francois Chollet is a fan and its also faster than this. But this has more examples.\n", 32 | "- NeoCortex https://github.com/scienceai/neocortex the interactive demos are pretty cool but not actively maintained" 33 | ] 34 | }, 35 | { 36 | "cell_type": "markdown", 37 | "metadata": {}, 38 | "source": [ 39 | "## Once our model is trained + saved with train.py, we've written our flask app, use these steps to deploy to google cloud\n", 40 | "\n", 41 | "We can Deploy our app to App Engine using the \n", 42 | "\n", 43 | "`gcloud app deploy`\n", 44 | "\n", 45 | "command. This command automatically builds a container image by using the Container Builder service and then deploys that image to the App Engine flexible environment. 
The container will include any local modifications that you've made to the runtime image.\n", 46 | "\n", 47 | "Launch your browser and view the app at http://YOUR_PROJECT_ID.appspot.com, by running the following command:\n", 48 | "\n", 49 | "`gcloud app browse`\n", 50 | "\n", 51 | "\n", 52 | "More info here https://cloud.google.com/appengine/docs/flexible/python/quickstart\n", 53 | "\n", 54 | "and this guy has a different and detailed tutorial as well \n", 55 | "\n", 56 | "https://medium.com/google-cloud/keras-inception-v3-on-google-compute-engine-a54918b0058\n", 57 | "\n", 58 | "\n" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": null, 64 | "metadata": { 65 | "collapsed": true 66 | }, 67 | "outputs": [], 68 | "source": [] 69 | } 70 | ], 71 | "metadata": { 72 | "kernelspec": { 73 | "display_name": "Python 2", 74 | "language": "python", 75 | "name": "python2" 76 | }, 77 | "language_info": { 78 | "codemirror_mode": { 79 | "name": "ipython", 80 | "version": 2 81 | }, 82 | "file_extension": ".py", 83 | "mimetype": "text/x-python", 84 | "name": "python", 85 | "nbconvert_exporter": "python", 86 | "pygments_lexer": "ipython2", 87 | "version": "2.7.12" 88 | } 89 | }, 90 | "nbformat": 4, 91 | "nbformat_minor": 2 92 | } 93 | -------------------------------------------------------------------------------- /mercari/Mercari_0_3875_CV.py: -------------------------------------------------------------------------------- 1 | import os; os.environ['OMP_NUM_THREADS'] = '1' 2 | from contextlib import contextmanager 3 | from functools import partial 4 | from operator import itemgetter 5 | from multiprocessing.pool import ThreadPool 6 | import time 7 | from typing import List, Dict 8 | 9 | import keras as ks 10 | import pandas as pd 11 | import numpy as np 12 | import tensorflow as tf 13 | from sklearn.feature_extraction import DictVectorizer 14 | from sklearn.feature_extraction.text import TfidfVectorizer as Tfidf 15 | from sklearn.pipeline import make_pipeline, make_union, Pipeline 16 | from sklearn.preprocessing import FunctionTransformer, StandardScaler 17 | from sklearn.metrics import mean_squared_log_error 18 | from sklearn.model_selection import KFold 19 | 20 | @contextmanager 21 | def timer(name): 22 | t0 = time.time() 23 | yield 24 | print(f'[{name}] done in {time.time() - t0:.0f} s') 25 | 26 | def preprocess(df: pd.DataFrame) -> pd.DataFrame: 27 | df['name'] = df['name'].fillna('') + ' ' + df['brand_name'].fillna('') 28 | df['text'] = (df['item_description'].fillna('') + ' ' + df['name'] + ' ' + df['category_name'].fillna('')) 29 | return df[['name', 'text', 'shipping', 'item_condition_id']] 30 | 31 | def on_field(f: str, *vec) -> Pipeline: 32 | return make_pipeline(FunctionTransformer(itemgetter(f), validate=False), *vec) 33 | 34 | def to_records(df: pd.DataFrame) -> List[Dict]: 35 | return df.to_dict(orient='records') 36 | 37 | def fit_predict(xs, y_train) -> np.ndarray: 38 | X_train, X_test = xs 39 | config = tf.ConfigProto( 40 | intra_op_parallelism_threads=1, use_per_session_threads=1, inter_op_parallelism_threads=1) 41 | with tf.Session(graph=tf.Graph(), config=config) as sess, timer('fit_predict'): 42 | ks.backend.set_session(sess) 43 | model_in = ks.Input(shape=(X_train.shape[1],), dtype='float32', sparse=True) 44 | out = ks.layers.Dense(192, activation='relu')(model_in) 45 | out = ks.layers.Dense(64, activation='relu')(out) 46 | out = ks.layers.Dense(64, activation='relu')(out) 47 | out = ks.layers.Dense(1)(out) 48 | model = ks.Model(model_in, out) 49 | 
model.compile(loss='mean_squared_error', optimizer=ks.optimizers.Adam(lr=3e-3)) 50 | for i in range(3): 51 | with timer(f'epoch {i + 1}'): 52 | model.fit(x=X_train, y=y_train, batch_size=2**(11 + i), epochs=1, verbose=0) 53 | return model.predict(X_test)[:, 0] 54 | 55 | def main(): 56 | vectorizer = make_union( 57 | on_field('name', Tfidf(max_features=100000, token_pattern='\w+')), 58 | on_field('text', Tfidf(max_features=100000, token_pattern='\w+', ngram_range=(1, 2))), 59 | on_field(['shipping', 'item_condition_id'], 60 | FunctionTransformer(to_records, validate=False), DictVectorizer()), 61 | n_jobs=4) 62 | y_scaler = StandardScaler() 63 | with timer('process train'): 64 | train = pd.read_table('../input/train.tsv') 65 | train = train[train['price'] > 0].reset_index(drop=True) 66 | cv = KFold(n_splits=20, shuffle=True, random_state=42) 67 | train_ids, valid_ids = next(cv.split(train)) 68 | train, valid = train.iloc[train_ids], train.iloc[valid_ids] 69 | y_train = y_scaler.fit_transform(np.log1p(train['price'].values.reshape(-1, 1))) 70 | X_train = vectorizer.fit_transform(preprocess(train)).astype(np.float32) 71 | print(f'X_train: {X_train.shape} of {X_train.dtype}') 72 | del train 73 | with timer('process valid'): 74 | X_valid = vectorizer.transform(preprocess(valid)).astype(np.float32) 75 | with ThreadPool(processes=4) as pool: 76 | Xb_train, Xb_valid = [x.astype(np.bool).astype(np.float32) for x in [X_train, X_valid]] 77 | xs = [[Xb_train, Xb_valid], [X_train, X_valid]] * 2 78 | y_pred = np.mean(pool.map(partial(fit_predict, y_train=y_train), xs), axis=0) 79 | y_pred = np.expm1(y_scaler.inverse_transform(y_pred.reshape(-1, 1))[:, 0]) 80 | print('Valid RMSLE: {:.4f}'.format(np.sqrt(mean_squared_log_error(valid['price'], y_pred)))) 81 | 82 | if __name__ == '__main__': 83 | main() 84 | -------------------------------------------------------------------------------- /tips_tricks/5_1_Validation dataset tuning.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import pandas as pd\n", 12 | "import numpy as np\n", 13 | "from itertools import combinations\n", 14 | "from catboost import CatBoostClassifier\n", 15 | "from sklearn.model_selection import train_test_split, KFold\n", 16 | "from sklearn.metrics import roc_auc_score\n", 17 | "import warnings\n", 18 | "warnings.filterwarnings(\"ignore\")\n", 19 | "np.random.seed(42)" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 2, 25 | "metadata": { 26 | "collapsed": true 27 | }, 28 | "outputs": [], 29 | "source": [ 30 | "df = pd.read_csv('train.csv')\n", 31 | "y = df.target\n", 32 | "\n", 33 | "df.drop(['ID', 'target'], axis=1, inplace=True)\n", 34 | "df.fillna(-9999, inplace=True)\n", 35 | "cat_features_ids = np.where(df.apply(pd.Series.nunique) < 30000)[0].tolist()" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": 3, 41 | "metadata": { 42 | "collapsed": true 43 | }, 44 | "outputs": [], 45 | "source": [ 46 | "train, test, y_train, y_test = train_test_split(df, y, test_size = 0.1,random_state = 42)\n", 47 | "train, val, y_train, y_val = train_test_split(train, y_train, test_size = 0.25)" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": 4, 53 | "metadata": { 54 | "scrolled": false 55 | }, 56 | "outputs": [ 57 | { 58 | "name": "stdout", 59 | "output_type": "stream", 60 | "text": [ 
61 | "Roc-auc score with Catboost: 0.7841281938499387\n" 62 | ] 63 | } 64 | ], 65 | "source": [ 66 | "clf = CatBoostClassifier(learning_rate=0.1, iterations=100, random_seed=42, eval_metric='AUC', logging_level='Silent')\n", 67 | "clf.fit(train, y_train, cat_features=cat_features_ids, eval_set=(val, y_val))\n", 68 | "prediction = clf.predict_proba(test)\n", 69 | "print('Roc-auc score with Catboost:',roc_auc_score(y_test, prediction[:, 1]))" 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": 5, 75 | "metadata": {}, 76 | "outputs": [ 77 | { 78 | "name": "stdout", 79 | "output_type": "stream", 80 | "text": [ 81 | "Roc-auc score with Catboost: 0.7930162585925847\n" 82 | ] 83 | } 84 | ], 85 | "source": [ 86 | "kfold = KFold(n_splits=10)\n", 87 | "pred = []\n", 88 | "train, test, y_train, y_test = train_test_split(df, y, test_size = 0.1,random_state = 42)\n", 89 | "for train_ind, test_ind in kfold.split(train):\n", 90 | " train_val, test_val, y_train_val, y_test_val = train.iloc[train_ind, :], train.iloc[test_ind, :],\\\n", 91 | " y_train.iloc[train_ind], y_train.iloc[test_ind]\n", 92 | " clf.fit(train_val, y_train_val, cat_features=cat_features_ids, eval_set=(test_val, y_test_val))\n", 93 | " prediction = clf.predict_proba(test)\n", 94 | " pred.append(\n", 95 | " prediction[:, 1]\n", 96 | " )\n", 97 | " \n", 98 | "\n", 99 | "print('Roc-auc score with Catboost:',roc_auc_score(y_test, np.mean(pred, axis = 0)))" 100 | ] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "execution_count": null, 105 | "metadata": { 106 | "collapsed": true 107 | }, 108 | "outputs": [], 109 | "source": [] 110 | } 111 | ], 112 | "metadata": { 113 | "kernelspec": { 114 | "display_name": "Python 3", 115 | "language": "python", 116 | "name": "python3" 117 | }, 118 | "language_info": { 119 | "codemirror_mode": { 120 | "name": "ipython", 121 | "version": 3 122 | }, 123 | "file_extension": ".py", 124 | "mimetype": "text/x-python", 125 | "name": "python", 126 | "nbconvert_exporter": "python", 127 | "pygments_lexer": "ipython3", 128 | "version": "3.6.1" 129 | } 130 | }, 131 | "nbformat": 4, 132 | "nbformat_minor": 2 133 | } 134 | -------------------------------------------------------------------------------- /Hands-on Reinforcement Learning with Python, Second Edition_new chapters/01. Fundamentals of Reinforcement Learning/1.05. How RL differs from other ML paradigms?.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# How RL differs from other ML paradigms?\n", 8 | "Machine Learning (ML) can be categorized into three types:\n", 9 | "\n", 10 | "* Supervised Learning\n", 11 | "* Unsupervised Learning\n", 12 | "* Reinforcement Learning\n", 13 | "\n", 14 | "In supervised learning, the machine learns from the training data. The training data consists of labeled pair of inputs and outputs. So, we train the model (agent) using the given training data in such a way that the model can generalize its learning to new unseen data. It is called supervised learning because training data acts as a supervisor since it has a labeled pair of input and outputs and it guides the model in learning the given task.\n", 15 | "\n", 16 | "Now, let's understand the difference between supervised and reinforcement learning with an example. 
Consider the dog analogy we discussed earlier in the chapter, in supervised learning, to teach the dog to catch a ball, we will teach it explicitly by specifying turn left, go right, move forward seven steps, catch the ball, and so on in the form of training data. But in RL we just throw a ball, and every time the dog catches the ball, we give it a cookie (reward). So the dog will learn to catch the ball while trying to maximize the cookie (reward) it can get. \n", 17 | "\n", 18 | "Let's consider one more example. Say, we want to train the model to play the chess game using supervised learning. In that case, we will have training data which includes all the moves that can be played by the player in each state along with the labels indicating whether it is a good move or not. Then, we will train the model to learn from this training data. Whereas in the case of reinforcement learning, our agent will not be given any sort of training data, instead we just give a reward to the agent for each action it performs. Then the agent will learn by interacting with the environment and based on the reward it gets. \n", 19 | "\n", 20 | "Similar to supervised learning, in unsupervised learning, we train the model (agent) based on the training data. But in the case of unsupervised learning, the training data does not contain any labels, that is, it consists of only inputs and not output. The goal of unsupervised learning is to determine the hidden pattern in the input. There is a common misconception that reinforcement learning is a kind of unsupervised learning but it is not. In unsupervised learning, the model learns the hidden structure whereas in reinforcement learning the model learns by maximizing the reward.\n", 21 | "\n", 22 | "For instance, consider we are building a movie recommendation system and say we want to recommend a new movie to the user, then in the case of unsupervised learning, we can analyse the similar movies related to the movies the user has viewed before and recommend the new movies to the user, whereas, in case of reinforcement learning, the agent constantly receives feedback from the user based on the reward, understand his movie preferences, and builds a knowledge base on top of it and suggests new movies to the user.\n", 23 | "\n", 24 | "Thus, we can say that in both supervised and unsupervised learning the model (agent) learns based on the given training dataset whereas in reinforcement learning agent learns by directly interacting with the environment. Thus reinforcement learning is essentially an interaction between the agent and its environment. 
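To make the contrast concrete, here is a minimal, hedged sketch (not taken from the book; the toy labeled data and the two-armed bandit environment are invented purely for illustration). It places a supervised `fit` on labeled pairs next to an agent that learns only from reward feedback obtained by interacting:

```python
import numpy as np

# --- Supervised learning: learn from labeled (input, output) pairs ---
from sklearn.linear_model import LogisticRegression

X = np.array([[0.0], [1.0], [2.0], [3.0]])   # inputs
y = np.array([0, 0, 1, 1])                   # labels act as the "supervisor"
clf = LogisticRegression().fit(X, y)
print(clf.predict([[1.5]]))

# --- Reinforcement learning: learn from reward by interacting ---
# Toy 2-armed bandit "environment": action 0 pays off rarely, action 1 often.
rng = np.random.default_rng(0)
win_prob = [0.2, 0.8]
value = np.zeros(2)        # running estimate of each action's reward
counts = np.zeros(2)

for step in range(1000):
    # mostly exploit the best-looking action, sometimes explore a random one
    action = rng.integers(2) if rng.random() < 0.1 else int(np.argmax(value))
    reward = float(rng.random() < win_prob[action])   # feedback is only a reward
    counts[action] += 1
    value[action] += (reward - value[action]) / counts[action]

print(value)   # the agent discovers action 1 is better without ever seeing a label
```

The supervised model never acts and never receives a reward; the agent never sees a labeled answer, yet it converges on the better action purely from the reward signal.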
" 25 | ] 26 | } 27 | ], 28 | "metadata": { 29 | "kernelspec": { 30 | "display_name": "Python 3", 31 | "language": "python", 32 | "name": "python3" 33 | }, 34 | "language_info": { 35 | "codemirror_mode": { 36 | "name": "ipython", 37 | "version": 3 38 | }, 39 | "file_extension": ".py", 40 | "mimetype": "text/x-python", 41 | "name": "python", 42 | "nbconvert_exporter": "python", 43 | "pygments_lexer": "ipython3", 44 | "version": "3.6.9" 45 | } 46 | }, 47 | "nbformat": 4, 48 | "nbformat_minor": 2 49 | } 50 | -------------------------------------------------------------------------------- /big_data_for_engineers/spark_shortest_path.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "from pyspark import SparkConf, SparkContext\n", 12 | "sc = SparkContext(conf=SparkConf().setAppName(\"MyApp\").setMaster(\"local[8]\"))" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 2, 18 | "metadata": { 19 | "collapsed": true 20 | }, 21 | "outputs": [], 22 | "source": [ 23 | "def parse_edge(s):\n", 24 | " u, f = s.split(\"\\t\")\n", 25 | " return (int(u), int(f))\n", 26 | "\n", 27 | "def step(i): \n", 28 | " pv, pd, nv = i[0], i[1][0], i[1][1] \n", 29 | " return (nv, pd + 1)\n", 30 | "\n", 31 | "def complete(item): \n", 32 | " v, od, nd = item[0], item[1][0], item[1][1]\n", 33 | " return (v, od if od is not None else nd)\n", 34 | "\n", 35 | "def update_path(x):\n", 36 | " v, (old_path, (dist, new_v)) = x\n", 37 | " return new_v, old_path + (new_v,)" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": 3, 43 | "metadata": { 44 | "collapsed": true 45 | }, 46 | "outputs": [], 47 | "source": [ 48 | "def shortest_path(v_from, v_to, dataset_path, numPartitions=10):\n", 49 | " edges = sc.textFile(dataset_path, numPartitions).map(parse_edge).cache()\n", 50 | " forward_edges = edges.map(lambda e: (e[1], e[0])).partitionBy(numPartitions).cache()\n", 51 | " \n", 52 | " d = 0\n", 53 | " distances = sc.parallelize([(v_from, d)]).partitionBy(numPartitions)\n", 54 | " paths = sc.parallelize([(v_from, (v_from,))])\n", 55 | " \n", 56 | " while True:\n", 57 | " candidates = distances.join(forward_edges, numPartitions).map(step)\n", 58 | " paths = paths.join(forward_edges).map(lambda x: (x[1][1], x[1][0] + (x[1][1],))).union(paths).distinct().cache()\n", 59 | " new_distances = distances.fullOuterJoin(candidates).map(complete).distinct().cache()\n", 60 | " count = new_distances.filter(lambda i: i[1] == d + 1).count() \n", 61 | " if count > 0:\n", 62 | " d += 1 \n", 63 | " distances = new_distances\n", 64 | " # print \"d = {}, count = {}\".format(d, count)\n", 65 | " else:\n", 66 | " break\n", 67 | " \n", 68 | " result = paths.filter(lambda x: x[1][0] == v_from and x[1][-1] == v_to).collect()\n", 69 | " return ','.join(map(str, sorted(result, key=lambda x: len(x[1]))[0][1]))" 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": 5, 75 | "metadata": {}, 76 | "outputs": [ 77 | { 78 | "name": "stdout", 79 | "output_type": "stream", 80 | "text": [ 81 | "CPU times: user 3.58 s, sys: 1.83 s, total: 5.41 s\n", 82 | "Wall time: 11min 23s\n" 83 | ] 84 | }, 85 | { 86 | "data": { 87 | "text/plain": [ 88 | "'12,422,53,52,107,20,23,274,34'" 89 | ] 90 | }, 91 | "execution_count": 5, 92 | "metadata": {}, 93 | "output_type": "execute_result" 94 | } 95 | ], 96 | "source": [ 97 | "%%time\n", 
98 | "shortest_path(12, 34, \"/data/twitter/twitter_sample.txt\")" 99 | ] 100 | }, 101 | { 102 | "cell_type": "code", 103 | "execution_count": null, 104 | "metadata": { 105 | "collapsed": true 106 | }, 107 | "outputs": [], 108 | "source": [] 109 | } 110 | ], 111 | "metadata": { 112 | "kernelspec": { 113 | "display_name": "Python 2", 114 | "language": "python", 115 | "name": "python2" 116 | }, 117 | "language_info": { 118 | "codemirror_mode": { 119 | "name": "ipython", 120 | "version": 2 121 | }, 122 | "file_extension": ".py", 123 | "mimetype": "text/x-python", 124 | "name": "python", 125 | "nbconvert_exporter": "python", 126 | "pygments_lexer": "ipython2", 127 | "version": "2.7.12" 128 | } 129 | }, 130 | "nbformat": 4, 131 | "nbformat_minor": 2 132 | } 133 | -------------------------------------------------------------------------------- /Hands-on Reinforcement Learning with Python, Second Edition_new chapters/02. A Guide to the Gym Toolkit/2.05. Cart Pole Balancing with Random Policy.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Cart Pole Balancing with Random Policy\n", 8 | "\n", 9 | "Let's create an agent with the random policy, that is, we create the agent that selects the random action in the environment and tries to balance the pole. The agent receives +1 reward every time the pole stands straight up on the cart. We will generate over 100 episodes and we will see the return (sum of rewards) obtained over each episode. Let's learn this step by step.\n", 10 | "\n", 11 | "First, create our cart pole environment:" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 2, 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [ 20 | "import gym\n", 21 | "env = gym.make('CartPole-v0')" 22 | ] 23 | }, 24 | { 25 | "cell_type": "markdown", 26 | "metadata": {}, 27 | "source": [ 28 | "\n", 29 | "Set the number of episodes and number of time steps in the episode:\n" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": 3, 35 | "metadata": {}, 36 | "outputs": [], 37 | "source": [ 38 | "num_episodes = 100\n", 39 | "num_timesteps = 50" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": 4, 45 | "metadata": {}, 46 | "outputs": [ 47 | { 48 | "name": "stdout", 49 | "output_type": "stream", 50 | "text": [ 51 | "Episode: 0, Return: 23.0\n", 52 | "Episode: 10, Return: 12.0\n", 53 | "Episode: 20, Return: 23.0\n", 54 | "Episode: 30, Return: 15.0\n", 55 | "Episode: 40, Return: 19.0\n", 56 | "Episode: 50, Return: 10.0\n", 57 | "Episode: 60, Return: 16.0\n", 58 | "Episode: 70, Return: 10.0\n", 59 | "Episode: 80, Return: 22.0\n", 60 | "Episode: 90, Return: 38.0\n" 61 | ] 62 | } 63 | ], 64 | "source": [ 65 | "#for each episode\n", 66 | "for i in range(num_episodes):\n", 67 | " \n", 68 | " #set the Return to 0\n", 69 | " Return = 0\n", 70 | " #initialize the state by resetting the environment\n", 71 | " state = env.reset()\n", 72 | " \n", 73 | " #for each step in the episode\n", 74 | " for t in range(num_timesteps):\n", 75 | " #render the environment\n", 76 | " env.render()\n", 77 | " \n", 78 | " #randomly select an action by sampling from the environment\n", 79 | " random_action = env.action_space.sample()\n", 80 | " \n", 81 | " #perform the randomly selected action\n", 82 | " next_state, reward, done, info = env.step(random_action)\n", 83 | "\n", 84 | " #update the return\n", 85 | " Return = Return + reward\n", 86 | 
"\n", 87 | " #if the next state is a terminal state then end the episode\n", 88 | " if done:\n", 89 | " break\n", 90 | " #for every 10 episodes, print the return (sum of rewards)\n", 91 | " if i%10==0:\n", 92 | " print('Episode: {}, Return: {}'.format(i, Return))\n", 93 | " " 94 | ] 95 | }, 96 | { 97 | "cell_type": "markdown", 98 | "metadata": {}, 99 | "source": [ 100 | "Close the environment:" 101 | ] 102 | }, 103 | { 104 | "cell_type": "code", 105 | "execution_count": 5, 106 | "metadata": {}, 107 | "outputs": [], 108 | "source": [ 109 | "env.close()" 110 | ] 111 | } 112 | ], 113 | "metadata": { 114 | "kernelspec": { 115 | "display_name": "Python 3", 116 | "language": "python", 117 | "name": "python3" 118 | }, 119 | "language_info": { 120 | "codemirror_mode": { 121 | "name": "ipython", 122 | "version": 3 123 | }, 124 | "file_extension": ".py", 125 | "mimetype": "text/x-python", 126 | "name": "python", 127 | "nbconvert_exporter": "python", 128 | "pygments_lexer": "ipython3", 129 | "version": "3.6.9" 130 | } 131 | }, 132 | "nbformat": 4, 133 | "nbformat_minor": 2 134 | } 135 | -------------------------------------------------------------------------------- /Hands-on Reinforcement Learning with Python, Second Edition_new chapters/06. Case Study: The MAB Problem/6.03. Epsilon-Greedy.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Epsilon-greedy\n", 8 | "\n", 9 | "We already learned about the epsilon-greedy algorithm in the previous chapters. With the epsilon-greedy, we select the best arm with probability 1-epsilon and we select the random arm with probability epsilon. Let's take a simple example and learn how we find the best arm exactly with the epsilon-greedy method in more detail. \n", 10 | "\n", 11 | "Say, we have two arms - arm 1 and arm 2. Suppose, with arm 1 we win the game 80% of the time and with arm 2 we win the game with 20% of the time. So, we can say that arm 1 is the best arm as it makes us win the game 80% of the time. Now, let's learn how to find this with the epsilon-greedy method. \n", 12 | "\n", 13 | "First, we initialize the `count` - number of times the arm is pulled, `sum_rewards` - the sum of rewards obtained from pulling the arm, `Q`- average reward obtained by pulling the arm as shown below:\n", 14 | "\n", 15 | "\n", 16 | "![title](Images/1.PNG)\n", 17 | "\n", 18 | "\n", 19 | "\n", 20 | "## Round 1:\n", 21 | "\n", 22 | "Say, in round 1 of the game, we select the random arm with probability epsilon, suppose we randomly pull the arm 1 and observe the reward. Let the reward obtained by pulling the arm 1 be 1. So, we update our table with `count` of arm 1 to 1, `sum_rewards` of arm 1 to 1 and thus the average reward `Q` of the arm 1 after round 1 will be 1 as shown below:\n", 23 | "\n", 24 | "\n", 25 | "![title](Images/2.PNG)\n", 26 | "\n", 27 | "\n", 28 | "## Round 2:\n", 29 | "\n", 30 | "Say, in round 2, we select the best arm with probability 1-epsilon. The best arm is the one which has a maximum average reward. So, we check our table as which arm has the maximum average reward, since arm 1 has the maximum average reward, we pull the arm 1 and observe the reward and let the reward obtained from pulling the arm 1 be 1. 
So, we update our table with `count` of arm 1 to 2, `sum_rewards` of arm 1 to 2 and thus the average reward `Q` of the arm 1 after round 2 will be 1 as shown below:\n", 31 | "\n", 32 | "\n", 33 | "\n", 34 | "![title](Images/3.PNG)\n", 35 | "## Round 3:\n", 36 | "\n", 37 | "Say, in round 3, we select the random arm with probability epsilon, suppose we randomly pull the arm 2 and observe the reward. Let the reward obtained by pulling the arm 2 be 0. So, we update our table with `count` of arm 2 to 1, `sum_rewards` of arm 2 to 0 and thus the average reward `Q` of the arm 2 after round 3 will be 0 as shown below:\n", 38 | "\n", 39 | "![title](Images/4.PNG)\n", 40 | "\n", 41 | "## Round 4:\n", 42 | "\n", 43 | "Say, in round 4, we select the best arm with probability 1-epsilon. So, we pull arm 1 since it has a maximum average reward. Let the reward obtained by pulling arm 1 be 0 this time. Now, we update our table with `count` of arm 1 to 3, `sum_rewards` of arm 2 to 2 and thus the average reward `Q` of the arm 1 after round 4 will be 0.66 as shown below:\n", 44 | "\n", 45 | "\n", 46 | "\n", 47 | "![title](Images/5.PNG)\n", 48 | "We repeat this process for several numbers of rounds, that is, for several rounds of the game, we pull the best arm with probability 1-epsilon and we pull the random arm with the probability epsilon. The updated table after some 100 rounds of game is shown below:\n", 49 | "\n", 50 | "![title](Images/6.PNG)\n", 51 | "\n", 52 | "\n", 53 | "From the above table, we can conclude that arm 1 is the best arm since it has the maximum average reward. \n" 54 | ] 55 | } 56 | ], 57 | "metadata": { 58 | "kernelspec": { 59 | "display_name": "Python 3", 60 | "language": "python", 61 | "name": "python3" 62 | }, 63 | "language_info": { 64 | "codemirror_mode": { 65 | "name": "ipython", 66 | "version": 3 67 | }, 68 | "file_extension": ".py", 69 | "mimetype": "text/x-python", 70 | "name": "python", 71 | "nbconvert_exporter": "python", 72 | "pygments_lexer": "ipython3", 73 | "version": "3.6.9" 74 | } 75 | }, 76 | "nbformat": 4, 77 | "nbformat_minor": 2 78 | } 79 | -------------------------------------------------------------------------------- /Hands-on Reinforcement Learning with Python, Second Edition_new chapters/01. Fundamentals of Reinforcement Learning/1.11. Different Types of Environments.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Different types of environments\n", 8 | "\n", 9 | "We learned that the environment is the world of the agent and the agent lives/stays within the environment. We can categorize the environment into different types as follows:\n", 10 | "\n", 11 | "## Deterministic and Stochastic environment\n", 12 | "\n", 13 | "__Deterministic environment__ - In a deterministic environment, we can be sure that when an agent performs an action $a$ in the state $s$ then it always reaches the state $s'$ exactly. For example, let's consider our grid world environment. 
Say the agent is in state A when we perform action down in the state A we always reach the state D and so it is called deterministic environment:\n", 14 | "\n", 15 | "\n", 16 | "\n", 17 | "![title](Images/35.png)\n", 18 | "\n", 19 | "__Stochastic environment__ - In a stochastic environment, we cannot say that by performing some action $a$ in the state $s$ the agent always reaches the state $s'$ exactly because there will be some randomness associated with the stochastic environment. For example, let's suppose our grid world environment is a stochastic environment. Say our agent is in state A, now if we perform action down in the state A then the agent doesn't always reach the state D instead it reaches the state D for 70% of the time and the state B for 30 % of the time. That is, if we perform action down in the state A then the agent reaches the state D with 70% probability and the state B with 30% probability as shown below:\n", 20 | "\n", 21 | "\n", 22 | "![title](Images/36.png)\n", 23 | "\n", 24 | "## Discrete and continuous environment \n", 25 | "\n", 26 | "__Discrete Environment__ - When the action space of the environment is discrete then our environment is called a discrete environment. For instance, in the grid world environment, we have discrete action space which includes actions such as [up, down, left, right] and thus our grid world environment is called the discrete environment. \n", 27 | "\n", 28 | "__Continuous environment__ - When the action space of the environment is continuous then our environment is called a continuous environment. For instance, suppose, we are training an agent to drive a car then our action space will be continuous with several continuous actions such as speed in which we need to drive the car, the number of degrees we need to rotate the wheel and so on. In such a case where our action space of the environment is continuous, it is called continuous environment. \n", 29 | "\n", 30 | "## Episodic and non-episodic environment \n", 31 | "\n", 32 | "__Episodic environment__ - In an episodic environment, an agent's current action will not affect future action and thus the episodic environment is also called the non-sequential environment. \n", 33 | "\n", 34 | "__Non-episodic environment__ - In a non-episodic environment, an agent's current action will affect future action and thus the non-episodic environment is also called the sequential environment. Example: The chessboard is a sequential environment since the agent's current action will affect future action in a chess game.\n", 35 | "\n", 36 | "## Single and multi-agent environment\n", 37 | "\n", 38 | "__Single-agent environment__ - When our environment consists of only a single agent then it is called a single-agent environment. \n", 39 | "\n", 40 | "__Multi-agent environment__ - When our environment consists of multiple agents then it is called a multi-agent environment. 
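As a quick illustration of the discrete versus continuous distinction described above, here is a hedged sketch; it assumes the classic `CartPole-v0` and `Pendulum-v0` environments from the same Gym version used elsewhere in this repository are available:

```python
import gym

# CartPole has a discrete action space: the agent can only push left or right.
discrete_env = gym.make('CartPole-v0')
print(discrete_env.action_space)             # e.g. Discrete(2)

# Pendulum has a continuous action space: the applied torque is a real number.
continuous_env = gym.make('Pendulum-v0')
print(continuous_env.action_space)           # e.g. Box(-2.0, 2.0, (1,), float32)
print(continuous_env.action_space.sample())  # e.g. array([0.37], dtype=float32)
```

Inspecting `action_space` like this is an easy way to tell which category an environment falls into before choosing an algorithm for it.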
" 41 | ] 42 | } 43 | ], 44 | "metadata": { 45 | "kernelspec": { 46 | "display_name": "Python 3", 47 | "language": "python", 48 | "name": "python3" 49 | }, 50 | "language_info": { 51 | "codemirror_mode": { 52 | "name": "ipython", 53 | "version": 3 54 | }, 55 | "file_extension": ".py", 56 | "mimetype": "text/x-python", 57 | "name": "python", 58 | "nbconvert_exporter": "python", 59 | "pygments_lexer": "ipython3", 60 | "version": "3.6.9" 61 | } 62 | }, 63 | "nbformat": 4, 64 | "nbformat_minor": 2 65 | } 66 | -------------------------------------------------------------------------------- /recursion-cellular-image-classification/data_loader.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from sklearn.model_selection import train_test_split 3 | from PIL import Image 4 | from torchvision import transforms as T 5 | import torch.utils.data as D 6 | import torch 7 | from config import _get_default_config 8 | from albumentations import ( 9 | HorizontalFlip, IAAPerspective, ShiftScaleRotate, CLAHE, RandomRotate90, 10 | Transpose, ShiftScaleRotate, Blur, OpticalDistortion, GridDistortion, HueSaturationValue, 11 | IAAAdditiveGaussianNoise, GaussNoise, MotionBlur, MedianBlur, RandomBrightnessContrast, IAAPiecewiseAffine, 12 | IAASharpen, IAAEmboss, Flip, OneOf, Compose, RandomCrop, RandomSizedCrop, CenterCrop 13 | ) 14 | 15 | path_data = '/mnt/ssd1/datasets/Recursion_class' 16 | config = _get_default_config() 17 | BATCH_SIZE = config.batch_size 18 | 19 | 20 | def strong_aug(p=.3): 21 | return Compose([ 22 | # RandomRotate90(), 23 | Flip(), # good 24 | # Transpose(), 25 | # OneOf([ 26 | # IAAAdditiveGaussianNoise(), 27 | # GaussNoise(), 28 | # ], p=0.2), 29 | # OneOf([ 30 | # RandomCrop(384, 384), 31 | # ], p=0.2), 32 | ShiftScaleRotate(shift_limit=0.0625, scale_limit=0.2, rotate_limit=90, p=0.2), # good 33 | # OneOf([ 34 | # OpticalDistortion(p=0.3), 35 | # GridDistortion(p=.1), 36 | # IAAPiecewiseAffine(p=0.3), 37 | # ], p=0.2), 38 | # OneOf([ 39 | # IAASharpen(), 40 | # IAAEmboss(), 41 | # RandomBrightnessContrast(), 42 | # ], p=0.3), 43 | ], p=p) 44 | 45 | 46 | aug = strong_aug() 47 | 48 | transform = T.Compose([ 49 | T.Normalize((5.845, 15.567, 10.105, 9.964, 5.576, 9.067), 50 | (6.905, 12.556, 5.584, 7.445, 4.668, 4.910))]) 51 | 52 | 53 | class ImagesDS(D.Dataset): 54 | def __init__(self, df, img_dir, mode='train', site=config.site, channels=[1, 2, 3, 4, 5, 6]): 55 | self.records = df.to_records(index=False) 56 | self.channels = channels 57 | self.site = site 58 | self.mode = mode 59 | self.img_dir = img_dir 60 | self.len = df.shape[0] 61 | 62 | @staticmethod 63 | def _load_img_as_tensor(file_name): 64 | with Image.open(file_name) as img: 65 | return T.ToTensor()(img) 66 | 67 | def _get_img_path(self, index, channel): 68 | experiment, well, plate = self.records[index].experiment, self.records[index].well, self.records[index].plate 69 | return '/'.join([self.img_dir, experiment, f'Plate{plate}', f'{well}_s{self.site}_w{channel}.png']) 70 | 71 | def __getitem__(self, index): 72 | paths = [self._get_img_path(index, ch) for ch in self.channels] 73 | img = torch.cat([self._load_img_as_tensor(img_path) for img_path in paths]) 74 | if self.mode == 'train': 75 | return aug(image=img.cpu().detach().numpy())['image'], int(self.records[index].sirna) 76 | # return img, int(self.records[index].sirna) 77 | 78 | else: 79 | return img, self.records[index].id_code 80 | 81 | def __len__(self): 82 | return self.len 83 | 84 | 85 | df = 
pd.read_csv(path_data+'/train.csv', engine='python') 86 | 87 | 88 | df_train, df_val = train_test_split(df, test_size=0.1, stratify=df.sirna, random_state=config.random_seed) 89 | 90 | if not config.all: 91 | 92 | col_name = df.columns.tolist() 93 | df_train = pd.DataFrame([x for x in df_train.values if config.experiment in x[0]]) 94 | df_val = pd.DataFrame([x for x in df_val.values if config.experiment in x[0]]) 95 | df_train.columns = col_name 96 | df_val.columns = col_name 97 | 98 | df_test = pd.read_csv(path_data+'/test.csv', engine='python') 99 | 100 | ds = ImagesDS(df_train, path_data, mode='train') 101 | ds_val = ImagesDS(df_val, path_data, mode='train') 102 | ds_test = ImagesDS(df_test, path_data, mode='test') 103 | 104 | loader = D.DataLoader(ds, batch_size=BATCH_SIZE, shuffle=True, num_workers=8) 105 | val_loader = D.DataLoader(ds_val, batch_size=BATCH_SIZE, shuffle=True, num_workers=8) 106 | tloader = D.DataLoader(ds_test, batch_size=BATCH_SIZE, shuffle=False, num_workers=8) -------------------------------------------------------------------------------- /Keras starter with bagging (LB: 1120.596).py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | import pandas as pd 4 | import subprocess 5 | from scipy.sparse import csr_matrix, hstack 6 | from sklearn.metrics import mean_absolute_error 7 | from sklearn.preprocessing import StandardScaler 8 | from sklearn.cross_validation import KFold 9 | from keras.models import Sequential 10 | from keras.layers import Dense, Dropout, Activation 11 | from keras.layers.advanced_activations import PReLU 12 | def batch_generator(X, y, batch_size, shuffle): 13 | #chenglong code for fiting from generator (https://www.kaggle.com/c/talkingdata-mobile-user-demographics/forums/t/22567/neural-network-for-sparse-matrices) 14 | number_of_batches = np.ceil(X.shape[0]/batch_size) 15 | counter = 0 16 | sample_index = np.arange(X.shape[0]) 17 | if shuffle: 18 | np.random.shuffle(sample_index) 19 | while True: 20 | batch_index = sample_index[batch_size*counter:batch_size*(counter+1)] 21 | X_batch = X[batch_index,:].toarray() 22 | y_batch = y[batch_index] 23 | counter += 1 24 | yield X_batch, y_batch 25 | if (counter == number_of_batches): 26 | if shuffle: 27 | np.random.shuffle(sample_index) 28 | counter = 0 29 | def batch_generatorp(X, batch_size, shuffle): 30 | number_of_batches = X.shape[0] / np.ceil(X.shape[0]/batch_size) 31 | counter = 0 32 | sample_index = np.arange(X.shape[0]) 33 | while True: 34 | batch_index = sample_index[batch_size * counter:batch_size * (counter + 1)] 35 | X_batch = X[batch_index, :].toarray() 36 | counter += 1 37 | yield X_batch 38 | if (counter == number_of_batches): 39 | counter = 0 40 | ## read data 41 | train = pd.read_csv('train.csv') 42 | test = pd.read_csv('test.csv') 43 | 44 | ## set test loss to NaN 45 | test['loss'] = np.nan 46 | 47 | ## response and IDs 48 | y = train['loss'].values 49 | id_train = train['id'].values 50 | id_test = test['id'].values 51 | ## stack train test 52 | ntrain = train.shape[0] 53 | tr_te = pd.concat((train, test), axis = 0) 54 | ## Preprocessing and transforming to sparse data 55 | sparse_data = [] 56 | 57 | f_cat = [f for f in tr_te.columns if 'cat' in f] 58 | for f in f_cat: 59 | dummy = pd.get_dummies(tr_te[f].astype('category')) 60 | tmp = csr_matrix(dummy) 61 | sparse_data.append(tmp) 62 | 63 | f_num = [f for f in tr_te.columns if 'cont' in f] 64 | scaler = StandardScaler() 65 | tmp = 
csr_matrix(scaler.fit_transform(tr_te[f_num])) 66 | sparse_data.append(tmp) 67 | 68 | del(tr_te, train, test) 69 | ## sparse train and test data 70 | xtr_te = hstack(sparse_data, format = 'csr') 71 | xtrain = xtr_te[:ntrain, :] 72 | xtest = xtr_te[ntrain:, :] 73 | 74 | print('Dim train', xtrain.shape) 75 | print('Dim test', xtest.shape) 76 | 77 | del(xtr_te, sparse_data, tmp) 78 | ## neural net 79 | def nn_model(): 80 | model = Sequential() 81 | model.add(Dense(400, input_dim = xtrain.shape[1], init = 'he_normal')) 82 | model.add(PReLU()) 83 | model.add(Dropout(0.4)) 84 | model.add(Dense(200, init = 'he_normal')) 85 | model.add(PReLU()) 86 | model.add(Dropout(0.2)) 87 | model.add(Dense(1, init = 'he_normal')) 88 | model.compile(loss = 'mae', optimizer = 'adadelta') 89 | return(model) 90 | ## cv-folds 91 | nfolds = 5 92 | folds = KFold(len(y), n_folds = nfolds, shuffle = True) 93 | ## train models 94 | i = 0 95 | nbags = 5 96 | nepochs = 55 97 | pred_oob = np.zeros(xtrain.shape[0]) 98 | pred_test = np.zeros(xtest.shape[0]) 99 | 100 | for (inTr, inTe) in folds: 101 | xtr = xtrain[inTr] 102 | ytr = y[inTr] 103 | xte = xtrain[inTe] 104 | yte = y[inTe] 105 | pred = np.zeros(xte.shape[0]) 106 | for j in range(nbags): 107 | model = nn_model() 108 | fit = model.fit_generator(generator = batch_generator(xtr, ytr, 128, True), 109 | nb_epoch = nepochs, 110 | samples_per_epoch = xtr.shape[0], 111 | verbose = 0) 112 | pred += model.predict_generator(generator = batch_generatorp(xte, 800, False), val_samples = xte.shape[0])[:,0] 113 | pred_test += model.predict_generator(generator = batch_generatorp(xtest, 800, False), val_samples = xtest.shape[0])[:,0] 114 | pred /= nbags 115 | pred_oob[inTe] = pred 116 | score = mean_absolute_error(yte, pred) 117 | i += 1 118 | print('Fold ', i, '- MAE:', score) 119 | 120 | print('Total - MAE:', mean_absolute_error(y, pred_oob)) 121 | ## train predictions 122 | df = pd.DataFrame({'id': id_train, 'loss': pred_oob}) 123 | df.to_csv('preds_oob.csv', index = False) 124 | 125 | ## test predictions 126 | pred_test /= (nfolds*nbags) 127 | df = pd.DataFrame({'id': id_test, 'loss': pred_test}) 128 | df.to_csv('submission_keras.csv', index = False) 129 | -------------------------------------------------------------------------------- /recursion-cellular-image-classification/eda.py: -------------------------------------------------------------------------------- 1 | import os 2 | from data_loader import loader, val_loader 3 | import torch 4 | import torch.nn as nn 5 | from torch.optim.lr_scheduler import ExponentialLR,CyclicLR 6 | from model_k import model_resnet_18 7 | from ignite.engine import Events, create_supervised_evaluator, create_supervised_trainer 8 | from ignite.metrics import Loss, Accuracy 9 | from ignite.contrib.handlers.tqdm_logger import ProgressBar 10 | from ignite.handlers import EarlyStopping, ModelCheckpoint 11 | import warnings 12 | from config import _get_default_config 13 | from scheduler import ParamScheduler, return_scale_fn 14 | from losses import FocalLoss 15 | 16 | 17 | model = model_resnet_18 18 | config = _get_default_config() 19 | MODEL_NAME = config.model 20 | 21 | path_data = '/mnt/ssd1/datasets/Recursion_class/' 22 | device = 'cuda' 23 | 24 | 25 | if config.warm_start: 26 | checkpoint_name = config.checkpoint_name 27 | checkpoint = torch.load(checkpoint_name) 28 | model.load_state_dict(checkpoint) 29 | model.to(device) 30 | 31 | os.environ["CUDA_VISIBLE_DEVICES"] = "0,1" 32 | path = '/mnt/ssd1/datasets/Recursion_class' 33 | path_data = path 34 | 
device = 'cuda' 35 | batch_size = 64 36 | warnings.filterwarnings('ignore') 37 | 38 | 39 | criterion = nn.CrossEntropyLoss() 40 | criterion = FocalLoss() 41 | optimizer = torch.optim.Adam(model.parameters(), lr=0.0003) 42 | # optimizer = torch.optim.RMSprop(model.parameters(), lr=0.0003, momentum=0.9) 43 | 44 | metrics = { 45 | 'loss': Loss(criterion), 46 | 'accuracy': Accuracy(), 47 | } 48 | 49 | 50 | trainer = create_supervised_trainer(model, optimizer, criterion, device=device) 51 | val_evaluator = create_supervised_evaluator(model, metrics=metrics, device=device) 52 | 53 | 54 | @trainer.on(Events.EPOCH_COMPLETED) 55 | def compute_and_display_val_metrics(engine): 56 | epoch = engine.state.epoch 57 | metrics = val_evaluator.run(val_loader).metrics 58 | print("Validation Results - Epoch: {} Average Loss: {:.4f} | Accuracy: {:.4f} " 59 | .format(engine.state.epoch, 60 | metrics['loss'], 61 | metrics['accuracy'])) 62 | 63 | 64 | # scale_fn = return_scale_fn() 65 | # lr_scheduler = CyclicLR(optimizer, base_lr=0.00001, max_lr=0.001, 66 | # mode='exp_range', gamma=1.1, cycle_momentum=False) 67 | lr_scheduler = ExponentialLR(optimizer, gamma=0.95) 68 | # lr_scheduler = ParamScheduler(optimizer, scale_fn, 500 * len(loader)) 69 | 70 | 71 | @trainer.on(Events.EPOCH_COMPLETED) 72 | def update_lr_scheduler(engine): 73 | lr_scheduler.step() 74 | lr = float(optimizer.param_groups[0]['lr']) 75 | print("Learning rate: {}".format(lr)) 76 | 77 | 78 | check_pointer = ModelCheckpoint('checkpoint_{}'.format(config.checkpoint_folder), MODEL_NAME, 79 | n_saved=3, create_dir=True, save_as_state_dict=True, 80 | score_function=lambda engine: engine.state.metrics['accuracy'], 81 | require_empty=False, score_name="val_accuracy" 82 | ) 83 | 84 | handler = EarlyStopping(patience=25, score_function=lambda engine: engine.state.metrics['accuracy'], 85 | trainer=trainer) 86 | val_evaluator.add_event_handler(Events.COMPLETED, handler) 87 | if config.all: 88 | name_exp = 'general' 89 | else: 90 | name_exp = config.experiment 91 | val_evaluator.add_event_handler(Events.COMPLETED, check_pointer, 92 | {'{}_site_{}'.format(name_exp, config.site): model}) 93 | 94 | 95 | @trainer.on(Events.EPOCH_STARTED) 96 | def turn_on_layers(engine): 97 | epoch = engine.state.epoch 98 | if epoch < 2: 99 | for name, child in model.named_children(): 100 | if name == 'fc' or name == 'classifier': 101 | pbar.log_message(name + ' is unfrozen') 102 | for param in child.parameters(): 103 | param.requires_grad = True 104 | else: 105 | pbar.log_message(name + ' is frozen') 106 | for param in child.parameters(): 107 | param.requires_grad = False 108 | elif epoch > 1: 109 | pbar.log_message("Turn on all the layers") 110 | for name, child in model.named_children(): 111 | for param in child.parameters(): 112 | param.requires_grad = True 113 | 114 | 115 | pbar = ProgressBar(bar_format='') 116 | pbar.attach(trainer, output_transform=lambda x: {'loss': x}) 117 | trainer.run(loader, max_epochs=500) 118 | 119 | 120 | # with torch.no_grad(): 121 | # preds = np.empty(0) 122 | # for x, _ in tqdm_notebook(tloader): 123 | # x = x.to(device) 124 | # output = model_resnet_18(x) 125 | # idx = output.max(dim=-1)[1].cpu().numpy() 126 | # preds = np.append(preds, idx, axis=0) 127 | # 128 | # 129 | # submission = pd.read_csv(path_data + '/test.csv') 130 | # submission['sirna'] = preds.astype(int) 131 | # submission.to_csv('submission_1.csv', index=False, columns=['id_code', 'sirna']) 132 | -------------------------------------------------------------------------------- 
/Hands-on Reinforcement Learning with Python, Second Edition_new chapters/11. TRPO, PPO and ACKTR Methods/11.01. Trust Region Policy Optimization.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Trust Region Policy Optimization\n", 8 | "\n", 9 | "Trust region policy optimization, known as TRPO for short, is one of the most popular algorithms in deep reinforcement learning. TRPO is a policy gradient algorithm and it acts as an improvement to the policy gradient with baseline method we learned in chapter 8. We learned that the policy gradient is an on-policy algorithm, meaning that on every iteration we improve the same policy with which we generate trajectories. On every iteration, we update the parameter of our network and try to find the improved policy. The update rule for updating the parameter $\\theta$ of our network is given as follows:\n", 10 | "\n", 11 | "$$\\theta = \\theta + \\alpha \\nabla_{\\theta} J({\\theta}) $$\n", 12 | "\n", 13 | "where $\\nabla_{\\theta} J({\\theta})$ is the gradient and $\\alpha$ is known as the step size or learning rate. If the step size is large then there will be a large policy update, and if it is small then there will be a small update in the policy. How can we find an optimal step size? In the policy gradient method, we keep the step size small, and so on every iteration there will be a small improvement in the policy.\n", 14 | "\n", 15 | "\n", 16 | "But what happens if we take a large step on every iteration? Let's suppose we have a policy $\\pi$ parameterized by $\\theta$. So, on every iteration, updating $\\theta$ implies that we are improving our policy. If the step size is large then the policy varies greatly on every iteration, that is, the old policy (the policy used in the previous iteration) and the new policy (the policy used in the current iteration) vary greatly. \n", 17 | "\n", 18 | "\n", 19 | "We learned that if the step size is large then the new policy and the old policy will vary greatly. Since we are using a parameterized policy, this implies that if we make a large update (a large step size) then the parameter of the old policy and the new policy will differ heavily, and this leads to a problem called model collapse.\n", 20 | "\n", 21 | "This is the reason why, in the policy gradient method, instead of taking a large step and updating the parameter of our network, we take a small step and update the parameter so as to keep the old policy and the new policy close. But how can we improve on this? Can we take a larger step while still keeping the old policy and the new policy close, so that it won't hurt our model's performance and will also help us learn quickly? Yes, this is exactly the problem TRPO solves.\n", 22 | "\n", 23 | "TRPO tries to make a large policy update while imposing a constraint that the old policy and the new policy should not vary too much. Okay, what is this constraint? First, how can we measure whether the old policy and the new policy have changed greatly? This is where we use a measure called the KL divergence. The KL divergence is ubiquitous in reinforcement learning; it tells us how different two probability distributions are from each other. So, we can use the KL divergence to understand whether our old policy and new policy vary greatly or not. TRPO adds a constraint that the KL divergence between the old policy and the new policy should be less than or equal to some constant $\\delta$, as sketched below. 
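To make that constraint concrete, here is a small hedged sketch (the probability values and the threshold are invented for illustration, not taken from the book) of the KL divergence between an old and a new policy over a discrete action space in a single state:

```python
import numpy as np

# Old and new policy: probabilities over 3 discrete actions in some state
# (values invented for illustration).
pi_old = np.array([0.5, 0.3, 0.2])
pi_new = np.array([0.4, 0.4, 0.2])

# KL(pi_old || pi_new) = sum_a pi_old(a) * log(pi_old(a) / pi_new(a))
kl = np.sum(pi_old * np.log(pi_old / pi_new))
print(kl)          # small here; a drastic policy jump would make it much larger

# TRPO's trust region check (threshold chosen only for illustration)
delta = 0.05
print(kl <= delta)
```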
That is, when we make a policy update, old policy and a new policy should not vary more than some constant. This constraint is called trust region constraint. \n", 24 | "\n", 25 | "Thus, TRPO tries to make a large policy update while imposing the constraint that the parameter of the old policy and a new policy should be within the trust region. Note that in the policy gradient method, we use the parameterized policy. Thus, keeping the parameter of the old policy and new policy within the trust region implies that the old policy and new policy is within the trust region.\n", 26 | "\n", 27 | "TRPO guarantees monotonic policy improvement, that is, it guarantees that there will always be a policy improvement on every iteration. This is the fundamental idea behind the TRPO algorithm. \n", 28 | "\n", 29 | "To understand how exactly TRPO works, we should understand the math behind TRPO. TRPO has pretty heavy math. But worry not! It will be simple if we understand the fundamental math concepts required to understand TRPO. So, before diving into the TRPO algorithm, first, we will understand several essential math concepts that are required to understand TRPO. Then we will learn how to design TRPO objective function with the trust region constraint and in the end, we will see how to solve the TRPO objective function. \n", 30 | "\n", 31 | "\n" 32 | ] 33 | } 34 | ], 35 | "metadata": { 36 | "kernelspec": { 37 | "display_name": "Python 3", 38 | "language": "python", 39 | "name": "python3" 40 | }, 41 | "language_info": { 42 | "codemirror_mode": { 43 | "name": "ipython", 44 | "version": 3 45 | }, 46 | "file_extension": ".py", 47 | "mimetype": "text/x-python", 48 | "name": "python", 49 | "nbconvert_exporter": "python", 50 | "pygments_lexer": "ipython3", 51 | "version": "3.6.9" 52 | } 53 | }, 54 | "nbformat": 4, 55 | "nbformat_minor": 2 56 | } 57 | -------------------------------------------------------------------------------- /Hands-on Reinforcement Learning with Python, Second Edition_new chapters/05. Understanding Temporal Difference Learning/5.01. TD Learning.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# TD learning\n", 8 | "\n", 9 | "The Temporal Difference (TD) learning algorithm was introduced by Richard S. Sutton in 1988. In the introduction of the chapter, we learned that the reason for the TD method to be more popular is that it takes the advantages of the dynamic programming method and the Monte Carlo method. But what are those advantages?\n", 10 | "\n", 11 | "First, let's recap quickly the advantages and disadvantages of dynamic programming and the Monte Carlo method.\n", 12 | "\n", 13 | "**Dynamic Programming** - The advantage of the dynamic programming (DP) method is that it uses the Bellman equation to compute the value of a state. That is, we learned that according to the Bellman equation, the value of a state can be obtained as a sum of the immediate reward and the discounted value of the next state. This is called bootstrapping. That is, to compute the value of a state, we don't have to wait till the end of the episode, instead, using the Bellman equation, we can estimate the value of a state just based on the value of the next state and this is called bootstrapping.\n", 14 | "\n", 15 | "Remember how we estimated the value function in dynamic programming methods (value and policy iteration)? 
We estimated the value function (value of a state) as $V(s) = \\sum_{s'} P_{ss'}^a [R_{ss'}^a + \\gamma V(s')]$. As you may recollect, we learned that in order to find the value of a state, we didn't have to wait till the end of the episode; instead, we bootstrap, that is, we estimate the value of the current state $V(s)$ using the estimated value of the next state $V(s')$. \n", 16 | "\n", 17 | "However, the disadvantage of dynamic programming is that we can apply the DP method only when we know the model dynamics of the environment. That is, we learned that DP is a model-based method and we should know the transition probabilities in order to use it. When we don't know the model dynamics of the environment, we cannot apply the DP method. \n", 18 | "\n", 19 | "**Monte Carlo Method** - The advantage of the Monte Carlo method is that it is a model-free method, which means that it does not require the model dynamics of the environment to be known in order to estimate the value and Q functions. \n", 20 | "\n", 21 | "However, the disadvantage of the Monte Carlo method is that in order to estimate the state value or Q value we need to wait till the end of the episode, and if the episode is long then it will cost us a lot of time. Also, we cannot apply Monte Carlo methods to continuous (non-episodic) tasks. \n", 22 | "\n", 23 | "\n", 24 | "Now let's get back to TD learning. The TD learning algorithm takes the benefits of both the dynamic programming and the Monte Carlo methods into account. That is, just like the dynamic programming method, we bootstrap, so that we don't have to wait till the end of an episode to compute the state value or Q value, and just like the Monte Carlo method, it is a model-free method and so it does not require the model dynamics of the environment to compute the state value or Q value. Now that we have understood the basic idea behind the TD learning algorithm, let's get into the details and learn how exactly it works. \n", 25 | "\n", 26 | "Similar to what we learned in the Monte Carlo chapter, we can use the TD learning algorithm for both prediction and control tasks, and so we can categorize TD learning into:\n", 27 | "\n", 28 | "* TD Prediction \n", 29 | "* TD Control \n", 30 | "\n", 31 | "We already learned what the prediction and control methods mean in the previous chapter. Let us recap that a bit before going forward. \n", 32 | "\n", 33 | "In the prediction method, a policy is given as an input and we try to predict the value function or Q function using the given policy. If we predict the value function using the given policy, then we can say how good it is for the agent to be in each state if it uses the given policy. That is, we can say what expected return the agent can get in each state if it acts according to the given policy. \n", 34 | "\n", 35 | "In the control method, we will not be given any policy as an input, and the goal of the control method is to find the optimal policy. So, we will initialize a random policy and then try to find the optimal policy iteratively. That is, we try to find an optimal policy that gives the maximum return. \n", 36 | "\n", 37 | "First, let us understand how we can use TD learning to perform the prediction task and then we will learn how to use TD learning for the control task."
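Before moving on, here is a minimal numerical sketch of the TD(0) prediction update just described. This is a hedged illustration on an invented two-state chain, not the book's grid world, and the learning rate and discount factor are arbitrary:

```python
import numpy as np

# Tiny episodic chain: state 0 -> state 1 -> terminal (state 2), reward +1 on the
# final transition. The environment is made up purely to show the update rule.
alpha, gamma = 0.1, 1.0
V = np.zeros(3)            # V[2] is the terminal state and stays 0

for episode in range(500):
    s = 0
    while s != 2:
        s_next = s + 1
        r = 1.0 if s_next == 2 else 0.0
        # TD(0): bootstrap on V[s_next] instead of waiting for the episode to end
        V[s] += alpha * (r + gamma * V[s_next] - V[s])
        s = s_next

print(V)   # both non-terminal values approach 1, the true return for this chain
```

Note how each update uses only the immediate reward and the current estimate of the next state's value, which is exactly the bootstrapping idea borrowed from dynamic programming, while no transition probabilities are ever needed, which is the model-free property borrowed from Monte Carlo.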
38 | ] 39 | } 40 | ], 41 | "metadata": { 42 | "kernelspec": { 43 | "display_name": "Python 3", 44 | "language": "python", 45 | "name": "python3" 46 | }, 47 | "language_info": { 48 | "codemirror_mode": { 49 | "name": "ipython", 50 | "version": 3 51 | }, 52 | "file_extension": ".py", 53 | "mimetype": "text/x-python", 54 | "name": "python", 55 | "nbconvert_exporter": "python", 56 | "pygments_lexer": "ipython3", 57 | "version": "3.6.9" 58 | } 59 | }, 60 | "nbformat": 4, 61 | "nbformat_minor": 2 62 | } 63 | -------------------------------------------------------------------------------- /Hands-on Reinforcement Learning with Python, Second Edition_new chapters/01. Fundamentals of Reinforcement Learning/1.01. Basic Idea of Reinforcement Learning .ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Introduction\n", 8 | "\n", 9 | "Reinforcement learning (RL) is a branch of machine learning where learning occurs by interacting with an environment, unlike other machine learning paradigms like supervised and unsupervised learning. Reinforcement learning is one of the most active areas of research in artificial intelligence and it is believed that RL will take us a step closer towards achieving artificial general intelligence. In this chapter, we will build a strong foundation of reinforcement learning by understating several important and fundamental concepts involved in reinforcement learning. \n", 10 | "\n", 11 | "In this chapter, we will learn about the following topics:\n", 12 | "\n", 13 | "* Basic idea of reinforcement learning\n", 14 | "* Key elements of reinforcement learning\n", 15 | "* Reinforcement learning algorithm\n", 16 | "* How RL differs from other ML paradigms?\n", 17 | "* Markov Decision Processes\n", 18 | "* Fundamental concepts of reinforcement learning\n", 19 | "* Applications of Reinforcement Learning\n", 20 | "* Reinforcement learning glossary" 21 | ] 22 | }, 23 | { 24 | "cell_type": "markdown", 25 | "metadata": {}, 26 | "source": [ 27 | "# Basic Idea of Reinforcement Learning \n", 28 | "\n", 29 | "Let's begin with an analogy. Let's suppose we are teaching the dog to catch a ball. Instead of teaching the dog explicitly to catch a ball, we will just throw a ball and every time the dog catches the ball, we will give the dog a cookie. If the dog fails to catch the ball then we will not give a cookie. So, the dog will figure out what action caused it to receive a cookie and repeat that action. Thus, the dog will understand that catching the ball caused it to receive a cookie and repeat catching the ball. Thus, in this way the dog will learn to catch a ball while aiming for maximizing the cookies it can receive. \n", 30 | "\n", 31 | "Similarly, in a reinforcement learning setting, we will not teach the agent what to do or how to do, instead, we will give a reward to the agent for every action it does. We will give a positive reward to the agent when it performs a good action and we will give a negative reward to the agent when it performs a bad action. 
The agent begins by performing a random action and if the action is good then we will give the agent a positive reward so that the agent will understand it has performed a good action and it will repeat that action and if the action performed by the agent is bad then we will give the agent a negative reward so that the agent will understand it has performed a bad action and it will not repeat that action. \n", 32 | "\n", 33 | "Thus, reinforcement learning can be viewed as a trial and error learning process where the agent tries out different actions and learns the good action which gives a positive reward. \n", 34 | "\n", 35 | "In the dog analogy, we learned, the dog represents the agent, giving a cookie to the dog upon catching the ball is a positive reward and not giving a cookie is a negative reward. So, the dog (agent) explores different actions which are catching a ball and not catching a ball and understands that catching the ball is a good action as it brings the dog a positive reward (getting a cookie).\n", 36 | "\n", 37 | "Let's understand the idea of reinforcement learning with one more simple example. Let's suppose we want to teach a robot (agent) to walk without getting hit by a mountain as shown below:\n", 38 | "\n", 39 | "![title](Images/1.png)\n", 40 | "\n", 41 | "We will not teach the robot explicitly to not go in the direction of the mountain, instead, if the robot hits the mountain and get stuck, we give the robot a negative reward, say -1. So, the robot will understand that hitting the mountain is the wrong action and it will not repeat that action again:\n", 42 | "\n", 43 | "\n", 44 | "![title](Images/2.png)\n", 45 | "Similarly, when the robot walk in the right direction without getting hit by a mountain, we will give the robot a positive reward, say +1. So, the robot will understand that not hitting the mountain is the good action and it will repeat that action:\n", 46 | "\n", 47 | "![title](Images/3.png)\n", 48 | "\n", 49 | "Thus, in the reinforcement learning setting, the agent explores different actions and learns the best action based on the reward it gets. \n", 50 | "\n", 51 | "Now that we have a basic idea of how reinforcement learning works, in the upcoming sections, we will get into more details and also learn the important concepts involved in reinforcement learning. \n", 52 | "\n" 53 | ] 54 | } 55 | ], 56 | "metadata": { 57 | "kernelspec": { 58 | "display_name": "Python 3", 59 | "language": "python", 60 | "name": "python3" 61 | }, 62 | "language_info": { 63 | "codemirror_mode": { 64 | "name": "ipython", 65 | "version": 3 66 | }, 67 | "file_extension": ".py", 68 | "mimetype": "text/x-python", 69 | "name": "python", 70 | "nbconvert_exporter": "python", 71 | "pygments_lexer": "ipython3", 72 | "version": "3.6.9" 73 | } 74 | }, 75 | "nbformat": 4, 76 | "nbformat_minor": 2 77 | } 78 | -------------------------------------------------------------------------------- /tips_tricks/Video 1.1 Improving your models using Feature engineering.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "**Types of scaling**:\n", 8 | "\n", 9 | "* MinMaxScaler - scales all features to $[a, b]$ range\n", 10 | "\n", 11 | "* StandardScaler - removes mean and divides by variance of all features. 
$X^{new}_i = \\frac{X_i - \\mu}{\\sigma}$, where $\\mu $is for mean and $\\sigma$ is for variance\n", 12 | "\n", 13 | "* RobustScaler - same as StandardScaler but removes median and divides by IQR\n" 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": 7, 19 | "metadata": { 20 | "collapsed": true 21 | }, 22 | "outputs": [], 23 | "source": [ 24 | "import pandas as pd\n", 25 | "import numpy as np" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": 8, 31 | "metadata": { 32 | "collapsed": true 33 | }, 34 | "outputs": [], 35 | "source": [ 36 | "df = pd.read_csv(\"http://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-white.csv\",sep = ';')" 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": 10, 42 | "metadata": {}, 43 | "outputs": [], 44 | "source": [ 45 | "y = df.pop('quality')" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": 11, 51 | "metadata": { 52 | "collapsed": true 53 | }, 54 | "outputs": [], 55 | "source": [ 56 | "for i in df.columns:\n", 57 | " df[i] = df[i].fillna(np.mean(df[i]))" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": 12, 63 | "metadata": { 64 | "collapsed": true 65 | }, 66 | "outputs": [], 67 | "source": [ 68 | "from sklearn.preprocessing import StandardScaler,MinMaxScaler,RobustScaler\n", 69 | "from sklearn.linear_model import Ridge\n", 70 | "from sklearn.model_selection import train_test_split\n", 71 | "from sklearn.metrics import mean_squared_error" 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": 13, 77 | "metadata": { 78 | "collapsed": true 79 | }, 80 | "outputs": [], 81 | "source": [ 82 | "np.random.seed(42)\n", 83 | "train,test,y_train,y_test = train_test_split(df,y,test_size = 0.1)" 84 | ] 85 | }, 86 | { 87 | "cell_type": "code", 88 | "execution_count": 14, 89 | "metadata": { 90 | "collapsed": true 91 | }, 92 | "outputs": [], 93 | "source": [ 94 | "def fit_predict(train,test,y_train,y_test,scaler = None):\n", 95 | " if scaler is None:\n", 96 | " lr = Ridge()\n", 97 | " lr.fit(train,y_train)\n", 98 | " y_pred = lr.predict(test)\n", 99 | " print('MSE score:', mean_squared_error(y_test,y_pred))\n", 100 | " else:\n", 101 | " train_scaled = scaler.fit_transform(train)\n", 102 | " test_scaled = scaler.transform(test)\n", 103 | " lr = Ridge()\n", 104 | " lr.fit(train_scaled,y_train)\n", 105 | " y_pred = lr.predict(test_scaled)\n", 106 | " print('MSE score:', mean_squared_error(y_test,y_pred))" 107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "execution_count": 15, 112 | "metadata": {}, 113 | "outputs": [ 114 | { 115 | "name": "stdout", 116 | "output_type": "stream", 117 | "text": [ 118 | "MSE score: 0.57404414001\n" 119 | ] 120 | } 121 | ], 122 | "source": [ 123 | "fit_predict(train,test,y_train,y_test)" 124 | ] 125 | }, 126 | { 127 | "cell_type": "code", 128 | "execution_count": 16, 129 | "metadata": {}, 130 | "outputs": [ 131 | { 132 | "name": "stdout", 133 | "output_type": "stream", 134 | "text": [ 135 | "MSE score: 0.567545067343\n" 136 | ] 137 | } 138 | ], 139 | "source": [ 140 | "fit_predict(train,test,y_train,y_test,MinMaxScaler())" 141 | ] 142 | }, 143 | { 144 | "cell_type": "code", 145 | "execution_count": 17, 146 | "metadata": {}, 147 | "outputs": [ 148 | { 149 | "name": "stdout", 150 | "output_type": "stream", 151 | "text": [ 152 | "MSE score: 0.558144966334\n" 153 | ] 154 | } 155 | ], 156 | "source": [ 157 | "fit_predict(train,test,y_train,y_test,StandardScaler())" 158 | ] 159 | }, 160 | { 161 | 
"cell_type": "code", 162 | "execution_count": 18, 163 | "metadata": {}, 164 | "outputs": [ 165 | { 166 | "name": "stdout", 167 | "output_type": "stream", 168 | "text": [ 169 | "MSE score: 0.55823299573\n" 170 | ] 171 | } 172 | ], 173 | "source": [ 174 | "fit_predict(train,test,y_train,y_test,RobustScaler())" 175 | ] 176 | } 177 | ], 178 | "metadata": { 179 | "kernelspec": { 180 | "display_name": "Python 3", 181 | "language": "python", 182 | "name": "python3" 183 | }, 184 | "language_info": { 185 | "codemirror_mode": { 186 | "name": "ipython", 187 | "version": 3 188 | }, 189 | "file_extension": ".py", 190 | "mimetype": "text/x-python", 191 | "name": "python", 192 | "nbconvert_exporter": "python", 193 | "pygments_lexer": "ipython3", 194 | "version": "3.6.2" 195 | } 196 | }, 197 | "nbformat": 4, 198 | "nbformat_minor": 2 199 | } 200 | --------------------------------------------------------------------------------