├── .gitignore ├── README.md ├── README_EN.md ├── data ├── Advertising.csv ├── BZ_010201_160207.csv ├── Default.csv ├── EURRUB_010201_160208.csv ├── GDP.csv ├── SAT_GPA.csv ├── USDRUB_010201_160207.csv ├── auto-mpg.data ├── boston-house-price.csv ├── cars.csv ├── data-4275-2021-02-09.xlsx └── mo.geojson ├── env ├── README.md ├── conda │ └── mlmethods.yaml └── docker │ ├── Dockerfile │ ├── Makefile │ ├── entrypoint.sh │ └── requirements.txt ├── lib ├── datasets │ ├── __init__.py │ ├── fashion_mnist.py │ └── twenty_languages.py ├── plot_confusion_matrix.py ├── plot_stats.py └── plot_utils.py └── notebooks ├── C0_PyBasics.ipynb ├── C1_Intro.ipynb ├── C2_Matplotlib.ipynb ├── C2_Numpy.ipynb ├── C2_Pandas.ipynb ├── C2_Pandas_Places.ipynb ├── C3_GD.ipynb ├── C3_GD_Appendix.ipynb ├── C3_Inference.ipynb ├── C3_Linear_Regression.ipynb ├── C3_SGD.ipynb ├── C3_SGD_Appendix.ipynb ├── C3_Sklearn_Basics.ipynb ├── C4_Correlation.ipynb ├── C4_Distributions.ipynb ├── C4_Statistics.ipynb ├── C4_Statistics_Examples.ipynb ├── C5_Classification.ipynb ├── C5_DT.ipynb ├── C5_DT_Housing.ipynb ├── C5_DT_Purchase.ipynb ├── C5_HAR.ipynb ├── C5_Imbalanced_Classification.ipynb ├── C5_Language_Detector.ipynb ├── C5_Linear_Regression_Fuel_Consumption.ipynb ├── C5_Logistic_Regression.ipynb ├── C5_NN_Text.ipynb ├── C5_Polynomial_Regression.ipynb ├── C5_Regression.ipynb ├── C6_CV.ipynb ├── C6_Metrics.ipynb ├── C6_Regularization.ipynb ├── C7_Audio_Recognition.ipynb ├── C7_Feature_Selection.ipynb ├── C7_Text_Classification.ipynb ├── C8_Clustering.ipynb ├── C8_Number_of_Clusters.ipynb ├── C8_PCA.ipynb ├── C8_Preprocessing_And_SemiSupervised.ipynb ├── C9_Topic_Modeling.ipynb ├── css └── style.css ├── img ├── gd-one-var-alpha-left.png ├── gd-one-var-alpha-right.png ├── gd-two-var-alpha-left-sum.png ├── gd-two-var-alpha-left.png ├── gd-two-var-alpha-right-sum.png ├── gd-two-var-alpha-right.png ├── sgd-two-var-alpha-left.png ├── sgd-two-var-alpha-right.png └── vectors.png └── nn ├── C5_NN.ipynb └── 
C5_NN_Embedding.ipynb /.gitignore: -------------------------------------------------------------------------------- 1 | .ipynb_checkpoints 2 | debug* 3 | __pycache__ -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Семинары 2 | 3 | С.Ю. Папулин (papulin.study@yandex.ru) 4 | 5 | 6 | ## Средства разработки и библиотеки 7 | 8 | [Развертывание среды разработки](env/README.md) 9 | 10 | Дополнительно: 11 | - geopandas: `pip install geopandas==0.13.2` 12 | - folium: `pip install folium` 13 | - gensim: `pip install gensim` 14 | - tensorflow: `pip install tensorflow==2.16.1` 15 | 16 | ## Пререквизиты 17 | 18 | - [Основы программирования на Python](notebooks/C0_PyBasics.ipynb) 19 | 20 | ## Темы 21 | 22 | 1. [Введение](notebooks/C1_Intro.ipynb) 23 | 2. Основные средства 24 | - [Numpy](notebooks/C2_Numpy.ipynb) 25 | - [Matplotlib](notebooks/C2_Matplotlib.ipynb) 26 | - Pandas 27 | - [Основные команды](notebooks/C2_Pandas.ipynb) 28 | - [Пример с общепитом](notebooks/C2_Pandas_Places.ipynb) 29 | - [Основы разработки под sklearn](notebooks/C3_Sklearn_Basics.ipynb) 30 | 3. Статистика 31 | - [Общие понятия](https://nbviewer.jupyter.org/github/MLMethods/Practice/blob/master/notebooks/C4_Statistics.ipynb) 32 | - [Законы распределения](notebooks/C4_Distributions.ipynb) 33 | - [Корреляция](notebooks/C4_Correlation.ipynb) 34 | - [Дополнительный пример](notebooks/C4_Statistics_Examples.ipynb) 35 | 4. Градиентный спуск 36 | - [Градиентный спуск](notebooks/C3_GD.ipynb) 37 | - [Стохастический градиентный спуск](notebooks/C3_SGD.ipynb) 38 | 5. 
Регрессия 39 | - [Обучение линейной регрессии: МНК, ГС, СГС](notebooks/C3_Linear_Regression.ipynb) 40 | - [Линейная регрессия и статистический вывод](notebooks/C3_Inference.ipynb) 41 | - [Линейная полиномиальная регрессия](notebooks/C5_Polynomial_Regression.ipynb) 42 | - [Пример с расходом топлива](notebooks/C5_Linear_Regression_Fuel_Consumption.ipynb) 43 | - [Регрессия](notebooks/C5_Regression.ipynb) 44 | 6. Классификация 45 | - [Логистическая регрессия](notebooks/C5_Logistic_Regression.ipynb) 46 | - [Классификация с несбалансированной выборкой](notebooks/C5_Imbalanced_Classification.ipynb) 47 | - [Распознавание активности](notebooks/C5_HAR.ipynb) 48 | - [Распознавание голосовых команд](notebooks/C7_Audio_Recognition.ipynb) 49 | - [Классификация](notebooks/C5_Classification.ipynb) 50 | 7. Деревья решений и их ансамбли 51 | - [Деревья решений и их ансамбли](notebooks/C5_DT.ipynb) 52 | - [Деревья решений для задачи регрессии](notebooks/C5_DT_Housing.ipynb) 53 | - [Пример с предсказанием покупок](notebooks/C5_DT_Purchase.ipynb) 54 | 8. [Метрики качества](notebooks/C6_Metrics.ipynb) 55 | 9. Выбор модели 56 | - [Оценка качества и выбор модели с использованием кросс-валидации](notebooks/C6_CV.ipynb) 57 | - [Регуляризация и выбор модели](notebooks/C6_Regularization.ipynb) 58 | 10. [Выбор признаков](notebooks/C7_Feature_Selection.ipynb) 59 | 11. [Классификация текстов](notebooks/C7_Text_Classification.ipynb) 60 | 12. Нейронные сети 61 | - [Нейронные сети](notebooks/nn/C5_NN.ipynb) 62 | - [Векторное представление слов](notebooks/nn/C5_NN_Embedding.ipynb) 63 | 13. Кластеризация 64 | - [Основные методы](notebooks/C8_Clustering.ipynb) 65 | - [Определение количества кластеров](notebooks/C8_Number_of_Clusters.ipynb) 66 | - [Использование кластеризации при классификации данных](notebooks/C8_Preprocessing_And_SemiSupervised.ipynb) 67 | 14. [Метод главных компонент и уменьшение размерности](notebooks/C8_PCA.ipynb) 68 | 15. 
[Тематическое моделирование](notebooks/C9_Topic_Modeling.ipynb) 69 | 70 | 71 | -------------------------------------------------------------------------------- /README_EN.md: -------------------------------------------------------------------------------- 1 |
RU
2 | 3 | # Practice 4 | 5 | Sergei Yu. Papulin (papulin.study@yandex.ru) 6 | 7 | ## IDE and libraries 8 | 9 | - [Anaconda Python 3.x](https://www.anaconda.com/distribution/): 10 | - jupyter 11 | - numpy 12 | - scipy 13 | - pandas 14 | - sklearn 15 | - nltk 16 | - gensim: `pip install gensim` 17 | 18 | ## Topics 19 | 20 | 1. [Introduction](notebooks/C1_Intro.ipynb) 21 | 2. [NumPy for Vectors and Matrices](notebooks/C2_Numpy.ipynb) 22 | 3. [Matplotlib](notebooks/C2_Matplotlib.ipynb) 23 | 4. [Pandas](notebooks/C2_Pandas.ipynb) 24 | 5. [OLS and Gradient Descent](notebooks/C3_GD.ipynb) 25 | 6. [Stochastic Gradient Descent](notebooks/C3_SGD.ipynb) 26 | 7. Statistics 27 | - [Basic Notions](notebooks/C4_Statistics.ipynb) 28 | - [Distributions](notebooks/C4_Distributions.ipynb) 29 | - [Correlation](notebooks/C4_Correlation.ipynb) 30 | - [Additional examples](notebooks/C4_Statistics_Examples.ipynb) 31 | 8. [Regression](notebooks/C5_Regression.ipynb) 32 | 9. [Classification](notebooks/C5_Classification.ipynb) 33 | 10. [Decision Tree](notebooks/C5_DT.ipynb) 34 | 11. [Metrics](notebooks/C6_Metrics.ipynb) 35 | 12. [Text Classification using Naive Bayes Models](notebooks/C7_Text_Classification.ipynb) 36 | 13. Clustering 37 | - [Basic methods](notebooks/C8_Clustering.ipynb) 38 | - [Determining the number of clusters](notebooks/C8_Number_of_Clusters.ipynb) 39 | 14. [PCA and Dimension Reduction](notebooks/C8_PCA.ipynb) 40 | 15. 
[Topic Modeling](notebooks/C9_Topic_Modeling.ipynb) -------------------------------------------------------------------------------- /data/Advertising.csv: -------------------------------------------------------------------------------- 1 | ,TV,radio,newspaper,sales 2 | 1,230.1,37.8,69.2,22.1 3 | 2,44.5,39.3,45.1,10.4 4 | 3,17.2,45.9,69.3,9.3 5 | 4,151.5,41.3,58.5,18.5 6 | 5,180.8,10.8,58.4,12.9 7 | 6,8.7,48.9,75,7.2 8 | 7,57.5,32.8,23.5,11.8 9 | 8,120.2,19.6,11.6,13.2 10 | 9,8.6,2.1,1,4.8 11 | 10,199.8,2.6,21.2,10.6 12 | 11,66.1,5.8,24.2,8.6 13 | 12,214.7,24,4,17.4 14 | 13,23.8,35.1,65.9,9.2 15 | 14,97.5,7.6,7.2,9.7 16 | 15,204.1,32.9,46,19 17 | 16,195.4,47.7,52.9,22.4 18 | 17,67.8,36.6,114,12.5 19 | 18,281.4,39.6,55.8,24.4 20 | 19,69.2,20.5,18.3,11.3 21 | 20,147.3,23.9,19.1,14.6 22 | 21,218.4,27.7,53.4,18 23 | 22,237.4,5.1,23.5,12.5 24 | 23,13.2,15.9,49.6,5.6 25 | 24,228.3,16.9,26.2,15.5 26 | 25,62.3,12.6,18.3,9.7 27 | 26,262.9,3.5,19.5,12 28 | 27,142.9,29.3,12.6,15 29 | 28,240.1,16.7,22.9,15.9 30 | 29,248.8,27.1,22.9,18.9 31 | 30,70.6,16,40.8,10.5 32 | 31,292.9,28.3,43.2,21.4 33 | 32,112.9,17.4,38.6,11.9 34 | 33,97.2,1.5,30,9.6 35 | 34,265.6,20,0.3,17.4 36 | 35,95.7,1.4,7.4,9.5 37 | 36,290.7,4.1,8.5,12.8 38 | 37,266.9,43.8,5,25.4 39 | 38,74.7,49.4,45.7,14.7 40 | 39,43.1,26.7,35.1,10.1 41 | 40,228,37.7,32,21.5 42 | 41,202.5,22.3,31.6,16.6 43 | 42,177,33.4,38.7,17.1 44 | 43,293.6,27.7,1.8,20.7 45 | 44,206.9,8.4,26.4,12.9 46 | 45,25.1,25.7,43.3,8.5 47 | 46,175.1,22.5,31.5,14.9 48 | 47,89.7,9.9,35.7,10.6 49 | 48,239.9,41.5,18.5,23.2 50 | 49,227.2,15.8,49.9,14.8 51 | 50,66.9,11.7,36.8,9.7 52 | 51,199.8,3.1,34.6,11.4 53 | 52,100.4,9.6,3.6,10.7 54 | 53,216.4,41.7,39.6,22.6 55 | 54,182.6,46.2,58.7,21.2 56 | 55,262.7,28.8,15.9,20.2 57 | 56,198.9,49.4,60,23.7 58 | 57,7.3,28.1,41.4,5.5 59 | 58,136.2,19.2,16.6,13.2 60 | 59,210.8,49.6,37.7,23.8 61 | 60,210.7,29.5,9.3,18.4 62 | 61,53.5,2,21.4,8.1 63 | 62,261.3,42.7,54.7,24.2 64 | 63,239.3,15.5,27.3,15.7 65 | 
64,102.7,29.6,8.4,14 66 | 65,131.1,42.8,28.9,18 67 | 66,69,9.3,0.9,9.3 68 | 67,31.5,24.6,2.2,9.5 69 | 68,139.3,14.5,10.2,13.4 70 | 69,237.4,27.5,11,18.9 71 | 70,216.8,43.9,27.2,22.3 72 | 71,199.1,30.6,38.7,18.3 73 | 72,109.8,14.3,31.7,12.4 74 | 73,26.8,33,19.3,8.8 75 | 74,129.4,5.7,31.3,11 76 | 75,213.4,24.6,13.1,17 77 | 76,16.9,43.7,89.4,8.7 78 | 77,27.5,1.6,20.7,6.9 79 | 78,120.5,28.5,14.2,14.2 80 | 79,5.4,29.9,9.4,5.3 81 | 80,116,7.7,23.1,11 82 | 81,76.4,26.7,22.3,11.8 83 | 82,239.8,4.1,36.9,12.3 84 | 83,75.3,20.3,32.5,11.3 85 | 84,68.4,44.5,35.6,13.6 86 | 85,213.5,43,33.8,21.7 87 | 86,193.2,18.4,65.7,15.2 88 | 87,76.3,27.5,16,12 89 | 88,110.7,40.6,63.2,16 90 | 89,88.3,25.5,73.4,12.9 91 | 90,109.8,47.8,51.4,16.7 92 | 91,134.3,4.9,9.3,11.2 93 | 92,28.6,1.5,33,7.3 94 | 93,217.7,33.5,59,19.4 95 | 94,250.9,36.5,72.3,22.2 96 | 95,107.4,14,10.9,11.5 97 | 96,163.3,31.6,52.9,16.9 98 | 97,197.6,3.5,5.9,11.7 99 | 98,184.9,21,22,15.5 100 | 99,289.7,42.3,51.2,25.4 101 | 100,135.2,41.7,45.9,17.2 102 | 101,222.4,4.3,49.8,11.7 103 | 102,296.4,36.3,100.9,23.8 104 | 103,280.2,10.1,21.4,14.8 105 | 104,187.9,17.2,17.9,14.7 106 | 105,238.2,34.3,5.3,20.7 107 | 106,137.9,46.4,59,19.2 108 | 107,25,11,29.7,7.2 109 | 108,90.4,0.3,23.2,8.7 110 | 109,13.1,0.4,25.6,5.3 111 | 110,255.4,26.9,5.5,19.8 112 | 111,225.8,8.2,56.5,13.4 113 | 112,241.7,38,23.2,21.8 114 | 113,175.7,15.4,2.4,14.1 115 | 114,209.6,20.6,10.7,15.9 116 | 115,78.2,46.8,34.5,14.6 117 | 116,75.1,35,52.7,12.6 118 | 117,139.2,14.3,25.6,12.2 119 | 118,76.4,0.8,14.8,9.4 120 | 119,125.7,36.9,79.2,15.9 121 | 120,19.4,16,22.3,6.6 122 | 121,141.3,26.8,46.2,15.5 123 | 122,18.8,21.7,50.4,7 124 | 123,224,2.4,15.6,11.6 125 | 124,123.1,34.6,12.4,15.2 126 | 125,229.5,32.3,74.2,19.7 127 | 126,87.2,11.8,25.9,10.6 128 | 127,7.8,38.9,50.6,6.6 129 | 128,80.2,0,9.2,8.8 130 | 129,220.3,49,3.2,24.7 131 | 130,59.6,12,43.1,9.7 132 | 131,0.7,39.6,8.7,1.6 133 | 132,265.2,2.9,43,12.7 134 | 133,8.4,27.2,2.1,5.7 135 | 134,219.8,33.5,45.1,19.6 136 | 
135,36.9,38.6,65.6,10.8 137 | 136,48.3,47,8.5,11.6 138 | 137,25.6,39,9.3,9.5 139 | 138,273.7,28.9,59.7,20.8 140 | 139,43,25.9,20.5,9.6 141 | 140,184.9,43.9,1.7,20.7 142 | 141,73.4,17,12.9,10.9 143 | 142,193.7,35.4,75.6,19.2 144 | 143,220.5,33.2,37.9,20.1 145 | 144,104.6,5.7,34.4,10.4 146 | 145,96.2,14.8,38.9,11.4 147 | 146,140.3,1.9,9,10.3 148 | 147,240.1,7.3,8.7,13.2 149 | 148,243.2,49,44.3,25.4 150 | 149,38,40.3,11.9,10.9 151 | 150,44.7,25.8,20.6,10.1 152 | 151,280.7,13.9,37,16.1 153 | 152,121,8.4,48.7,11.6 154 | 153,197.6,23.3,14.2,16.6 155 | 154,171.3,39.7,37.7,19 156 | 155,187.8,21.1,9.5,15.6 157 | 156,4.1,11.6,5.7,3.2 158 | 157,93.9,43.5,50.5,15.3 159 | 158,149.8,1.3,24.3,10.1 160 | 159,11.7,36.9,45.2,7.3 161 | 160,131.7,18.4,34.6,12.9 162 | 161,172.5,18.1,30.7,14.4 163 | 162,85.7,35.8,49.3,13.3 164 | 163,188.4,18.1,25.6,14.9 165 | 164,163.5,36.8,7.4,18 166 | 165,117.2,14.7,5.4,11.9 167 | 166,234.5,3.4,84.8,11.9 168 | 167,17.9,37.6,21.6,8 169 | 168,206.8,5.2,19.4,12.2 170 | 169,215.4,23.6,57.6,17.1 171 | 170,284.3,10.6,6.4,15 172 | 171,50,11.6,18.4,8.4 173 | 172,164.5,20.9,47.4,14.5 174 | 173,19.6,20.1,17,7.6 175 | 174,168.4,7.1,12.8,11.7 176 | 175,222.4,3.4,13.1,11.5 177 | 176,276.9,48.9,41.8,27 178 | 177,248.4,30.2,20.3,20.2 179 | 178,170.2,7.8,35.2,11.7 180 | 179,276.7,2.3,23.7,11.8 181 | 180,165.6,10,17.6,12.6 182 | 181,156.6,2.6,8.3,10.5 183 | 182,218.5,5.4,27.4,12.2 184 | 183,56.2,5.7,29.7,8.7 185 | 184,287.6,43,71.8,26.2 186 | 185,253.8,21.3,30,17.6 187 | 186,205,45.1,19.6,22.6 188 | 187,139.5,2.1,26.6,10.3 189 | 188,191.1,28.7,18.2,17.3 190 | 189,286,13.9,3.7,15.9 191 | 190,18.7,12.1,23.4,6.7 192 | 191,39.5,41.1,5.8,10.8 193 | 192,75.5,10.8,6,9.9 194 | 193,17.2,4.1,31.6,5.9 195 | 194,166.8,42,3.6,19.6 196 | 195,149.7,35.6,6,17.3 197 | 196,38.2,3.7,13.8,7.6 198 | 197,94.2,4.9,8.1,9.7 199 | 198,177,9.3,6.4,12.8 200 | 199,283.6,42,66.2,25.5 201 | 200,232.1,8.6,8.7,13.4 202 | -------------------------------------------------------------------------------- 
/data/BZ_010201_160207.csv: -------------------------------------------------------------------------------- 1 | ;;;