├── .gitignore ├── README.md ├── code ├── ch10 │ ├── 1_softmax_classifier_with_numpy.ipynb │ ├── 2_multiclass_with_sklearn.ipynb │ ├── 3_classification_service.ipynb │ ├── 5_pipeline_example.ipynb │ ├── 7_grid_search.ipynb │ ├── logicmodel.pkl │ ├── min_max.npy │ ├── my.png │ └── theta_bin.npy ├── ch100 │ ├── -log_2.png │ ├── AllElectronics.csv │ ├── log_2.png │ ├── rental_price.csv │ ├── titanic.csv │ ├── titanic.png │ └── vegeterianl_dataset.csv ├── ch11 │ ├── 1_simple_naive_bayes.ipynb │ ├── 2_german_credit_application.ipynb │ ├── 3_nb_classifier_with_sklearn.ipynb │ ├── 4_news_groups_analysis.ipynb │ ├── 99_bag_of_words.ipynb │ ├── 99_old_news_groups_analysis.ipynb │ ├── 99_spam_filter_with_sklearn.ipynb │ ├── data │ │ ├── 20021010_easy_ham.tar.bz2 │ │ ├── 20021010_hard_ham.tar.bz2 │ │ └── 20021010_spam.tar.bz2 │ ├── fraud.csv │ └── imbalanced_dataset.ipynb ├── ch12 │ ├── 1_entropy.ipynb │ ├── 2_DT_algorithm.ipynb │ ├── 3_Gini_Index.ipynb │ ├── 4_decision_tree_w_sklearn.ipynb │ ├── 5_split_continuous_attribute.ipynb │ ├── 6_regress_tree_with_sklearn.ipynb │ ├── AllElectronics.csv │ ├── rental_price.csv │ ├── titanic.png │ ├── titanic │ │ ├── test.csv │ │ └── train.csv │ └── vegeterianl_dataset.csv ├── ch13 │ ├── 1_ensemble.ipynb │ ├── 2_bagging.ipynb │ ├── 3_random_forest.ipynb │ ├── 4_adaboost.ipynb │ ├── 5_gradient_boosting.ipynb │ ├── 6_xgboost.ipynb │ ├── 6_xgboost.py │ ├── 7_lightgbm.ipynb │ ├── 8_sklearn_style.ipynb │ ├── 9_stacking.ipynb │ ├── README.md │ ├── hourse_price_preprocessor.py │ ├── house_price │ │ ├── test.csv │ │ └── train.csv │ ├── stacking.py │ ├── tatanic_X_train.npy │ ├── tatanic_test.npy │ ├── tatanic_y_train.npy │ ├── titanic │ │ ├── test.csv │ │ └── train.csv │ ├── titanic_data_preprocessor.py │ └── xgboost_installation_guide.md ├── ch14 │ ├── 1_log_transformation.ipynb │ ├── 2_PCA_example.ipynb │ ├── 3_univariate_select.ipynb │ ├── 4_model_based_feature_select.ipynb │ ├── 5_RFE.ipynb │ ├── 6_imbalanced_dataset.ipynb │ ├── 7_distributed_training.ipynb │ ├── 8_bigcon_analysis.ipynb │ ├── 9_automl_example.py │ ├── hourse_price_preprocessor.py │ ├── house_price │ │ ├── test.csv │ │ └── train.csv │ └── result.csv ├── ch2 │ ├── 1_load_boston_house_price.ipynb │ ├── 2_numpy_example.ipynb │ ├── housing.data │ └── housing.names ├── ch3 │ ├── 1_numpy_ndarray.ipynb │ ├── 20160901_20160930_public_list.csv │ ├── 2_numpy_reshape.ipynb │ ├── 3_indexing_slicing.ipynb │ ├── 4_numpy_creation_functions.ipynb │ ├── 5_ndarray_operation_functions.ipynb │ ├── 6_ndarray_operations.ipynb │ ├── 7_numpy_comparison.ipynb │ ├── 8_boolean_fancy_index.ipynb │ ├── 9_numpy_data_io.ipynb │ ├── int_data.csv │ ├── npy_test.npy │ └── populations.txt ├── ch4 │ ├── 1_data_loading.ipynb │ ├── 2_model_representation.ipynb │ ├── 3_pandas_series.ipynb │ ├── 4_pandas_dataframe.ipynb │ ├── 5_data_selection.ipynb │ ├── 6_dataframe_basic_operation.ipynb │ ├── 7_map_apply_lambda.ipynb │ ├── 8_built_in_functions.ipynb │ ├── data │ │ ├── excel-comp-data.xlsx │ │ └── wages.csv │ └── wages.csv ├── ch5 │ ├── 1_groupby_hierarchical_index.ipynb │ ├── 2_pivot_crosstab.ipynb │ ├── 3_merge_concat.ipynb │ ├── 4_db_persistence.ipynb │ └── data │ │ ├── AirPassengers.csv │ │ ├── customer-status.xlsx │ │ ├── excel-comp-data.xlsx │ │ ├── flights.db │ │ ├── movie_rating.csv │ │ ├── phone_data.csv │ │ ├── sales-feb-2014.xlsx │ │ ├── sales-jan-2014.xlsx │ │ └── sales-mar-2014.xlsx ├── ch6 │ ├── 10_bike_prorblem.ipynb │ ├── 1_basic_plot.ipynb │ ├── 1_watcha_plotting.ipynb │ ├── 2_cost_function.ipynb │ ├── 2_data_plot.ipynb │ 
├── 3_gradient_descent.ipynb │ ├── 3_missing_value.ipynb │ ├── 4_categorical_data.ipynb │ ├── 4_linear_regression_implementation.ipynb │ ├── 5_1_sgd.ipynb │ ├── 5_feature_scaling.ipynb │ ├── 5_multiple_regression_w_gd_example.ipynb │ ├── 6_data_normalization_viz.ipynb │ ├── 6_multiple_linear_regression_with_sklearn.ipynb │ ├── 7_house_price_easy.ipynb │ ├── 8_house_price_hard.ipynb │ ├── 9_pipeline_example.ipynb │ ├── test.csv │ ├── test.png │ ├── titanic │ │ ├── gender_submission.csv │ │ ├── submission_result.csv │ │ ├── test.csv │ │ ├── titanic_solution.ipynb │ │ └── train.csv │ └── train.csv ├── ch7 │ ├── 1_gradient_descent.ipynb │ ├── 2_watcha_plotting.ipynb │ ├── 3_Linear_Regression_with_gradient_descent.ipynb │ ├── 4_multiple_regression_w_gd_example.ipynb │ ├── 5_multiple_linear_regression_with_sklearn.ipynb │ └── data │ │ └── slr06.csv ├── ch8 │ ├── 1_optimization_examples.ipynb │ ├── 2_sklearn_lr.ipynb │ ├── 3_polynomial_regression .ipynb │ ├── 4_cross_validation.ipynb │ ├── 5_bike.ipynb │ ├── data │ │ ├── sampleSubmission.csv │ │ ├── test.csv │ │ └── train.csv │ ├── submission_data.csv │ ├── submission_lasso_data.csv │ └── yield.csv ├── ch9 │ ├── 1_classification_problem_overview.ipynb │ ├── 2_sigmoid_function_overview.ipynb │ ├── 3_logistic_regression_with_numpy.ipynb │ ├── 4_Logistic_Regression_with_sklearn.ipynb │ ├── 5_performacne_metrics_for_classification.ipynb │ ├── 6_roc_curve.ipynb │ ├── data │ │ ├── generator.csv │ │ └── generators.csv │ └── uva.txt ├── ch99 │ └── teamlab_classifier.py ├── kaggle │ ├── Untitled.ipynb │ ├── test.csv │ └── train.csv └── test.md ├── documents ├── How_to_use_spark-sklearn_using_Google_Dataproc(kor).ipynb ├── test.html └── test.md └── lab_asssigment ├── 1_lab_numpy ├── README.md ├── linux_mac │ ├── install.sh │ ├── numpy_lab.py │ ├── submit.sh │ └── test.py ├── numpy_lab.pdf ├── numpy_lab.py └── windows │ ├── install.bat │ ├── numpy_lab.py │ ├── submit.bat │ └── test.py ├── 2_lab_build_matrix ├── 1000i.csv ├── README.md ├── build_matrix.pdf ├── build_matrix.py ├── images │ └── 2018 │ │ └── 01 │ │ └── matrix.png ├── linux_mac │ ├── build_matrix.py │ ├── install.sh │ ├── submit.sh │ └── test.py ├── movie_rating.csv └── windows │ ├── build_matrix.py │ ├── install.bat │ ├── submit.bat │ └── test.py ├── 5_normal_equation ├── README.md ├── lab_linear_model.pdf ├── linear_model.py ├── linear_regression_example.ipynb ├── linux_mac │ ├── install.sh │ ├── linear_model.py │ ├── linear_regression_example.ipynb │ ├── mlr09.csv │ ├── submit.sh │ ├── test.csv │ ├── test.py │ └── train.csv └── windows │ ├── install.bat │ ├── linear_model.py │ ├── linear_regression_example.ipynb │ ├── mlr09.csv │ ├── submit.bat │ ├── test.csv │ ├── test.py │ └── train.csv └── 6_gradient_descent ├── README.md ├── linear_model.py ├── linear_regression_example.ipynb ├── linux_mac ├── install.sh ├── linear_model.py ├── linear_regression_example.ipynb ├── mlr09.csv ├── submit.sh ├── test.csv ├── test.py └── train.csv ├── mlr09.csv ├── test.csv ├── train.csv └── windows ├── install.bat ├── linear_model.py ├── linear_regression_example.ipynb ├── mlr09.csv ├── submit.bat ├── test.csv ├── test.py └── train.csv /.gitignore: -------------------------------------------------------------------------------- 1 | # Created by .ignore support plugin (hsz.mobi) 2 | ### Python template 3 | # Byte-compiled / optimized / DLL files 4 | __pycache__/ 5 | *.py[cod] 6 | *$py.class 7 | 8 | # C extensions 9 | *.so 10 | 11 | # Distribution / packaging 12 | .Python 13 | env/ 14 | build/ 15 | develop-eggs/ 
16 | dist/ 17 | downloads/ 18 | eggs/ 19 | .eggs/ 20 | lib/ 21 | lib64/ 22 | parts/ 23 | sdist/ 24 | var/ 25 | wheels/ 26 | *.egg-info/ 27 | .installed.cfg 28 | *.egg 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *,cover 49 | .hypothesis/ 50 | 51 | # Translations 52 | *.mo 53 | *.pot 54 | 55 | # Django stuff: 56 | *.log 57 | local_settings.py 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # dotenv 85 | .env 86 | 87 | # virtualenv 88 | .venv 89 | venv/ 90 | ENV/ 91 | 92 | # Spyder project settings 93 | .spyderproject 94 | 95 | # Rope project settings 96 | .ropeproject 97 | ### JetBrains template 98 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and Webstorm 99 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 100 | 101 | # User-specific stuff: 102 | .idea/**/workspace.xml 103 | .idea/**/tasks.xml 104 | .idea/dictionaries 105 | .idea 106 | 107 | # Sensitive or high-churn files: 108 | .idea/**/dataSources/ 109 | .idea/**/dataSources.ids 110 | .idea/**/dataSources.xml 111 | .idea/**/dataSources.local.xml 112 | .idea/**/sqlDataSources.xml 113 | .idea/**/dynamic.xml 114 | .idea/**/uiDesigner.xml 115 | 116 | # Gradle: 117 | .idea/**/gradle.xml 118 | .idea/**/libraries 119 | 120 | # Mongo Explorer plugin: 121 | .idea/**/mongoSettings.xml 122 | 123 | ## File-based project format: 124 | *.iws 125 | 126 | ## Plugin-specific files: 127 | 128 | # IntelliJ 129 | /out/ 130 | 131 | # mpeltonen/sbt-idea plugin 132 | .idea_modules/ 133 | 134 | # JIRA plugin 135 | atlassian-ide-plugin.xml 136 | 137 | # Crashlytics plugin (for Android Studio and IntelliJ) 138 | com_crashlytics_export_strings.xml 139 | crashlytics.properties 140 | crashlytics-build.properties 141 | fabric.properties 142 | *.zip 143 | -------------------------------------------------------------------------------- /code/ch10/5_pipeline_example.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import pandas as pd\n", 12 | "import numpy as np" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 2, 18 | "metadata": { 19 | "collapsed": true 20 | }, 21 | "outputs": [], 22 | "source": [ 23 | "columns = [\"id\", \"diagnosis\"] + [str(\"r\"+str(i)) for i in range(30)]\n", 24 | "df = pd.read_csv(\"https://archive.ics.uci.edu/ml/machine-learning-databases/breast-cancer-wisconsin/wdbc.data\", names=columns)\n", 25 | "X = df.loc[:,columns[2:]].values\n", 26 | "Y = df.loc[:,columns[1]].values\n", 27 | "\n", 28 | "from sklearn.model_selection import train_test_split\n", 29 | "x_train, x_test, y_train, y_test = 
train_test_split(X, Y, test_size=0.2, random_state=22)" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": 4, 35 | "metadata": { 36 | "collapsed": false 37 | }, 38 | "outputs": [ 39 | { 40 | "data": { 41 | "text/plain": [ 42 | "((455, 30), (455,), (114, 30), (114,))" 43 | ] 44 | }, 45 | "execution_count": 4, 46 | "metadata": {}, 47 | "output_type": "execute_result" 48 | } 49 | ], 50 | "source": [ 51 | "x_train.shape, y_train.shape, x_test.shape, y_test.shape" 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": 16, 57 | "metadata": { 58 | "collapsed": false 59 | }, 60 | "outputs": [ 61 | { 62 | "data": { 63 | "text/plain": [ 64 | "0.97368421052631582" 65 | ] 66 | }, 67 | "execution_count": 16, 68 | "metadata": {}, 69 | "output_type": "execute_result" 70 | } 71 | ], 72 | "source": [ 73 | "from sklearn.preprocessing import StandardScaler\n", 74 | "from sklearn.linear_model import LogisticRegression\n", 75 | "from sklearn.pipeline import Pipeline\n", 76 | "\n", 77 | "pipe_lr = Pipeline(steps=[('scl', StandardScaler()), ('clf', LogisticRegression())])\n", 78 | "\n", 79 | "pipe_lr.fit(X=x_train, y=y_train)\n", 80 | "pipe_lr.score(x_test, y_test)" 81 | ] 82 | } 83 | ], 84 | "metadata": { 85 | "anaconda-cloud": {}, 86 | "kernelspec": { 87 | "display_name": "Python [default]", 88 | "language": "python", 89 | "name": "python3" 90 | }, 91 | "language_info": { 92 | "codemirror_mode": { 93 | "name": "ipython", 94 | "version": 3 95 | }, 96 | "file_extension": ".py", 97 | "mimetype": "text/x-python", 98 | "name": "python", 99 | "nbconvert_exporter": "python", 100 | "pygments_lexer": "ipython3", 101 | "version": "3.5.2" 102 | } 103 | }, 104 | "nbformat": 4, 105 | "nbformat_minor": 2 106 | } 107 | -------------------------------------------------------------------------------- /code/ch10/logicmodel.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeamLab/machine_learning_from_scratch_with_python/d52439f9536d98f28164e8c1ecc6e9070843b971/code/ch10/logicmodel.pkl -------------------------------------------------------------------------------- /code/ch10/min_max.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeamLab/machine_learning_from_scratch_with_python/d52439f9536d98f28164e8c1ecc6e9070843b971/code/ch10/min_max.npy -------------------------------------------------------------------------------- /code/ch10/my.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeamLab/machine_learning_from_scratch_with_python/d52439f9536d98f28164e8c1ecc6e9070843b971/code/ch10/my.png -------------------------------------------------------------------------------- /code/ch10/theta_bin.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeamLab/machine_learning_from_scratch_with_python/d52439f9536d98f28164e8c1ecc6e9070843b971/code/ch10/theta_bin.npy -------------------------------------------------------------------------------- /code/ch100/-log_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeamLab/machine_learning_from_scratch_with_python/d52439f9536d98f28164e8c1ecc6e9070843b971/code/ch100/-log_2.png -------------------------------------------------------------------------------- /code/ch100/log_2.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeamLab/machine_learning_from_scratch_with_python/d52439f9536d98f28164e8c1ecc6e9070843b971/code/ch100/log_2.png -------------------------------------------------------------------------------- /code/ch100/rental_price.csv: -------------------------------------------------------------------------------- 1 | ID SEASON WORK_DAY RENTALS 2 | 1 winter FALSE 800 3 | 2 winter FALSE 826 4 | 3 winter TRUE 900 5 | 4 spring FALSE 2100 6 | 5 spring TRUE 4740 7 | 6 spring TRUE 4900 8 | 8 summer TRUE 3000 9 | 9 summer TRUE 5800 10 | 10 autumn FALSE 6200 11 | 11 autumn FALSE 2910 12 | 12 autumn TRUE 2880 13 | 7 summer FALSE 2820 -------------------------------------------------------------------------------- /code/ch100/titanic.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeamLab/machine_learning_from_scratch_with_python/d52439f9536d98f28164e8c1ecc6e9070843b971/code/ch100/titanic.png -------------------------------------------------------------------------------- /code/ch100/vegeterianl_dataset.csv: -------------------------------------------------------------------------------- 1 | ID STREAM SLOPE ELEVATION VEGETATION 2 | 1 false steep 3900 chapparal 3 | 2 true moderate 300 riparian 4 | 3 true steep 1500 riparian 5 | 4 false steep 1200 chapparal 6 | 5 false flat 4450 conifer 7 | 6 true steep 5000 conifer 8 | 7 true steep 3000 chapparal -------------------------------------------------------------------------------- /code/ch11/1_simple_naive_bayes.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "from pandas import Series, DataFrame\n", 12 | "import pandas as pd\n", 13 | "import numpy as np" 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": 7, 19 | "metadata": {}, 20 | "outputs": [ 21 | { 22 | "data": { 23 | "text/html": [ 24 | "
"[HTML rendering of df.head() — table markup lost in extraction; same table as the text/plain output below]\n
" 75 | ], 76 | "text/plain": [ 77 | " viagra spam\n", 78 | "0 1 1\n", 79 | "1 0 0\n", 80 | "2 0 0\n", 81 | "3 0 0\n", 82 | "4 0 0" 83 | ] 84 | }, 85 | "execution_count": 7, 86 | "metadata": {}, 87 | "output_type": "execute_result" 88 | } 89 | ], 90 | "source": [ 91 | "# Example from - https://chrisalbon.com/python/pandas_map_values_to_values.html\n", 92 | "viagra_spam = {'viagra': [1,0,0,0,0,0,0,0,1,1,1,0,0,1,0,0,0,0,0,1],\n", 93 | " 'spam': [\n", 94 | " 1,0,0,0,0,0,1,0,1,0, 0,0,0,0,0,0,0,1,1,1\n", 95 | " ]}\n", 96 | "df = pd.DataFrame(viagra_spam, columns = ['viagra', 'spam'])\n", 97 | "df.head()" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": 8, 103 | "metadata": {}, 104 | "outputs": [ 105 | { 106 | "data": { 107 | "text/plain": [ 108 | "array([[1, 1],\n", 109 | " [0, 0],\n", 110 | " [0, 0],\n", 111 | " [0, 0],\n", 112 | " [0, 0],\n", 113 | " [0, 0],\n", 114 | " [0, 1],\n", 115 | " [0, 0],\n", 116 | " [1, 1],\n", 117 | " [1, 0],\n", 118 | " [1, 0],\n", 119 | " [0, 0],\n", 120 | " [0, 0],\n", 121 | " [1, 0],\n", 122 | " [0, 0],\n", 123 | " [0, 0],\n", 124 | " [0, 0],\n", 125 | " [0, 1],\n", 126 | " [0, 1],\n", 127 | " [1, 1]])" 128 | ] 129 | }, 130 | "execution_count": 8, 131 | "metadata": {}, 132 | "output_type": "execute_result" 133 | } 134 | ], 135 | "source": [ 136 | "np_data = df.as_matrix()\n", 137 | "np_data" 138 | ] 139 | }, 140 | { 141 | "cell_type": "code", 142 | "execution_count": 35, 143 | "metadata": { 144 | "collapsed": true 145 | }, 146 | "outputs": [], 147 | "source": [ 148 | "# P(Viagra)\n", 149 | "p_viagra = sum(np_data[:, 0] == 1) / len(np_data)\n", 150 | "p_spam = sum(np_data[:, 1] == 1) / len(np_data)\n", 151 | "p_v_cap_s = sum((np_data[:, 0] == 1) & (np_data[:, 1] == 1)) / len(np_data)\n", 152 | "p_n_v_cap_s = sum((np_data[:, 0] == 0) & (np_data[:, 1] == 1)) / len(np_data)" 153 | ] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "execution_count": 33, 158 | "metadata": {}, 159 | "outputs": [ 160 | { 161 | "data": { 162 | "text/plain": [ 163 | "0.5" 164 | ] 165 | }, 166 | "execution_count": 33, 167 | "metadata": {}, 168 | "output_type": "execute_result" 169 | } 170 | ], 171 | "source": [ 172 | "# P(spam | viagra)\n", 173 | "p_spam * (p_v_cap_s / p_spam ) / p_viagra" 174 | ] 175 | }, 176 | { 177 | "cell_type": "code", 178 | "execution_count": 37, 179 | "metadata": {}, 180 | "outputs": [ 181 | { 182 | "data": { 183 | "text/plain": [ 184 | "0.2142857142857143" 185 | ] 186 | }, 187 | "execution_count": 37, 188 | "metadata": {}, 189 | "output_type": "execute_result" 190 | } 191 | ], 192 | "source": [ 193 | "# P(spam | ~viagra)\n", 194 | "p_spam * (p_n_v_cap_s / p_spam ) / (1-p_viagra)" 195 | ] 196 | } 197 | ], 198 | "metadata": { 199 | "kernelspec": { 200 | "display_name": "Python 3", 201 | "language": "python", 202 | "name": "python3" 203 | }, 204 | "language_info": { 205 | "codemirror_mode": { 206 | "name": "ipython", 207 | "version": 3 208 | }, 209 | "file_extension": ".py", 210 | "mimetype": "text/x-python", 211 | "name": "python", 212 | "nbconvert_exporter": "python", 213 | "pygments_lexer": "ipython3", 214 | "version": "3.6.1" 215 | } 216 | }, 217 | "nbformat": 4, 218 | "nbformat_minor": 2 219 | } 220 | -------------------------------------------------------------------------------- /code/ch11/data/20021010_easy_ham.tar.bz2: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/TeamLab/machine_learning_from_scratch_with_python/d52439f9536d98f28164e8c1ecc6e9070843b971/code/ch11/data/20021010_easy_ham.tar.bz2 -------------------------------------------------------------------------------- /code/ch11/data/20021010_hard_ham.tar.bz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeamLab/machine_learning_from_scratch_with_python/d52439f9536d98f28164e8c1ecc6e9070843b971/code/ch11/data/20021010_hard_ham.tar.bz2 -------------------------------------------------------------------------------- /code/ch11/data/20021010_spam.tar.bz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeamLab/machine_learning_from_scratch_with_python/d52439f9536d98f28164e8c1ecc6e9070843b971/code/ch11/data/20021010_spam.tar.bz2 -------------------------------------------------------------------------------- /code/ch11/fraud.csv: -------------------------------------------------------------------------------- 1 | ID,History,CoApplicant,Accommodation,Fraud 2 | 1,current,none,own,true 3 | 2,paid,none,own,false 4 | 3,paid,none,own,false 5 | 4,paid,guarantor,rent,true 6 | 5,arrears,none,own,false 7 | 6,arrears,none,own,true 8 | 7,current,none,own,false 9 | 8,arrears,none,own,false 10 | 9,current,none,rent,false 11 | 10,none,none,own,true 12 | 11,current,coapplicant,own,false 13 | 12,current,none,own,true 14 | 13,current,none,rent,true 15 | 14,paid,none,own,false 16 | 15,arrears,none,own,false 17 | 16,current,none,own,false 18 | 17,arrears,coapplicant,rent,false 19 | 18,arrears,none,free,false 20 | 19,arrears,none,own,false 21 | 20,paid,none,own,false 22 | -------------------------------------------------------------------------------- /code/ch12/6_regress_tree_with_sklearn.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 29, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "from sklearn.datasets import load_boston\n", 12 | "from sklearn.cross_validation import cross_val_score\n", 13 | "from sklearn.cross_validation import KFold\n", 14 | "import numpy as np" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 80, 20 | "metadata": { 21 | "collapsed": false 22 | }, 23 | "outputs": [], 24 | "source": [ 25 | "boston = load_boston()\n", 26 | "X, y = boston.data, boston.target\n", 27 | "features = boston.feature_names\n", 28 | "\n", 29 | "crossvalidation = KFold(n=X.shape[0], n_folds=10,\n", 30 | " shuffle=True, random_state=1)" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": 81, 36 | "metadata": { 37 | "collapsed": false 38 | }, 39 | "outputs": [ 40 | { 41 | "name": "stdout", 42 | "output_type": "stream", 43 | "text": [ 44 | "Mean squared error: 18.540\n" 45 | ] 46 | } 47 | ], 48 | "source": [ 49 | "from sklearn.tree import DecisionTreeRegressor\n", 50 | "regression_tree = DecisionTreeRegressor(\n", 51 | " min_samples_split=3, min_samples_leaf=3, random_state=0, max_leaf_nodes=20)\n", 52 | "regression_tree.fit(X,y)\n", 53 | "score = np.mean(\n", 54 | " cross_val_score(regression_tree, X, y, scoring='neg_mean_squared_error', cv=crossvalidation, n_jobs=1))\n", 55 | "print ('Mean squared error: %.3f' % abs(score))" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": 82, 61 | "metadata": { 62 | "collapsed": false 63 | }, 64 |
"outputs": [ 65 | { 66 | "name": "stdout", 67 | "output_type": "stream", 68 | "text": [ 69 | "Mean squared error: 23.764\n" 70 | ] 71 | } 72 | ], 73 | "source": [ 74 | "from sklearn import linear_model\n", 75 | "regr = linear_model.LinearRegression(normalize=True)\n", 76 | "score = np.mean(\n", 77 | " cross_val_score(regr, X, y, scoring='neg_mean_squared_error', cv=crossvalidation, n_jobs=1))\n", 78 | "print ('Mean squared error: %.3f' % abs(score))" 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": 89, 84 | "metadata": { 85 | "collapsed": false 86 | }, 87 | "outputs": [], 88 | "source": [ 89 | "from sklearn.datasets import load_iris\n", 90 | "iris = load_iris()\n", 91 | "X, y = iris.data, iris.target\n", 92 | "features = iris.feature_names\n", 93 | "\n", 94 | "crossvalidation = KFold(n=X.shape[0], n_folds=5,\n", 95 | " shuffle=True, random_state=1)" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": 100, 101 | "metadata": { 102 | "collapsed": false 103 | }, 104 | "outputs": [ 105 | { 106 | "name": "stdout", 107 | "output_type": "stream", 108 | "text": [ 109 | "Depth: 1 Accuracy: 0.580\n", 110 | "Depth: 2 Accuracy: 0.913\n", 111 | "Depth: 3 Accuracy: 0.920\n", 112 | "Depth: 4 Accuracy: 0.940\n", 113 | "Depth: 5 Accuracy: 0.920\n" 114 | ] 115 | } 116 | ], 117 | "source": [ 118 | "from sklearn import tree\n", 119 | "for depth in range(1,10):\n", 120 | " tree_classifier = tree.DecisionTreeClassifier(\n", 121 | " max_depth=depth, random_state=0)\n", 122 | " if tree_classifier.fit(X,y).tree_.max_depth < depth:\n", 123 | " break\n", 124 | " score = np.mean(cross_val_score(tree_classifier, X, y,\n", 125 | " scoring='accuracy', cv=crossvalidation, n_jobs=1))\n", 126 | " print ('Depth: %i Accuracy: %.3f' % (depth,score))" 127 | ] 128 | } 129 | ], 130 | "metadata": { 131 | "kernelspec": { 132 | "display_name": "Python [conda env:ml_scratch]", 133 | "language": "python", 134 | "name": "conda-env-ml_scratch-py" 135 | }, 136 | "language_info": { 137 | "codemirror_mode": { 138 | "name": "ipython", 139 | "version": 3 140 | }, 141 | "file_extension": ".py", 142 | "mimetype": "text/x-python", 143 | "name": "python", 144 | "nbconvert_exporter": "python", 145 | "pygments_lexer": "ipython3", 146 | "version": "3.6.2" 147 | } 148 | }, 149 | "nbformat": 4, 150 | "nbformat_minor": 2 151 | } 152 | -------------------------------------------------------------------------------- /code/ch12/rental_price.csv: -------------------------------------------------------------------------------- 1 | ID SEASON WORK_DAY RENTALS 2 | 1 winter FALSE 800 3 | 2 winter FALSE 826 4 | 3 winter TRUE 900 5 | 4 spring FALSE 2100 6 | 5 spring TRUE 4740 7 | 6 spring TRUE 4900 8 | 8 summer TRUE 3000 9 | 9 summer TRUE 5800 10 | 10 autumn FALSE 6200 11 | 11 autumn FALSE 2910 12 | 12 autumn TRUE 2880 13 | 7 summer FALSE 2820 -------------------------------------------------------------------------------- /code/ch12/titanic.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeamLab/machine_learning_from_scratch_with_python/d52439f9536d98f28164e8c1ecc6e9070843b971/code/ch12/titanic.png -------------------------------------------------------------------------------- /code/ch12/vegeterianl_dataset.csv: -------------------------------------------------------------------------------- 1 | ID STREAM SLOPE ELEVATION VEGETATION 2 | 1 false steep 3900 chapparal 3 | 2 true moderate 300 riparian 4 | 3 true steep 1500 riparian 5 | 4 false steep 
1200 chapparal 6 | 5 false flat 4450 conifer 7 | 6 true steep 5000 conifer 8 | 7 true steep 3000 chapparal -------------------------------------------------------------------------------- /code/ch13/1_ensemble.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 6, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import numpy as np\n", 10 | "from sklearn.tree import DecisionTreeClassifier\n", 11 | "from sklearn.linear_model import LogisticRegression\n", 12 | "from sklearn.naive_bayes import GaussianNB\n", 13 | "from sklearn.ensemble import VotingClassifier" 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": 9, 19 | "metadata": {}, 20 | "outputs": [], 21 | "source": [ 22 | "X = np.load(\"./tatanic_X_train.npy\")\n", 23 | "y = np.load(\"./tatanic_y_train.npy\")" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": 13, 29 | "metadata": {}, 30 | "outputs": [ 31 | { 32 | "data": { 33 | "text/plain": [ 34 | "array([0.27345609, 0.01415106, 0. , 1. , 0. ,\n", 35 | " 0.125 , 0. , 0. , 0. , 1. ,\n", 36 | " 0. , 0. , 0. , 0. , 0. ,\n", 37 | " 1. , 0. , 0. , 1. , 0. ,\n", 38 | " 0. , 0. , 0. , 0. , 0. ,\n", 39 | " 0. , 0. ])" 40 | ] 41 | }, 42 | "execution_count": 13, 43 | "metadata": {}, 44 | "output_type": "execute_result" 45 | } 46 | ], 47 | "source": [ 48 | "X[0]" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": 16, 54 | "metadata": {}, 55 | "outputs": [ 56 | { 57 | "data": { 58 | "text/plain": [ 59 | "array([0., 1., 1., 1., 0., 0., 0., 0., 1., 1.])" 60 | ] 61 | }, 62 | "execution_count": 16, 63 | "metadata": {}, 64 | "output_type": "execute_result" 65 | } 66 | ], 67 | "source": [ 68 | "y[:10]" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": 47, 74 | "metadata": {}, 75 | "outputs": [], 76 | "source": [ 77 | "clf1 = LogisticRegression(random_state=1)\n", 78 | "clf2 = DecisionTreeClassifier(random_state=1)\n", 79 | "clf3 = GaussianNB()\n", 80 | "eclf = VotingClassifier(estimators=[('lr', clf1), ('rf', clf2), ('gnb', clf3)], voting='hard')" 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": 48, 86 | "metadata": {}, 87 | "outputs": [ 88 | { 89 | "data": { 90 | "text/plain": [ 91 | "0.8020504030978227" 92 | ] 93 | }, 94 | "execution_count": 48, 95 | "metadata": {}, 96 | "output_type": "execute_result" 97 | } 98 | ], 99 | "source": [ 100 | "from sklearn.model_selection import cross_val_score\n", 101 | "cross_val_score(eclf, X, y, cv=5).mean()" 102 | ] 103 | }, 104 | { 105 | "cell_type": "code", 106 | "execution_count": 49, 107 | "metadata": {}, 108 | "outputs": [ 109 | { 110 | "data": { 111 | "text/plain": [ 112 | "0.8290420872214816" 113 | ] 114 | }, 115 | "execution_count": 49, 116 | "metadata": {}, 117 | "output_type": "execute_result" 118 | } 119 | ], 120 | "source": [ 121 | "cross_val_score(clf1, X, y, cv=5).mean()" 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": 50, 127 | "metadata": {}, 128 | "outputs": [ 129 | { 130 | "data": { 131 | "text/plain": [ 132 | "0.7840411350219006" 133 | ] 134 | }, 135 | "execution_count": 50, 136 | "metadata": {}, 137 | "output_type": "execute_result" 138 | } 139 | ], 140 | "source": [ 141 | "cross_val_score(clf2, X, y, cv=5).mean()" 142 | ] 143 | }, 144 | { 145 | "cell_type": "code", 146 | "execution_count": 51, 147 | "metadata": {}, 148 | "outputs": [ 149 | { 150 | "data": { 151 | "text/plain": [ 152 | 
"0.4600139655938551" 153 | ] 154 | }, 155 | "execution_count": 51, 156 | "metadata": {}, 157 | "output_type": "execute_result" 158 | } 159 | ], 160 | "source": [ 161 | "cross_val_score(clf3, X, y, cv=5).mean()" 162 | ] 163 | }, 164 | { 165 | "cell_type": "code", 166 | "execution_count": 62, 167 | "metadata": {}, 168 | "outputs": [], 169 | "source": [ 170 | "clf1 = LogisticRegression(random_state=1)\n", 171 | "clf2 = DecisionTreeClassifier(random_state=1)\n", 172 | "eclf = VotingClassifier(estimators=[('lr', clf1), ('dt', clf2)], voting='hard')" 173 | ] 174 | }, 175 | { 176 | "cell_type": "code", 177 | "execution_count": 63, 178 | "metadata": {}, 179 | "outputs": [ 180 | { 181 | "data": { 182 | "text/plain": [ 183 | "0.8222687742017394" 184 | ] 185 | }, 186 | "execution_count": 63, 187 | "metadata": {}, 188 | "output_type": "execute_result" 189 | } 190 | ], 191 | "source": [ 192 | "cross_val_score(eclf, X, y, cv=5).mean()" 193 | ] 194 | }, 195 | { 196 | "cell_type": "code", 197 | "execution_count": 81, 198 | "metadata": {}, 199 | "outputs": [], 200 | "source": [ 201 | "c_params = [0.1, 5.0, 7.0, 10.0, 15.0, 20.0, 100.0]\n", 202 | "\n", 203 | "\n", 204 | "params ={\n", 205 | " \"lr__solver\" : ['liblinear'], \"lr__penalty\" : [\"l2\"], \"lr__C\" : c_params,\"dt__criterion\" : [\"gini\", \"entropy\"],\n", 206 | " \"dt__max_depth\" : [10,8,7,6,5,4,3,2],\n", 207 | " \"dt__min_samples_leaf\": [1,2,3,4,5,6,7,8,9]\n", 208 | " }" 209 | ] 210 | }, 211 | { 212 | "cell_type": "code", 213 | "execution_count": null, 214 | "metadata": {}, 215 | "outputs": [], 216 | "source": [ 217 | "from sklearn.model_selection import GridSearchCV\n", 218 | "grid = GridSearchCV(estimator=eclf, param_grid=params, cv=5)\n", 219 | "grid = grid.fit(X, y)" 220 | ] 221 | }, 222 | { 223 | "cell_type": "code", 224 | "execution_count": null, 225 | "metadata": {}, 226 | "outputs": [], 227 | "source": [ 228 | "grid.best_score_" 229 | ] 230 | }, 231 | { 232 | "cell_type": "code", 233 | "execution_count": null, 234 | "metadata": {}, 235 | "outputs": [], 236 | "source": [ 237 | "grid.best_params_" 238 | ] 239 | } 240 | ], 241 | "metadata": { 242 | "kernelspec": { 243 | "display_name": "Python 3", 244 | "language": "python", 245 | "name": "python3" 246 | }, 247 | "language_info": { 248 | "codemirror_mode": { 249 | "name": "ipython", 250 | "version": 3 251 | }, 252 | "file_extension": ".py", 253 | "mimetype": "text/x-python", 254 | "name": "python", 255 | "nbconvert_exporter": "python", 256 | "pygments_lexer": "ipython3", 257 | "version": "3.6.5" 258 | } 259 | }, 260 | "nbformat": 4, 261 | "nbformat_minor": 2 262 | } 263 | -------------------------------------------------------------------------------- /code/ch13/4_adaboost.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 85, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import numpy as np" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 87, 15 | "metadata": {}, 16 | "outputs": [ 17 | { 18 | "name": "stdout", 19 | "output_type": "stream", 20 | "text": [ 21 | "['three' 'one' 'one' 'two' 'three' 'two' 'three' 'three' 'one' 'one']\n" 22 | ] 23 | } 24 | ], 25 | "source": [ 26 | "elements = ['one', 'two', 'three'] \n", 27 | "weights = [0.2, 0.3, 0.5]\n", 28 | "\n", 29 | "from numpy.random import choice\n", 30 | "print(choice(elements, size=10, replace=True, p=weights))" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": 
88, 36 | "metadata": {}, 37 | "outputs": [], 38 | "source": [ 39 | "X = np.load(\"./tatanic_X_train.npy\")\n", 40 | "y = np.load(\"./tatanic_y_train.npy\")" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": 89, 46 | "metadata": {}, 47 | "outputs": [ 48 | { 49 | "data": { 50 | "text/plain": [ 51 | "array([0.27345609, 0.01415106, 0. , 1. , 0. ,\n", 52 | " 0.125 , 0. , 0. , 0. , 1. ,\n", 53 | " 0. , 0. , 0. , 0. , 0. ,\n", 54 | " 1. , 0. , 0. , 1. , 0. ,\n", 55 | " 0. , 0. , 0. , 0. , 0. ,\n", 56 | " 0. , 0. ])" 57 | ] 58 | }, 59 | "execution_count": 89, 60 | "metadata": {}, 61 | "output_type": "execute_result" 62 | } 63 | ], 64 | "source": [ 65 | "X[0]" 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": 90, 71 | "metadata": {}, 72 | "outputs": [ 73 | { 74 | "data": { 75 | "text/plain": [ 76 | "array([0., 1., 1., 1., 0., 0., 0., 0., 1., 1.])" 77 | ] 78 | }, 79 | "execution_count": 90, 80 | "metadata": {}, 81 | "output_type": "execute_result" 82 | } 83 | ], 84 | "source": [ 85 | "y[:10]" 86 | ] 87 | }, 88 | { 89 | "cell_type": "code", 90 | "execution_count": 91, 91 | "metadata": {}, 92 | "outputs": [], 93 | "source": [ 94 | "from sklearn.ensemble import AdaBoostClassifier\n", 95 | "from sklearn.tree import DecisionTreeClassifier\n" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": 98, 101 | "metadata": {}, 102 | "outputs": [], 103 | "source": [ 104 | "eclf = AdaBoostClassifier(base_estimator=DecisionTreeClassifier(max_depth=2), n_estimators=500, \n", 105 | " learning_rate=0.1)" 106 | ] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "execution_count": 99, 111 | "metadata": {}, 112 | "outputs": [ 113 | { 114 | "data": { 115 | "text/plain": [ 116 | "0.8155272011680316" 117 | ] 118 | }, 119 | "execution_count": 99, 120 | "metadata": {}, 121 | "output_type": "execute_result" 122 | } 123 | ], 124 | "source": [ 125 | "from sklearn.model_selection import cross_val_score\n", 126 | "cross_val_score(eclf, X, y, cv=5).mean()" 127 | ] 128 | }, 129 | { 130 | "cell_type": "code", 131 | "execution_count": 100, 132 | "metadata": {}, 133 | "outputs": [ 134 | { 135 | "data": { 136 | "text/plain": [ 137 | "DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,\n", 138 | " max_features=None, max_leaf_nodes=None,\n", 139 | " min_impurity_decrease=0.0, min_impurity_split=None,\n", 140 | " min_samples_leaf=1, min_samples_split=2,\n", 141 | " min_weight_fraction_leaf=0.0, presort=False, random_state=None,\n", 142 | " splitter='best')" 143 | ] 144 | }, 145 | "execution_count": 100, 146 | "metadata": {}, 147 | "output_type": "execute_result" 148 | } 149 | ], 150 | "source": [ 151 | "from sklearn.tree import DecisionTreeClassifier\n", 152 | "DecisionTreeClassifier()" 153 | ] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "execution_count": null, 158 | "metadata": {}, 159 | "outputs": [], 160 | "source": [ 161 | "AdaBoostClassifier()" 162 | ] 163 | }, 164 | { 165 | "cell_type": "code", 166 | "execution_count": 131, 167 | "metadata": {}, 168 | "outputs": [], 169 | "source": [ 170 | "params = {\"base_estimator__criterion\" : [\"gini\", \"entropy\"],\n", 171 | " \"base_estimator__max_features\" : [7,8,],\n", 172 | " \"base_estimator__max_depth\" : [1,2],\n", 173 | " \"n_estimators\": [23,24, 25, 26, 27],\n", 174 | " \"learning_rate\": [0.4, 0.45, 0.5, 0.55, 0.6]\n", 175 | " }" 176 | ] 177 | }, 178 | { 179 | "cell_type": "code", 180 | "execution_count": null, 181 | "metadata": {}, 182 | "outputs": [], 183 | "source": [ 184 | "from 
sklearn.model_selection import GridSearchCV\n", 185 | "grid = GridSearchCV(estimator=eclf, param_grid=params, cv=5, n_jobs=7)\n", 186 | "grid = grid.fit(X, y)" 187 | ] 188 | }, 189 | { 190 | "cell_type": "code", 191 | "execution_count": 128, 192 | "metadata": {}, 193 | "outputs": [ 194 | { 195 | "data": { 196 | "text/plain": [ 197 | "0.8312710911136107" 198 | ] 199 | }, 200 | "execution_count": 128, 201 | "metadata": {}, 202 | "output_type": "execute_result" 203 | } 204 | ], 205 | "source": [ 206 | "grid.best_score_" 207 | ] 208 | }, 209 | { 210 | "cell_type": "code", 211 | "execution_count": 129, 212 | "metadata": {}, 213 | "outputs": [ 214 | { 215 | "data": { 216 | "text/plain": [ 217 | "{'base_estimator__criterion': 'gini',\n", 218 | " 'base_estimator__max_depth': 2,\n", 219 | " 'base_estimator__max_features': 8,\n", 220 | " 'learning_rate': 0.6,\n", 221 | " 'n_estimators': 26}" 222 | ] 223 | }, 224 | "execution_count": 129, 225 | "metadata": {}, 226 | "output_type": "execute_result" 227 | } 228 | ], 229 | "source": [ 230 | "grid.best_params_" 231 | ] 232 | }, 233 | { 234 | "cell_type": "code", 235 | "execution_count": 130, 236 | "metadata": {}, 237 | "outputs": [ 238 | { 239 | "data": { 240 | "text/plain": [ 241 | "array([0.19406886, 0.23061462, 0.04090978, 0.05602668, 0.06539204,\n", 242 | " 0.06219571, 0.04414943, 0.03733474, 0.02491265, 0.00782351,\n", 243 | " 0. , 0. , 0.01205415, 0.05311241, 0.01024874,\n", 244 | " 0.04730915, 0. , 0.01836485, 0.00899386, 0. ,\n", 245 | " 0.00761332, 0.03372184, 0.02510816, 0.02004552, 0. ,\n", 246 | " 0. , 0. ])" 247 | ] 248 | }, 249 | "execution_count": 130, 250 | "metadata": {}, 251 | "output_type": "execute_result" 252 | } 253 | ], 254 | "source": [ 255 | "grid.best_estimator_.feature_importances_" 256 | ] 257 | }, 258 | { 259 | "cell_type": "code", 260 | "execution_count": null, 261 | "metadata": {}, 262 | "outputs": [], 263 | "source": [] 264 | } 265 | ], 266 | "metadata": { 267 | "kernelspec": { 268 | "display_name": "Python 3", 269 | "language": "python", 270 | "name": "python3" 271 | }, 272 | "language_info": { 273 | "codemirror_mode": { 274 | "name": "ipython", 275 | "version": 3 276 | }, 277 | "file_extension": ".py", 278 | "mimetype": "text/x-python", 279 | "name": "python", 280 | "nbconvert_exporter": "python", 281 | "pygments_lexer": "ipython3", 282 | "version": "3.6.5" 283 | } 284 | }, 285 | "nbformat": 4, 286 | "nbformat_minor": 2 287 | } 288 | -------------------------------------------------------------------------------- /code/ch13/6_xgboost.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import numpy as np\n", 12 | "import xgboost as xgb" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 2, 18 | "metadata": { 19 | "collapsed": true 20 | }, 21 | "outputs": [], 22 | "source": [ 23 | "X = np.load(\"./tatanic_X_train.npy\")\n", 24 | "y = np.load(\"./tatanic_y_train.npy\")" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": 5, 30 | "metadata": {}, 31 | "outputs": [], 32 | "source": [ 33 | "from sklearn.cross_validation import train_test_split" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": 11, 39 | "metadata": {}, 40 | "outputs": [], 41 | "source": [ 42 | "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size =0.3)" 43 | ] 44 | }, 45 | { 46 | 
"cell_type": "code", 47 | "execution_count": 12, 48 | "metadata": {}, 49 | "outputs": [], 50 | "source": [ 51 | "dtrain = xgb.DMatrix(X_train, label=y_train)\n", 52 | "dtest = xgb.DMatrix(X_test, label=y_test)" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": 13, 58 | "metadata": { 59 | "collapsed": true 60 | }, 61 | "outputs": [], 62 | "source": [ 63 | "param = {'max_depth': 2, 'eta': 0.5, 'silent': 1, 'objective': 'binary:logistic'}\n", 64 | "param['nthread'] = 7\n", 65 | "param['eval_metric'] = 'auc'\n", 66 | "evallist = [(dtest, 'eval'), (dtrain, 'train')]\n", 67 | "plst = param.items()" 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": 14, 73 | "metadata": {}, 74 | "outputs": [ 75 | { 76 | "name": "stdout", 77 | "output_type": "stream", 78 | "text": [ 79 | "[0]\teval-auc:0.844072\ttrain-auc:0.828469\n", 80 | "[1]\teval-auc:0.877096\ttrain-auc:0.85691\n", 81 | "[2]\teval-auc:0.88491\ttrain-auc:0.854543\n", 82 | "[3]\teval-auc:0.893383\ttrain-auc:0.86737\n", 83 | "[4]\teval-auc:0.891856\ttrain-auc:0.873424\n", 84 | "[5]\teval-auc:0.89488\ttrain-auc:0.879952\n", 85 | "[6]\teval-auc:0.898204\ttrain-auc:0.886382\n", 86 | "[7]\teval-auc:0.897515\ttrain-auc:0.88684\n", 87 | "[8]\teval-auc:0.893114\ttrain-auc:0.890581\n", 88 | "[9]\teval-auc:0.894431\ttrain-auc:0.895124\n", 89 | "[10]\teval-auc:0.897036\ttrain-auc:0.89621\n", 90 | "[11]\teval-auc:0.897036\ttrain-auc:0.898418\n", 91 | "[12]\teval-auc:0.898772\ttrain-auc:0.897682\n", 92 | "[13]\teval-auc:0.898952\ttrain-auc:0.898108\n", 93 | "[14]\teval-auc:0.900509\ttrain-auc:0.89922\n", 94 | "[15]\teval-auc:0.891617\ttrain-auc:0.904314\n", 95 | "[16]\teval-auc:0.892036\ttrain-auc:0.904407\n", 96 | "[17]\teval-auc:0.886228\ttrain-auc:0.906359\n", 97 | "[18]\teval-auc:0.888024\ttrain-auc:0.91106\n", 98 | "[19]\teval-auc:0.886916\ttrain-auc:0.912811\n", 99 | "[20]\teval-auc:0.888114\ttrain-auc:0.912549\n", 100 | "[21]\teval-auc:0.887874\ttrain-auc:0.913454\n", 101 | "[22]\teval-auc:0.888473\ttrain-auc:0.913858\n", 102 | "[23]\teval-auc:0.888593\ttrain-auc:0.914807\n", 103 | "[24]\teval-auc:0.890958\ttrain-auc:0.91875\n", 104 | "[25]\teval-auc:0.892305\ttrain-auc:0.921106\n", 105 | "[26]\teval-auc:0.893323\ttrain-auc:0.921101\n", 106 | "[27]\teval-auc:0.891078\ttrain-auc:0.922229\n", 107 | "[28]\teval-auc:0.890539\ttrain-auc:0.922982\n", 108 | "[29]\teval-auc:0.89012\ttrain-auc:0.923898\n", 109 | "[30]\teval-auc:0.891078\ttrain-auc:0.923604\n", 110 | "[31]\teval-auc:0.891228\ttrain-auc:0.924171\n", 111 | "[32]\teval-auc:0.891377\ttrain-auc:0.924455\n", 112 | "[33]\teval-auc:0.89003\ttrain-auc:0.926942\n", 113 | "[34]\teval-auc:0.889072\ttrain-auc:0.927061\n", 114 | "[35]\teval-auc:0.89012\ttrain-auc:0.928136\n", 115 | "[36]\teval-auc:0.891856\ttrain-auc:0.928114\n", 116 | "[37]\teval-auc:0.888413\ttrain-auc:0.929985\n", 117 | "[38]\teval-auc:0.89003\ttrain-auc:0.929259\n", 118 | "[39]\teval-auc:0.890509\ttrain-auc:0.931899\n", 119 | "[40]\teval-auc:0.89021\ttrain-auc:0.93269\n", 120 | "[41]\teval-auc:0.888802\ttrain-auc:0.934086\n", 121 | "[42]\teval-auc:0.889641\ttrain-auc:0.933922\n", 122 | "[43]\teval-auc:0.889251\ttrain-auc:0.934953\n", 123 | "[44]\teval-auc:0.889731\ttrain-auc:0.935924\n", 124 | "[45]\teval-auc:0.88979\ttrain-auc:0.936611\n", 125 | "[46]\teval-auc:0.890269\ttrain-auc:0.937489\n", 126 | "[47]\teval-auc:0.890569\ttrain-auc:0.937784\n", 127 | "[48]\teval-auc:0.891467\ttrain-auc:0.93858\n", 128 | "[49]\teval-auc:0.888413\ttrain-auc:0.939332\n" 129 | ] 130 | } 131 | ], 132 | "source": 
[ 133 | "num_round = 50\n", 134 | "bst = xgb.train(plst, dtrain, num_round, evallist)" 135 | ] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "execution_count": null, 140 | "metadata": { 141 | "collapsed": true 142 | }, 143 | "outputs": [], 144 | "source": [ 145 | "ypred = bst.predict(dtest, ntree_limit=bst.best_ntree_limit)" 146 | ] 147 | }, 148 | { 149 | "cell_type": "code", 150 | "execution_count": 17, 151 | "metadata": {}, 152 | "outputs": [ 153 | { 154 | "data": { 155 | "text/plain": [ 156 | "0.83895131086142327" 157 | ] 158 | }, 159 | "execution_count": 17, 160 | "metadata": {}, 161 | "output_type": "execute_result" 162 | } 163 | ], 164 | "source": [ 165 | "(sum((ypred>0.5) == y_test)) / 267.0" 166 | ] 167 | } 168 | ], 169 | "metadata": { 170 | "kernelspec": { 171 | "display_name": "Python 2", 172 | "language": "python", 173 | "name": "python2" 174 | }, 175 | "language_info": { 176 | "codemirror_mode": { 177 | "name": "ipython", 178 | "version": 2 179 | }, 180 | "file_extension": ".py", 181 | "mimetype": "text/x-python", 182 | "name": "python", 183 | "nbconvert_exporter": "python", 184 | "pygments_lexer": "ipython2", 185 | "version": "2.7.13" 186 | } 187 | }, 188 | "nbformat": 4, 189 | "nbformat_minor": 2 190 | } 191 | -------------------------------------------------------------------------------- /code/ch13/6_xgboost.py: -------------------------------------------------------------------------------- 1 | import xgboost as xgb 2 | -------------------------------------------------------------------------------- /code/ch13/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeamLab/machine_learning_from_scratch_with_python/d52439f9536d98f28164e8c1ecc6e9070843b971/code/ch13/README.md -------------------------------------------------------------------------------- /code/ch13/hourse_price_preprocessor.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | from sklearn import preprocessing 4 | 5 | def get_train_test_split_dataset(train_dataset_filename=None, 6 | test_dataset_filename = None): 7 | df_train = pd.read_csv(train_dataset_filename) 8 | df_test = pd.read_csv(test_dataset_filename) 9 | 10 | 11 | # Train_Test_concat & Target_value Extract 12 | train_index = df_train['Id'].values-1 13 | test_index = df_test['Id'].values-1 14 | target_value = df_train.iloc[:,-1] 15 | 16 | df_concat = pd.concat([df_train.iloc[:,:-1],df_test],axis=0,ignore_index=True) 17 | df_concat_numeric = df_concat.loc[:,df_concat.dtypes!='object'] 18 | ############################################################### 19 | # Column selection 20 | df_concat_numeric.drop(['MoSold'],axis=1,inplace=True) 21 | 22 | ############################################################### 23 | # Missing_value_Solve 24 | null_colums = df_concat_numeric.isnull().sum().sort_values(ascending=False)[df_concat_numeric.isnull().sum().sort_values(ascending=False)>0].index.tolist() 25 | ### Fix the GarageYrBlt outlier 26 | df_concat_numeric.GarageYrBlt.loc[2592] = df_concat_numeric.GarageYrBlt.loc[2592]-200 27 | 28 | ### Null_solve_function################### 29 | def null_solve(data_frame, null_list): 30 | for column in null_list: 31 | data_frame[column].fillna(data_frame[column].mean(),inplace=True) 32 | ########################################## 33 | null_solve(df_concat_numeric,null_colums) 34 | 35 | ### Flag whether the house was remodeled 36 | df_concat_numeric['Remodleling'] =
df_concat_numeric['YearBuilt']!=df_concat_numeric['YearRemodAdd'] 37 | 38 | df_quality_type = df_concat_numeric[['MSSubClass','OverallQual','OverallCond']] 39 | df_quantity_type = df_concat_numeric.drop(['MSSubClass','OverallQual','OverallCond'],axis=1) 40 | 41 | ############################################################### 42 | #Scaling_value 43 | ###Min_Max Scaling 44 | #from sklearn import preprocessing 45 | minmax_scale = preprocessing.MinMaxScaler().fit(df_quantity_type.iloc[train_index,1:].values) 46 | x_quantitiy_scaled = minmax_scale.transform(df_quantity_type.iloc[train_index,1:].values) 47 | 48 | ###One_hot Scaling 49 | one_hot = preprocessing.OneHotEncoder() 50 | one_hot.fit(df_quality_type.iloc[train_index].values) 51 | x_quality_scaled = one_hot.transform(df_quality_type.iloc[train_index].values).toarray() 52 | 53 | #Train 54 | x_scaled_data = np.hstack((x_quality_scaled,x_quantitiy_scaled)) 55 | Y_scaled_data = target_value.reshape(-1,) 56 | 57 | 58 | 59 | ############################################################### 60 | #Predict 61 | x_quan_predict_scaled = minmax_scale.transform(df_quantity_type.iloc[test_index,1:].values) 62 | x_qual_predict_scaled = one_hot.transform(df_quality_type.iloc[test_index].values).toarray() 63 | X_scaled_predict = np.hstack((x_qual_predict_scaled,x_quan_predict_scaled)) 64 | 65 | 66 | X_train = x_scaled_data 67 | y_train = Y_scaled_data 68 | X_test = X_scaled_predict 69 | test_id_idx = test_index + 1 70 | 71 | return X_train, X_test, y_train, test_id_idx 72 | -------------------------------------------------------------------------------- /code/ch13/stacking.py: -------------------------------------------------------------------------------- 1 | from sklearn.base import RegressorMixin, ClassifierMixin 2 | from sklearn.base import BaseEstimator 3 | 4 | from sklearn.model_selection import train_test_split 5 | 6 | import numpy as np 7 | 8 | 9 | class MyStackingRegressor(BaseEstimator, RegressorMixin): 10 | def __init__(self, meta_estimator, base_estimators, test_ratio=0.2, feature_weights=None): 11 | """ 12 | Called when initializing the classifier 13 | """ 14 | self.meta_estimator = meta_estimator 15 | self.base_estimators = base_estimators 16 | self.feature_weights = feature_weights 17 | self.test_ratio = test_ratio 18 | 19 | def fit(self, X, y=None): 20 | 21 | X_train, X_test, y_train, y_test = train_test_split( 22 | X, y, test_size=self.test_ratio) 23 | 24 | for estimator in self.base_estimators: 25 | estimator.fit(X_train, y_train) 26 | 27 | meta_train_set = np.array([estimator.predict(X_test) 28 | for estimator in self.base_estimators]).T 29 | 30 | 31 | self.meta_estimator.fit(meta_train_set, y_test) 32 | 33 | return self 34 | 35 | def predict(self, X, y=None): 36 | meta_X = [] 37 | for estimator in self.base_estimators: 38 | meta_X.append(estimator.predict(X)) 39 | meta_X = np.array(meta_X).T 40 | 41 | return self.meta_estimator.predict(meta_X) 42 | 43 | 44 | class MyStackingClassifier(BaseEstimator, ClassifierMixin): 45 | def __init__(self, meta_estimator, base_estimators, test_ratio=0.2, feature_weights=None): 46 | """ 47 | Called when initializing the classifier 48 | """ 49 | self.meta_estimator = meta_estimator 50 | self.base_estimators = base_estimators 51 | self.feature_weights = feature_weights 52 | self.test_ratio = test_ratio 53 | 54 | def fit(self, X, y=None): 55 | 56 | X_train, X_test, y_train, y_test = train_test_split( 57 | X, y, test_size=self.test_ratio) 58 | 59 | for estimator in self.base_estimators: 60 | 
estimator.fit(X_train, y_train) 61 | 62 | meta_train_set = np.array([estimator.predict(X_test) 63 | for estimator in self.base_estimators]).T 64 | 65 | 66 | self.meta_estimator.fit(meta_train_set, y_test) 67 | return self 68 | 69 | def predict(self, X, y=None): 70 | meta_X = [] 71 | for estimator in self.base_estimators: 72 | meta_X.append(estimator.predict(X)) 73 | meta_X = np.array(meta_X).T 74 | return self.meta_estimator.predict(meta_X) 75 | -------------------------------------------------------------------------------- /code/ch13/tatanic_X_train.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeamLab/machine_learning_from_scratch_with_python/d52439f9536d98f28164e8c1ecc6e9070843b971/code/ch13/tatanic_X_train.npy -------------------------------------------------------------------------------- /code/ch13/tatanic_test.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeamLab/machine_learning_from_scratch_with_python/d52439f9536d98f28164e8c1ecc6e9070843b971/code/ch13/tatanic_test.npy -------------------------------------------------------------------------------- /code/ch13/tatanic_y_train.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeamLab/machine_learning_from_scratch_with_python/d52439f9536d98f28164e8c1ecc6e9070843b971/code/ch13/tatanic_y_train.npy -------------------------------------------------------------------------------- /code/ch13/titanic_data_preprocessor.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | from sklearn.preprocessing import MinMaxScaler 4 | 5 | 6 | def transform_status(x): 7 | if "Mrs" in x or "Ms" in x: 8 | return "Mrs" 9 | elif "Mr" in x: 10 | return "Mr" 11 | elif "Miss" in x: 12 | return "Miss" 13 | elif "Master" in x: 14 | return "Master" 15 | elif "Dr" in x: 16 | return "Dr" 17 | elif "Rev" in x: 18 | return "Rev" 19 | elif "Col" in x: 20 | return "Col" 21 | else: 22 | return "0" 23 | 24 | train_df = pd.read_csv("titanic/train.csv") 25 | test_df = pd.read_csv("titanic/test.csv") 26 | 27 | train_id = train_df["PassengerId"].values 28 | test_id = test_df["PassengerId"].values 29 | 30 | all_df = train_df.append(test_df).set_index('PassengerId') 31 | all_df["Sex"] = all_df["Sex"].replace({"male":0,"female":1}) 32 | all_df["Age"].fillna( 33 | all_df.groupby("Pclass")["Age"].transform("mean"), inplace=True) 34 | all_df["cabin_count"] = all_df["Cabin"].map(lambda x : len(x.split()) if type(x) == str else 0) 35 | all_df["social_status"] = all_df["Name"].map(lambda x : transform_status(x)) 36 | all_df = all_df.drop([62,830]) 37 | train_id = np.delete(train_id, [62-1,830-1]) 38 | all_df.loc[all_df["Fare"].isnull(), "Fare"] = 12.415462 39 | all_df["cabin_type"] = all_df["Cabin"].map(lambda x : x[0] if type(x) == str else "99") 40 | 41 | del all_df["Cabin"] 42 | del all_df["Name"] 43 | del all_df["Ticket"] 44 | 45 | y = all_df.loc[train_id, "Survived"].values 46 | del all_df["Survived"] 47 | 48 | X_df = pd.get_dummies(all_df) 49 | X = X_df.values 50 | 51 | minmax_scaler = MinMaxScaler() 52 | minmax_scaler.fit(X) 53 | X = minmax_scaler.transform(X) 54 | 55 | X_train = X[:len(train_id)] 56 | X_test = X[len(train_id):] 57 | 58 | np.save("tatanic_X_train.npy", X_train) 59 | np.save("tatanic_y_train.npy", y) 60 | np.save("tatanic_test.npy", X_test) 61 | 
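titanic_data_preprocessor.py ends by saving the three arrays that the ch13 notebooks (1_ensemble.ipynb, 4_adaboost.ipynb, 6_xgboost.ipynb) read back with np.load. A minimal sketch of that round trip — it assumes the .npy files sit in the working directory, and the LogisticRegression baseline is illustrative, not part of the repo:

```python
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score

# Arrays written by titanic_data_preprocessor.py (note the repo's "tatanic" spelling)
X_train = np.load("tatanic_X_train.npy")  # min-max scaled features, 27 columns
y_train = np.load("tatanic_y_train.npy")  # survival labels, 0.0 / 1.0
X_test = np.load("tatanic_test.npy")      # Kaggle test split, same feature columns

# Illustrative baseline, mirroring the cross_val_score calls in 1_ensemble.ipynb
clf = LogisticRegression(random_state=1)
print(cross_val_score(clf, X_train, y_train, cv=5).mean())
```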
-------------------------------------------------------------------------------- /code/ch13/xgboost_installation_guide.md: -------------------------------------------------------------------------------- 1 | # Gradient Boosting Package Installation Guide for Windows 2 | 3 | This document explains how to install XGBoost and LightGBM, the two most widely used gradient boosting packages, on Windows. Each package can be installed in one of two ways: with conda, or by compiling the source and installing via pip. 4 | 5 | ## Prerequisites 6 | Installing the packages requires the following tools: 7 | 8 | - git(https://git-scm.com/) 9 | - cmake(https://cmake.org/download/) 10 | - .Net Core SDK(https://www.microsoft.com/net/download/windows) 11 | - .NET Framework Develop Pack 12 | (https://www.microsoft.com/net/download/windows) 13 | 14 | 15 | ## XGBoost installation guide 16 | ### conda 17 | With conda, installation takes only the simple command below. Depending on your machine's configuration, however, the install may fail. 18 | ```bash 19 | activate ml # activate the virtual environment 20 | conda install -c mndrake xgboost 21 | ``` 22 | ### Install from source code 23 | To install from the source code, enter the following commands in a `cmd` window. 24 | 25 | #### git clone 26 | ```bash 27 | git clone --recursive https://github.com/dmlc/xgboost 28 | 29 | cd xgboost 30 | git submodule init 31 | git submodule update 32 | ``` 33 | 34 | #### build 35 | ```bash 36 | mkdir build 37 | cd build 38 | cmake .. -G"Visual Studio 15 2017 Win64" 39 | cmake --build . --target xgboost --config Release 40 | cd.. 41 | ``` 42 | 43 | #### Python install 44 | Be sure to activate the virtual environment before installing the Python package. 45 | ```bash 46 | activate ml # activate the virtual environment 47 | cd python-package 48 | python setup.py install 49 | ``` 50 | 51 | 52 | ## LightGBM installation guide 53 | ### conda 54 | With conda, installation takes only the simple command below. Depending on your machine's configuration, however, the install may fail. 55 | 56 | ```bash 57 | activate ml # activate the virtual environment 58 | conda install -c conda-forge lightgbm 59 | ``` 60 | 61 | ### Install from source code 62 | To install from the source code, enter the following commands in a `cmd` window. 63 | 64 | #### git clone 65 | ```bash 66 | git clone --recursive https://github.com/Microsoft/LightGBM 67 | ``` 68 | 69 | #### build 70 | ```bash 71 | cd LightGBM 72 | mkdir build 73 | cd build 74 | cmake -DCMAKE_GENERATOR_PLATFORM=x64 .. 75 | cmake --build . --target ALL_BUILD --config Release 76 | ``` 77 | 78 | #### Python install 79 | Be sure to activate the virtual environment before installing the Python package. 80 | ```bash 81 | cd .. 82 | activate ml # activate the virtual environment 83 | cd python-package 84 | python setup.py install 85 | ``` 86 | -------------------------------------------------------------------------------- /code/ch14/7_distributed_training.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "name": "stderr", 10 | "output_type": "stream", 11 | "text": [ 12 | "/Users/sungchulchoi/miniconda3/envs/ml/lib/python3.6/site-packages/sklearn/cross_validation.py:41: DeprecationWarning: This module was deprecated in version 0.18 in favor of the model_selection module into which all the refactored classes and functions are moved. Also note that the interface of the new CV iterators are different from that of this module. This module will be removed in 0.20.\n", 13 | " \"This module will be removed in 0.20.\", DeprecationWarning)\n", 14 | "/Users/sungchulchoi/miniconda3/envs/ml/lib/python3.6/site-packages/sklearn/grid_search.py:42: DeprecationWarning: This module was deprecated in version 0.18 in favor of the model_selection module into which all the refactored classes and functions are moved.
This module will be removed in 0.20.\n", 15 | " DeprecationWarning)\n" 16 | ] 17 | }, 18 | { 19 | "name": "stdout", 20 | "output_type": "stream", 21 | "text": [ 22 | "--- 183.58999824523926 seconds ---\n" 23 | ] 24 | } 25 | ], 26 | "source": [ 27 | "from sklearn import grid_search, datasets\n", 28 | "from sklearn.ensemble import RandomForestClassifier\n", 29 | "from sklearn.grid_search import GridSearchCV\n", 30 | "digits = datasets.load_digits()\n", 31 | "X, y = digits.data, digits.target\n", 32 | "param_grid = {\"max_depth\": [3, None],\n", 33 | " \"max_features\": [1, 3, 10],\n", 34 | " \"min_samples_split\": [2, 3, 10],\n", 35 | " \"min_samples_leaf\": [1, 3, 10],\n", 36 | " \"bootstrap\": [True, False],\n", 37 | " \"criterion\": [\"gini\", \"entropy\"],\n", 38 | " \"n_estimators\": [10, 20, 40, 80]}\n", 39 | "import time\n", 40 | "start_time = time.time()\n", 41 | "gs = grid_search.GridSearchCV(RandomForestClassifier(), param_grid=param_grid)\n", 42 | "gs.fit(X, y)\n", 43 | "print(\"--- %s seconds ---\" % (time.time() - start_time))" 44 | ] 45 | }, 46 | { 47 | "cell_type": "markdown", 48 | "metadata": {}, 49 | "source": [ 50 | "## Scikit-learn on Spark" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": null, 56 | "metadata": {}, 57 | "outputs": [], 58 | "source": [ 59 | "from sklearn import grid_search, datasets\n", 60 | "from sklearn.ensemble import RandomForestClassifier\n", 61 | "# Use spark_sklearn’s grid search instead:\n", 62 | "from spark_sklearn import GridSearchCV\n", 63 | "digits = datasets.load_digits()\n", 64 | "X, y = digits.data, digits.target\n", 65 | "param_grid = {\"max_depth\": [3, None],\n", 66 | " \"max_features\": [1, 3, 10],\n", 67 | " \"min_samples_split\": [2, 3, 10],\n", 68 | " \"min_samples_leaf\": [1, 3, 10],\n", 69 | " \"bootstrap\": [True, False],\n", 70 | " \"criterion\": [\"gini\", \"entropy\"],\n", 71 | " \"n_estimators\": [10, 20, 40, 80]}\n", 72 | "gs = grid_search.GridSearchCV(RandomForestClassifier(), param_grid=param_grid)\n", 73 | "gs.fit(X, y)" 74 | ] 75 | } 76 | ], 77 | "metadata": { 78 | "kernelspec": { 79 | "display_name": "Python 3", 80 | "language": "python", 81 | "name": "python3" 82 | }, 83 | "language_info": { 84 | "codemirror_mode": { 85 | "name": "ipython", 86 | "version": 3 87 | }, 88 | "file_extension": ".py", 89 | "mimetype": "text/x-python", 90 | "name": "python", 91 | "nbconvert_exporter": "python", 92 | "pygments_lexer": "ipython3", 93 | "version": "3.6.5" 94 | } 95 | }, 96 | "nbformat": 4, 97 | "nbformat_minor": 2 98 | } 99 | -------------------------------------------------------------------------------- /code/ch14/9_automl_example.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pandas as pd 3 | import numpy as np 4 | from sklearn.ensemble import RandomForestClassifier 5 | 6 | 7 | y_label_df = pd.read_csv("./data/train_label.csv") 8 | y = pd.Series([1,1,0,1], index=["2month", "month", "retained","week"]) 9 | y_label_df["class"] = y_label_df["label"].map(y) 10 | 11 | 12 | X_raw_df = pd.read_csv("./data/train_activity.csv") 13 | X_df = X_raw_df.groupby(["acc_id"]).sum().reset_index() 14 | 15 | from sklearn.model_selection import train_test_split 16 | 17 | X_payment_df = pd.read_csv("./data/train_payment.csv") 18 | X_df = X_df.merge(X_payment_df.groupby("acc_id").sum().reset_index(), how="left", on="acc_id") 19 | 20 | 21 | X_df = X_df.fillna(0) 22 | 23 | X_train, X_test, y_train, y_test = train_test_split( 24 | X_df.values, 
y_label_df["class"].values, test_size=0.2, stratify=y_label_df["class"]) 25 | 26 | rfc = RandomForestClassifier() 27 | rfc.fit(X_train[:, 1:], y_train) 28 | from sklearn.metrics import accuracy_score 29 | y_pred = rfc.predict(X_test[:,1:]) 30 | print(accuracy_score(y_test, y_pred)) 31 | 32 | import autosklearn.classification 33 | automl = autosklearn.classification.AutoSklearnClassifier( 34 | ) 35 | 36 | automl.fit(X_train[:, 1:], y_train) 37 | 38 | y_pred = automl.predict(X_test[:,1:]) 39 | print("Accuracy score", accuracy_score(y_test, y_pred)) 40 | -------------------------------------------------------------------------------- /code/ch14/hourse_price_preprocessor.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | from sklearn import preprocessing 4 | 5 | def get_train_test_split_dataset(train_dataset_filename=None, 6 | test_dataset_filename = None): 7 | df_train = pd.read_csv(train_dataset_filename) 8 | df_test = pd.read_csv(test_dataset_filename) 9 | 10 | 11 | # Train_Test_concat & Trarget_value Extract 12 | train_index = df_train['Id'].values-1 13 | test_index = df_test['Id'].values-1 14 | target_value = df_train.iloc[:,-1] 15 | 16 | df_concat = pd.concat([df_train.iloc[:,:-1],df_test],axis=0,ignore_index=True) 17 | df_concat_numeric = df_concat.loc[:,df_concat.dtypes!='object'] 18 | ############################################################### 19 | # 칼럼 Searching 20 | df_concat_numeric.drop(['MoSold'],axis=1,inplace=True) 21 | 22 | ############################################################### 23 | # Missing_value_Solve 24 | null_colums = df_concat_numeric.isnull().sum().sort_values(ascending=False)[df_concat_numeric.isnull().sum().sort_values(ascending=False)>0].index.tolist() 25 | ### GarageYrBlt 특이치 해결 26 | df_concat_numeric.GarageYrBlt.loc[2592] = df_concat_numeric.GarageYrBlt.loc[2592]-200 27 | 28 | ### Null_solve_function################### 29 | def null_solve(data_frame, null_list): 30 | for column in null_list: 31 | data_frame[column].fillna(data_frame[column].mean(),inplace=True) 32 | ########################################## 33 | null_solve(df_concat_numeric,null_colums) 34 | 35 | ### 리모델링 여부 반영 36 | df_concat_numeric['Remodleling'] = df_concat_numeric['YearBuilt']!=df_concat_numeric['YearRemodAdd'] 37 | 38 | df_quality_type = df_concat_numeric[['MSSubClass','OverallQual','OverallCond']] 39 | df_quantity_type = df_concat_numeric.drop(['MSSubClass','OverallQual','OverallCond'],axis=1) 40 | 41 | ############################################################### 42 | #Scaling_value 43 | ###Min_Max Scaling 44 | #from sklearn import preprocessing 45 | minmax_scale = preprocessing.MinMaxScaler().fit(df_quantity_type.iloc[train_index,1:].values) 46 | x_quantitiy_scaled = minmax_scale.transform(df_quantity_type.iloc[train_index,1:].values) 47 | 48 | ###One_hot Scaling 49 | one_hot = preprocessing.OneHotEncoder() 50 | one_hot.fit(df_quality_type.iloc[train_index].values) 51 | x_quality_scaled = one_hot.transform(df_quality_type.iloc[train_index].values).toarray() 52 | 53 | #Train 54 | x_scaled_data = np.hstack((x_quality_scaled,x_quantitiy_scaled)) 55 | Y_scaled_data = target_value.reshape(-1,) 56 | 57 | 58 | 59 | ############################################################### 60 | #Predict 61 | x_quan_predict_scaled = minmax_scale.transform(df_quantity_type.iloc[test_index,1:].values) 62 | x_qual_predict_scaled = one_hot.transform(df_quality_type.iloc[test_index].values).toarray() 63 | 
X_scaled_predict = np.hstack((x_qual_predict_scaled,x_quan_predict_scaled)) 64 | 65 | 66 | X_train = x_scaled_data 67 | y_train = Y_scaled_data 68 | X_test = X_scaled_predict 69 | test_id_idx = test_index + 1 70 | 71 | return X_train, X_test, y_train, test_id_idx 72 | -------------------------------------------------------------------------------- /code/ch2/housing.names: -------------------------------------------------------------------------------- 1 | 1. Title: Boston Housing Data 2 | 3 | 2. Sources: 4 | (a) Origin: This dataset was taken from the StatLib library which is 5 | maintained at Carnegie Mellon University. 6 | (b) Creator: Harrison, D. and Rubinfeld, D.L. 'Hedonic prices and the 7 | demand for clean air', J. Environ. Economics & Management, 8 | vol.5, 81-102, 1978. 9 | (c) Date: July 7, 1993 10 | 11 | 3. Past Usage: 12 | - Used in Belsley, Kuh & Welsch, 'Regression diagnostics ...', Wiley, 13 | 1980. N.B. Various transformations are used in the table on 14 | pages 244-261. 15 | - Quinlan,R. (1993). Combining Instance-Based and Model-Based Learning. 16 | In Proceedings on the Tenth International Conference of Machine 17 | Learning, 236-243, University of Massachusetts, Amherst. Morgan 18 | Kaufmann. 19 | 20 | 4. Relevant Information: 21 | 22 | Concerns housing values in suburbs of Boston. 23 | 24 | 5. Number of Instances: 506 25 | 26 | 6. Number of Attributes: 13 continuous attributes (including "class" 27 | attribute "MEDV"), 1 binary-valued attribute. 28 | 29 | 7. Attribute Information: 30 | 31 | 1. CRIM per capita crime rate by town 32 | 2. ZN proportion of residential land zoned for lots over 33 | 25,000 sq.ft. 34 | 3. INDUS proportion of non-retail business acres per town 35 | 4. CHAS Charles River dummy variable (= 1 if tract bounds 36 | river; 0 otherwise) 37 | 5. NOX nitric oxides concentration (parts per 10 million) 38 | 6. RM average number of rooms per dwelling 39 | 7. AGE proportion of owner-occupied units built prior to 1940 40 | 8. DIS weighted distances to five Boston employment centres 41 | 9. RAD index of accessibility to radial highways 42 | 10. TAX full-value property-tax rate per $10,000 43 | 11. PTRATIO pupil-teacher ratio by town 44 | 12. B 1000(Bk - 0.63)^2 where Bk is the proportion of blacks 45 | by town 46 | 13. LSTAT % lower status of the population 47 | 14. MEDV Median value of owner-occupied homes in $1000's 48 | 49 | 8. Missing Attribute Values: None. 
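Everything in ch2 that reads `housing.data` relies on the attribute order documented above. A minimal loading sketch follows; the column names come from section 7 of housing.names, while the use of `pandas` here (rather than, say, `np.loadtxt`) is just one convenient option:

```python
import pandas as pd

# Column order follows section 7 of housing.names.
columns = ["CRIM", "ZN", "INDUS", "CHAS", "NOX", "RM", "AGE",
           "DIS", "RAD", "TAX", "PTRATIO", "B", "LSTAT", "MEDV"]

# housing.data is whitespace-delimited plain text with no header row.
df = pd.read_csv("housing.data", delim_whitespace=True,
                 header=None, names=columns)

X = df.drop(columns="MEDV").values  # the 13 predictors
y = df["MEDV"].values               # target: median home value in $1000's
```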
50 | 51 | 52 | 53 | -------------------------------------------------------------------------------- /code/ch3/2_numpy_reshape.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 2, 6 | "metadata": { 7 | "ExecuteTime": { 8 | "end_time": "2017-09-26T06:33:47.101403Z", 9 | "start_time": "2017-09-26T06:33:46.925548Z" 10 | }, 11 | "collapsed": true 12 | }, 13 | "outputs": [], 14 | "source": [ 15 | "import numpy as np" 16 | ] 17 | }, 18 | { 19 | "cell_type": "markdown", 20 | "metadata": {}, 21 | "source": [ 22 | "#### reshape" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": 6, 28 | "metadata": { 29 | "ExecuteTime": { 30 | "end_time": "2017-09-26T06:34:35.652950Z", 31 | "start_time": "2017-09-26T06:34:35.648564Z" 32 | } 33 | }, 34 | "outputs": [ 35 | { 36 | "data": { 37 | "text/plain": [ 38 | "(2, 4)" 39 | ] 40 | }, 41 | "execution_count": 6, 42 | "metadata": {}, 43 | "output_type": "execute_result" 44 | } 45 | ], 46 | "source": [ 47 | "test_matrix = [[1,2,3,4], [1,2,5,8]]\n", 48 | "np.array(test_matrix).shape" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": 8, 54 | "metadata": { 55 | "ExecuteTime": { 56 | "end_time": "2017-09-26T06:34:40.873051Z", 57 | "start_time": "2017-09-26T06:34:40.868440Z" 58 | } 59 | }, 60 | "outputs": [ 61 | { 62 | "data": { 63 | "text/plain": [ 64 | "array([[[1, 2],\n", 65 | " [3, 4]],\n", 66 | "\n", 67 | " [[1, 2],\n", 68 | " [5, 8]]])" 69 | ] 70 | }, 71 | "execution_count": 8, 72 | "metadata": {}, 73 | "output_type": "execute_result" 74 | } 75 | ], 76 | "source": [ 77 | "np.array(test_matrix).reshape(2,2,2)" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": 11, 83 | "metadata": {}, 84 | "outputs": [ 85 | { 86 | "data": { 87 | "text/plain": [ 88 | "(8,)" 89 | ] 90 | }, 91 | "execution_count": 11, 92 | "metadata": {}, 93 | "output_type": "execute_result" 94 | } 95 | ], 96 | "source": [ 97 | "np.array(test_matrix).reshape(8,).shape" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": 12, 103 | "metadata": {}, 104 | "outputs": [ 105 | { 106 | "data": { 107 | "text/plain": [ 108 | "(2, 4)" 109 | ] 110 | }, 111 | "execution_count": 12, 112 | "metadata": {}, 113 | "output_type": "execute_result" 114 | } 115 | ], 116 | "source": [ 117 | "np.array(test_matrix).reshape(2,4).shape" 118 | ] 119 | }, 120 | { 121 | "cell_type": "code", 122 | "execution_count": 9, 123 | "metadata": { 124 | "ExecuteTime": { 125 | "end_time": "2017-09-26T06:35:37.334641Z", 126 | "start_time": "2017-09-26T06:35:37.330454Z" 127 | } 128 | }, 129 | "outputs": [ 130 | { 131 | "data": { 132 | "text/plain": [ 133 | "(2, 4)" 134 | ] 135 | }, 136 | "execution_count": 9, 137 | "metadata": {}, 138 | "output_type": "execute_result" 139 | } 140 | ], 141 | "source": [ 142 | "np.array(test_matrix).reshape(2,-1).shape" 143 | ] 144 | }, 145 | { 146 | "cell_type": "code", 147 | "execution_count": 23, 148 | "metadata": {}, 149 | "outputs": [ 150 | { 151 | "data": { 152 | "text/plain": [ 153 | "array([[[1, 2],\n", 154 | " [3, 4]],\n", 155 | "\n", 156 | " [[1, 2],\n", 157 | " [5, 8]]])" 158 | ] 159 | }, 160 | "execution_count": 23, 161 | "metadata": {}, 162 | "output_type": "execute_result" 163 | } 164 | ], 165 | "source": [ 166 | "np.array(test_matrix).reshape(2,2,2)" 167 | ] 168 | }, 169 | { 170 | "cell_type": "code", 171 | "execution_count": 25, 172 | "metadata": {}, 173 | "outputs": [ 174 | { 175 | "data": { 176 | 
"text/plain": [ 177 | "(2, 2, 2)" 178 | ] 179 | }, 180 | "execution_count": 25, 181 | "metadata": {}, 182 | "output_type": "execute_result" 183 | } 184 | ], 185 | "source": [ 186 | "np.array(test_matrix).reshape(2,2,2).shape" 187 | ] 188 | }, 189 | { 190 | "cell_type": "markdown", 191 | "metadata": {}, 192 | "source": [ 193 | "#### flat or flatten()" 194 | ] 195 | }, 196 | { 197 | "cell_type": "code", 198 | "execution_count": 40, 199 | "metadata": {}, 200 | "outputs": [ 201 | { 202 | "data": { 203 | "text/plain": [ 204 | "array([1, 2, 3, 4, 1, 2, 5, 8, 1, 2, 3, 4, 1, 2, 5, 8])" 205 | ] 206 | }, 207 | "execution_count": 40, 208 | "metadata": {}, 209 | "output_type": "execute_result" 210 | } 211 | ], 212 | "source": [ 213 | "test_matrix = [[[1,2,3,4], [1,2,5,8]], [[1,2,3,4], [1,2,5,8]]]\n", 214 | "np.array(test_matrix).flatten()" 215 | ] 216 | } 217 | ], 218 | "metadata": { 219 | "anaconda-cloud": {}, 220 | "kernelspec": { 221 | "display_name": "Python 3", 222 | "language": "python", 223 | "name": "python3" 224 | }, 225 | "language_info": { 226 | "codemirror_mode": { 227 | "name": "ipython", 228 | "version": 3 229 | }, 230 | "file_extension": ".py", 231 | "mimetype": "text/x-python", 232 | "name": "python", 233 | "nbconvert_exporter": "python", 234 | "pygments_lexer": "ipython3", 235 | "version": "3.6.2" 236 | }, 237 | "nav_menu": {}, 238 | "toc": { 239 | "navigate_menu": true, 240 | "number_sections": true, 241 | "sideBar": true, 242 | "threshold": 6, 243 | "toc_cell": false, 244 | "toc_section_display": "block", 245 | "toc_window_display": false 246 | } 247 | }, 248 | "nbformat": 4, 249 | "nbformat_minor": 2 250 | } 251 | -------------------------------------------------------------------------------- /code/ch3/3_indexing_slicing.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "ExecuteTime": { 8 | "end_time": "2017-09-26T06:37:50.179800Z", 9 | "start_time": "2017-09-26T06:37:49.983626Z" 10 | }, 11 | "collapsed": true 12 | }, 13 | "outputs": [], 14 | "source": [ 15 | "import numpy as np" 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": 2, 21 | "metadata": { 22 | "ExecuteTime": { 23 | "end_time": "2017-09-26T06:37:51.740944Z", 24 | "start_time": "2017-09-26T06:37:51.731945Z" 25 | } 26 | }, 27 | "outputs": [ 28 | { 29 | "data": { 30 | "text/plain": [ 31 | "array([[1, 2, 3],\n", 32 | " [4, 5, 6]])" 33 | ] 34 | }, 35 | "execution_count": 2, 36 | "metadata": {}, 37 | "output_type": "execute_result" 38 | } 39 | ], 40 | "source": [ 41 | "test_exmaple = np.array([[1, 2, 3], [4.5, 5, 6]], int)\n", 42 | "test_exmaple" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": 3, 48 | "metadata": { 49 | "ExecuteTime": { 50 | "end_time": "2017-09-26T06:37:52.891230Z", 51 | "start_time": "2017-09-26T06:37:52.886872Z" 52 | } 53 | }, 54 | "outputs": [ 55 | { 56 | "data": { 57 | "text/plain": [ 58 | "1" 59 | ] 60 | }, 61 | "execution_count": 3, 62 | "metadata": {}, 63 | "output_type": "execute_result" 64 | } 65 | ], 66 | "source": [ 67 | "test_exmaple[0][0]" 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": 4, 73 | "metadata": { 74 | "ExecuteTime": { 75 | "end_time": "2017-09-26T06:37:53.651076Z", 76 | "start_time": "2017-09-26T06:37:53.646830Z" 77 | } 78 | }, 79 | "outputs": [ 80 | { 81 | "data": { 82 | "text/plain": [ 83 | "1" 84 | ] 85 | }, 86 | "execution_count": 4, 87 | "metadata": {}, 88 | "output_type": 
"execute_result" 89 | } 90 | ], 91 | "source": [ 92 | "test_exmaple[0,0]" 93 | ] 94 | }, 95 | { 96 | "cell_type": "code", 97 | "execution_count": 8, 98 | "metadata": { 99 | "ExecuteTime": { 100 | "end_time": "2017-09-26T06:38:09.770954Z", 101 | "start_time": "2017-09-26T06:38:09.766561Z" 102 | } 103 | }, 104 | "outputs": [ 105 | { 106 | "data": { 107 | "text/plain": [ 108 | "array([[10, 2, 3],\n", 109 | " [ 4, 5, 6]])" 110 | ] 111 | }, 112 | "execution_count": 8, 113 | "metadata": {}, 114 | "output_type": "execute_result" 115 | } 116 | ], 117 | "source": [ 118 | "test_exmaple[0,0] = 10 # Matrix 0,0 에 12 할당\n", 119 | "test_exmaple" 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": 9, 125 | "metadata": { 126 | "ExecuteTime": { 127 | "end_time": "2017-09-26T06:38:12.442886Z", 128 | "start_time": "2017-09-26T06:38:12.438469Z" 129 | } 130 | }, 131 | "outputs": [ 132 | { 133 | "data": { 134 | "text/plain": [ 135 | "5" 136 | ] 137 | }, 138 | "execution_count": 9, 139 | "metadata": {}, 140 | "output_type": "execute_result" 141 | } 142 | ], 143 | "source": [ 144 | "test_exmaple[0][0] = 5 # Matrix 0,0 에 12 할당\n", 145 | "test_exmaple[0,0]" 146 | ] 147 | }, 148 | { 149 | "cell_type": "markdown", 150 | "metadata": {}, 151 | "source": [ 152 | "#### slicing" 153 | ] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "execution_count": 22, 158 | "metadata": {}, 159 | "outputs": [ 160 | { 161 | "data": { 162 | "text/plain": [ 163 | "array([[1, 2, 5, 8],\n", 164 | " [1, 2, 5, 8]])" 165 | ] 166 | }, 167 | "execution_count": 22, 168 | "metadata": {}, 169 | "output_type": "execute_result" 170 | } 171 | ], 172 | "source": [ 173 | "test_exmaple = np.array([\n", 174 | " [1, 2, 5,8], [1, 2, 5,8],[1, 2, 5,8],[1, 2, 5,8]], int)\n", 175 | "test_exmaple[:2,:] " 176 | ] 177 | }, 178 | { 179 | "cell_type": "code", 180 | "execution_count": 21, 181 | "metadata": {}, 182 | "outputs": [ 183 | { 184 | "data": { 185 | "text/plain": [ 186 | "array([[1, 2, 5, 8],\n", 187 | " [1, 2, 5, 8]])" 188 | ] 189 | }, 190 | "execution_count": 21, 191 | "metadata": {}, 192 | "output_type": "execute_result" 193 | } 194 | ], 195 | "source": [ 196 | "test_exmaple[:,1:3] \n", 197 | "test_exmaple[1,:2] " 198 | ] 199 | }, 200 | { 201 | "cell_type": "code", 202 | "execution_count": 13, 203 | "metadata": {}, 204 | "outputs": [ 205 | { 206 | "data": { 207 | "text/plain": [ 208 | "array([[ 3, 4, 5],\n", 209 | " [ 8, 9, 10]])" 210 | ] 211 | }, 212 | "execution_count": 13, 213 | "metadata": {}, 214 | "output_type": "execute_result" 215 | } 216 | ], 217 | "source": [ 218 | "test_exmaple = np.array([[1, 2, 3, 4, 5], [6, 7, 8, 9, 10]], int)\n", 219 | "test_exmaple[:,2:] # 전체 Row의 2열 이상" 220 | ] 221 | }, 222 | { 223 | "cell_type": "code", 224 | "execution_count": 14, 225 | "metadata": {}, 226 | "outputs": [ 227 | { 228 | "data": { 229 | "text/plain": [ 230 | "array([7, 8])" 231 | ] 232 | }, 233 | "execution_count": 14, 234 | "metadata": {}, 235 | "output_type": "execute_result" 236 | } 237 | ], 238 | "source": [ 239 | "test_exmaple[1,1:3] # 1 Row의 1열 ~ 2열" 240 | ] 241 | }, 242 | { 243 | "cell_type": "code", 244 | "execution_count": 10, 245 | "metadata": { 246 | "ExecuteTime": { 247 | "end_time": "2017-09-26T06:43:31.638458Z", 248 | "start_time": "2017-09-26T06:43:31.634892Z" 249 | } 250 | }, 251 | "outputs": [ 252 | { 253 | "data": { 254 | "text/plain": [ 255 | "array([[4, 5, 6]])" 256 | ] 257 | }, 258 | "execution_count": 10, 259 | "metadata": {}, 260 | "output_type": "execute_result" 261 | } 262 | ], 263 | "source": [ 264 | 
"test_exmaple[1:3] # 1 Row ~ 2Row의 전체" 265 | ] 266 | }, 267 | { 268 | "cell_type": "code", 269 | "execution_count": 16, 270 | "metadata": { 271 | "ExecuteTime": { 272 | "end_time": "2017-09-26T06:45:47.055820Z", 273 | "start_time": "2017-09-26T06:45:47.050822Z" 274 | } 275 | }, 276 | "outputs": [ 277 | { 278 | "data": { 279 | "text/plain": [ 280 | "array([[ 9],\n", 281 | " [19],\n", 282 | " [29],\n", 283 | " [39],\n", 284 | " [49],\n", 285 | " [59],\n", 286 | " [69],\n", 287 | " [79],\n", 288 | " [89],\n", 289 | " [99]])" 290 | ] 291 | }, 292 | "execution_count": 16, 293 | "metadata": {}, 294 | "output_type": "execute_result" 295 | } 296 | ], 297 | "source": [ 298 | "a = np.arange(100).reshape(10,10)\n", 299 | "a[:, -1].reshape(-1,1)" 300 | ] 301 | } 302 | ], 303 | "metadata": { 304 | "kernelspec": { 305 | "display_name": "Python 3", 306 | "language": "python", 307 | "name": "python3" 308 | }, 309 | "language_info": { 310 | "codemirror_mode": { 311 | "name": "ipython", 312 | "version": 3 313 | }, 314 | "file_extension": ".py", 315 | "mimetype": "text/x-python", 316 | "name": "python", 317 | "nbconvert_exporter": "python", 318 | "pygments_lexer": "ipython3", 319 | "version": "3.6.2" 320 | }, 321 | "nav_menu": {}, 322 | "toc": { 323 | "navigate_menu": true, 324 | "number_sections": true, 325 | "sideBar": true, 326 | "threshold": 6, 327 | "toc_cell": false, 328 | "toc_section_display": "block", 329 | "toc_window_display": false 330 | } 331 | }, 332 | "nbformat": 4, 333 | "nbformat_minor": 2 334 | } 335 | -------------------------------------------------------------------------------- /code/ch3/9_numpy_data_io.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 20, 6 | "metadata": { 7 | "ExecuteTime": { 8 | "end_time": "2017-09-26T02:28:09.787514Z", 9 | "start_time": "2017-09-26T02:28:09.785012Z" 10 | }, 11 | "collapsed": true 12 | }, 13 | "outputs": [], 14 | "source": [ 15 | "import numpy as np" 16 | ] 17 | }, 18 | { 19 | "cell_type": "markdown", 20 | "metadata": {}, 21 | "source": [ 22 | "#### load txt" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": 15, 28 | "metadata": { 29 | "ExecuteTime": { 30 | "end_time": "2017-09-26T02:11:26.296847Z", 31 | "start_time": "2017-09-26T02:11:26.291077Z" 32 | } 33 | }, 34 | "outputs": [ 35 | { 36 | "data": { 37 | "text/plain": [ 38 | "array([[ 1900., 30000., 4000., 48300.],\n", 39 | " [ 1901., 47200., 6100., 48200.],\n", 40 | " [ 1902., 70200., 9800., 41500.],\n", 41 | " [ 1903., 77400., 35200., 38200.],\n", 42 | " [ 1904., 36300., 59400., 40600.],\n", 43 | " [ 1905., 20600., 41700., 39800.],\n", 44 | " [ 1906., 18100., 19000., 38600.],\n", 45 | " [ 1907., 21400., 13000., 42300.],\n", 46 | " [ 1908., 22000., 8300., 44500.],\n", 47 | " [ 1909., 25400., 9100., 42100.]])" 48 | ] 49 | }, 50 | "execution_count": 15, 51 | "metadata": {}, 52 | "output_type": "execute_result" 53 | } 54 | ], 55 | "source": [ 56 | "a = np.loadtxt(\"./populations.txt\")\n", 57 | "a[:10]" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": 18, 63 | "metadata": { 64 | "ExecuteTime": { 65 | "end_time": "2017-09-26T02:11:40.991394Z", 66 | "start_time": "2017-09-26T02:11:40.986710Z" 67 | } 68 | }, 69 | "outputs": [ 70 | { 71 | "data": { 72 | "text/plain": [ 73 | "array([[ 1900, 30000, 4000, 48300],\n", 74 | " [ 1901, 47200, 6100, 48200],\n", 75 | " [ 1902, 70200, 9800, 41500]])" 76 | ] 77 | }, 78 | "execution_count": 18, 79 | 
"metadata": {}, 80 | "output_type": "execute_result" 81 | } 82 | ], 83 | "source": [ 84 | "a_int = a.astype(int)\n", 85 | "a_int[:3]" 86 | ] 87 | }, 88 | { 89 | "cell_type": "code", 90 | "execution_count": 21, 91 | "metadata": { 92 | "ExecuteTime": { 93 | "end_time": "2017-09-26T02:28:11.931523Z", 94 | "start_time": "2017-09-26T02:28:11.928654Z" 95 | }, 96 | "collapsed": true 97 | }, 98 | "outputs": [], 99 | "source": [ 100 | "np.savetxt('int_data.csv',a_int, delimiter=\",\")" 101 | ] 102 | }, 103 | { 104 | "cell_type": "markdown", 105 | "metadata": {}, 106 | "source": [ 107 | "#### numpy object - npy" 108 | ] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": 31, 113 | "metadata": { 114 | "ExecuteTime": { 115 | "end_time": "2017-09-26T02:36:56.843546Z", 116 | "start_time": "2017-09-26T02:36:56.839840Z" 117 | }, 118 | "collapsed": true 119 | }, 120 | "outputs": [], 121 | "source": [ 122 | "np.save(\"npy_test\", arr=a_int)" 123 | ] 124 | }, 125 | { 126 | "cell_type": "code", 127 | "execution_count": 35, 128 | "metadata": { 129 | "ExecuteTime": { 130 | "end_time": "2017-09-26T02:37:07.300334Z", 131 | "start_time": "2017-09-26T02:37:07.295316Z" 132 | } 133 | }, 134 | "outputs": [ 135 | { 136 | "data": { 137 | "text/plain": [ 138 | "array([[ 1900, 30000, 4000, 48300],\n", 139 | " [ 1901, 47200, 6100, 48200],\n", 140 | " [ 1902, 70200, 9800, 41500]])" 141 | ] 142 | }, 143 | "execution_count": 35, 144 | "metadata": {}, 145 | "output_type": "execute_result" 146 | } 147 | ], 148 | "source": [ 149 | "npy_array = np.load(file=\"npy_test.npy\")\n", 150 | "npy_array[:3]" 151 | ] 152 | }, 153 | { 154 | "cell_type": "code", 155 | "execution_count": null, 156 | "metadata": { 157 | "collapsed": true 158 | }, 159 | "outputs": [], 160 | "source": [] 161 | } 162 | ], 163 | "metadata": { 164 | "kernelspec": { 165 | "display_name": "Python 3", 166 | "language": "python", 167 | "name": "python3" 168 | }, 169 | "language_info": { 170 | "codemirror_mode": { 171 | "name": "ipython", 172 | "version": 3 173 | }, 174 | "file_extension": ".py", 175 | "mimetype": "text/x-python", 176 | "name": "python", 177 | "nbconvert_exporter": "python", 178 | "pygments_lexer": "ipython3", 179 | "version": "3.6.1" 180 | }, 181 | "nav_menu": {}, 182 | "toc": { 183 | "navigate_menu": true, 184 | "number_sections": true, 185 | "sideBar": true, 186 | "threshold": 6, 187 | "toc_cell": false, 188 | "toc_section_display": "block", 189 | "toc_window_display": false 190 | } 191 | }, 192 | "nbformat": 4, 193 | "nbformat_minor": 2 194 | } 195 | -------------------------------------------------------------------------------- /code/ch3/int_data.csv: -------------------------------------------------------------------------------- 1 | 1.900000000000000000e+03,3.000000000000000000e+04,4.000000000000000000e+03,4.830000000000000000e+04 2 | 1.901000000000000000e+03,4.720000000000000000e+04,6.100000000000000000e+03,4.820000000000000000e+04 3 | 1.902000000000000000e+03,7.020000000000000000e+04,9.800000000000000000e+03,4.150000000000000000e+04 4 | 1.903000000000000000e+03,7.740000000000000000e+04,3.520000000000000000e+04,3.820000000000000000e+04 5 | 1.904000000000000000e+03,3.630000000000000000e+04,5.940000000000000000e+04,4.060000000000000000e+04 6 | 1.905000000000000000e+03,2.060000000000000000e+04,4.170000000000000000e+04,3.980000000000000000e+04 7 | 1.906000000000000000e+03,1.810000000000000000e+04,1.900000000000000000e+04,3.860000000000000000e+04 8 | 
1.907000000000000000e+03,2.140000000000000000e+04,1.300000000000000000e+04,4.230000000000000000e+04 9 | 1.908000000000000000e+03,2.200000000000000000e+04,8.300000000000000000e+03,4.450000000000000000e+04 10 | 1.909000000000000000e+03,2.540000000000000000e+04,9.100000000000000000e+03,4.210000000000000000e+04 11 | 1.910000000000000000e+03,2.710000000000000000e+04,7.400000000000000000e+03,4.600000000000000000e+04 12 | 1.911000000000000000e+03,4.030000000000000000e+04,8.000000000000000000e+03,4.680000000000000000e+04 13 | 1.912000000000000000e+03,5.700000000000000000e+04,1.230000000000000000e+04,4.380000000000000000e+04 14 | 1.913000000000000000e+03,7.660000000000000000e+04,1.950000000000000000e+04,4.090000000000000000e+04 15 | 1.914000000000000000e+03,5.230000000000000000e+04,4.570000000000000000e+04,3.940000000000000000e+04 16 | 1.915000000000000000e+03,1.950000000000000000e+04,5.110000000000000000e+04,3.900000000000000000e+04 17 | 1.916000000000000000e+03,1.120000000000000000e+04,2.970000000000000000e+04,3.670000000000000000e+04 18 | 1.917000000000000000e+03,7.600000000000000000e+03,1.580000000000000000e+04,4.180000000000000000e+04 19 | 1.918000000000000000e+03,1.460000000000000000e+04,9.700000000000000000e+03,4.330000000000000000e+04 20 | 1.919000000000000000e+03,1.620000000000000000e+04,1.010000000000000000e+04,4.130000000000000000e+04 21 | 1.920000000000000000e+03,2.470000000000000000e+04,8.600000000000000000e+03,4.730000000000000000e+04 22 | 
-------------------------------------------------------------------------------- /code/ch3/npy_test.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeamLab/machine_learning_from_scratch_with_python/d52439f9536d98f28164e8c1ecc6e9070843b971/code/ch3/npy_test.npy 
-------------------------------------------------------------------------------- /code/ch3/populations.txt: -------------------------------------------------------------------------------- 1 | # year hare lynx carrot 2 | 1900 30e3 4e3 48300 3 | 1901 47.2e3 6.1e3 48200 4 | 1902 70.2e3 9.8e3 41500 5 | 1903 77.4e3 35.2e3 38200 6 | 1904 36.3e3 59.4e3 40600 7 | 1905 20.6e3 41.7e3 39800 8 | 1906 18.1e3 19e3 38600 9 | 1907 21.4e3 13e3 42300 10 | 1908 22e3 8.3e3 44500 11 | 1909 25.4e3 9.1e3 42100 12 | 1910 27.1e3 7.4e3 46000 13 | 1911 40.3e3 8e3 46800 14 | 1912 57e3 12.3e3 43800 15 | 1913 76.6e3 19.5e3 40900 16 | 1914 52.3e3 45.7e3 39400 17 | 1915 19.5e3 51.1e3 39000 18 | 1916 11.2e3 29.7e3 36700 19 | 1917 7.6e3 15.8e3 41800 20 | 1918 14.6e3 9.7e3 43300 21 | 1919 16.2e3 10.1e3 41300 22 | 1920 24.7e3 8.6e3 47300 23 | 
-------------------------------------------------------------------------------- /code/ch4/6_dataframe_basic_operation.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import pandas as pd\n", 10 | "from pandas import Series\n", 11 | "from pandas import DataFrame\n", 12 | "\n", 13 | "import numpy as np" 14 | ] 15 | }, 16 | { 17 | "cell_type": "markdown", 18 | "metadata": {}, 19 | "source": [ 20 | "### Series addition operation" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": null, 26 | "metadata": {}, 27 | "outputs": [], 28 | "source": [ 29 | "s1 = Series(\n", 30 | " range(1,6), index=list(\"abced\"))\n", 31 | "s1" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": null, 37 | "metadata": {}, 38 | "outputs": [], 39 | "source": [ 40 | "s2 = Series(\n", 41 | " range(5,11), index=list(\"bcedef\"))\n", 42 | "s2" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": null, 48 | "metadata": {}, 49 | "outputs": [], 50 | "source": [ 51 | "s1 + s2" 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": null, 57 | "metadata": {}, 58 | "outputs": [], 59 | "source": [ 60 | "s1.add(s2)" 61 | ] 62 | }, 63 | { 64 | "cell_type": "markdown", 65 | "metadata": {}, 66 | "source": [ 67 | "### dataframe operation" 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": null, 73 | "metadata": {}, 74 | "outputs": [], 75 | "source": [ 76 | "df1 = DataFrame(\n", 77 | " np.arange(9).reshape(3,3), \n", 78 | " columns=list(\"abc\"))\n", 79 | "df1" 80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": null, 85 | "metadata": {}, 86 | "outputs": [], 87 | "source": [ 88 | "df2 = DataFrame(\n", 89 | " np.arange(16).reshape(4,4), \n", 90 | " columns=list(\"abcd\"))\n", 91 | "df2" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": null, 97 | "metadata": {}, 98 | "outputs": [], 99 | "source": [ 100 | "df1 + df2" 101 | ] 102 | }, 103 | { 104 | "cell_type": "code", 105 | "execution_count": null, 106 | "metadata": {}, 107 | "outputs": [], 108 | "source": [ 109 | "df1.add(df2,fill_value=0)" 110 | ] 111 | }, 112 | { 113 | "cell_type": "markdown", 114 | "metadata": {}, 115 | "source": [ 116 | "### operations for dataframe with series" 117 | ] 118 | }, 119 | { 120 | "cell_type": "code", 121 | "execution_count": null, 122 | "metadata": {}, 123 | "outputs": [], 124 | "source": [ 125 | "df = DataFrame(\n", 126 | " np.arange(16).reshape(4,4), \n", 127 | " columns=list(\"abcd\"))\n", 128 | "df" 129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "execution_count": null, 134 | "metadata": {}, 135 | "outputs": [], 136 | "source": [ 137 | "s = Series(\n", 138 | " np.arange(10,14), \n", 139 | " index=list(\"abcd\"))\n", 140 | "s" 141 | ] 142 | }, 143 | { 144 | "cell_type": "code", 145 | "execution_count": null, 146 | "metadata": {}, 147 | "outputs": [], 148 | "source": [ 149 | "df + s" 150 | ] 151 | }, 152 | { 153 | "cell_type": "code", 154 | "execution_count": null, 155 | "metadata": {}, 156 | "outputs": [], 157 | "source": [ 158 | "s2 = Series(np.arange(10,14))\n", 159 | "s2" 160 | ] 161 | }, 162 | { 163 | "cell_type": "code", 164 | "execution_count": null, 165 | "metadata": {}, 166 | "outputs": [], 167 | "source": [ 168 | "df" 169 | ] 170 | }, 171 | { 172 | "cell_type": "code", 173 | "execution_count": null, 174 | "metadata": {}, 175 | "outputs": [], 176 | "source": [ 177 | "df + s2" 178 | ] 179 | }, 180 | { 181 | "cell_type": "code", 182 | "execution_count": null, 183 | "metadata": {}, 184 | "outputs": [], 185 | "source": [ 186 | "df.add(s2, axis=0)" 187 | ] 188 | } 189 | ], 190 | "metadata": { 191 | "anaconda-cloud": {}, 192 | "kernelspec": { 193 | "display_name": "Python 3", 194 | "language": "python", 195 | "name": "python3" 196 | }, 197 | "language_info": { 198 | "codemirror_mode": { 199 | "name": "ipython", 200 | "version": 3 201 | }, 202 | "file_extension": ".py", 203 | "mimetype": "text/x-python", 204 | "name": "python", 205 | "nbconvert_exporter": "python", 206 | "pygments_lexer": "ipython3", 207 | "version": "3.6.3" 208 | } 209 | }, 210 | "nbformat": 4, 211 | "nbformat_minor": 2 212 | } 213 | -------------------------------------------------------------------------------- /code/ch4/8_built_in_functions.ipynb: 
-------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import pandas as pd\n", 10 | "from pandas import Series\n", 11 | "from pandas import DataFrame\n", 12 | "\n", 13 | "import numpy as np" 14 | ] 15 | }, 16 | { 17 | "cell_type": "markdown", 18 | "metadata": {}, 19 | "source": [ 20 | "### Built-in functions" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": null, 26 | "metadata": {}, 27 | "outputs": [], 28 | "source": [ 29 | "df = pd.read_csv(\"data/wages.csv\")\n", 30 | "df.head()" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": null, 36 | "metadata": {}, 37 | "outputs": [], 38 | "source": [ 39 | "df.describe()" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": null, 45 | "metadata": {}, 46 | "outputs": [], 47 | "source": [ 48 | "df.race.unique()" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": null, 54 | "metadata": {}, 55 | "outputs": [], 56 | "source": [ 57 | "dict(enumerate(sorted(df[\"race\"].unique())))" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": null, 63 | "metadata": {}, 64 | "outputs": [], 65 | "source": [ 66 | "value = list(map(int, np.array(list(enumerate(df[\"race\"].unique())))[:, 0].tolist()))\n", 67 | "key = np.array(list(enumerate(df[\"race\"].unique())), dtype=str)[:, 1].tolist()\n", 68 | "\n", 69 | "value, key" 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": null, 75 | "metadata": {}, 76 | "outputs": [], 77 | "source": [ 78 | "df[\"race\"].replace(to_replace=key, value=value, inplace=True)" 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": null, 84 | "metadata": {}, 85 | "outputs": [], 86 | "source": [ 87 | "df[\"race\"]" 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": null, 93 | "metadata": {}, 94 | "outputs": [], 95 | "source": [ 96 | "value = list(map(int, np.array(list(enumerate(df[\"sex\"].unique())))[:, 0].tolist()))\n", 97 | "key = np.array(list(enumerate(df[\"sex\"].unique())), dtype=str)[:, 1].tolist()\n", 98 | "\n", 99 | "value, key" 100 | ] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "execution_count": null, 105 | "metadata": {}, 106 | "outputs": [], 107 | "source": [ 108 | "df[\"sex\"].replace(to_replace=key, value=value, inplace=True)\n", 109 | "df.head(5)" 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "execution_count": null, 115 | "metadata": {}, 116 | "outputs": [], 117 | "source": [ 118 | "df.sum(axis=0)" 119 | ] 120 | }, 121 | { 122 | "cell_type": "code", 123 | "execution_count": null, 124 | "metadata": {}, 125 | "outputs": [], 126 | "source": [ 127 | "df.sum(axis=1)" 128 | ] 129 | }, 130 | { 131 | "cell_type": "code", 132 | "execution_count": null, 133 | "metadata": {}, 134 | "outputs": [], 135 | "source": [ 136 | "df.isnull()" 137 | ] 138 | }, 139 | { 140 | "cell_type": "code", 141 | "execution_count": null, 142 | "metadata": {}, 143 | "outputs": [], 144 | "source": [ 145 | "df.isnull().sum(0)" 146 | ] 147 | }, 148 | { 149 | "cell_type": "code", 150 | "execution_count": null, 151 | "metadata": {}, 152 | "outputs": [], 153 | "source": [ 154 | "df.sort_values([\"age\", \"earn\"], ascending=False).head(10)" 155 | ] 156 | }, 157 | { 158 | "cell_type": "code", 159 | "execution_count": null, 160 | "metadata": {}, 161 | "outputs": [], 162 | "source": [ 163 | "df.cumsum().head(5)" 164 | ] 
165 | }, 166 | { 167 | "cell_type": "code", 168 | "execution_count": null, 169 | "metadata": {}, 170 | "outputs": [], 171 | "source": [ 172 | "df.cummax().head(10)" 173 | ] 174 | }, 175 | { 176 | "cell_type": "code", 177 | "execution_count": null, 178 | "metadata": {}, 179 | "outputs": [], 180 | "source": [ 181 | "df.sort_values(\"age\", ascending=False).head(10)" 182 | ] 183 | }, 184 | { 185 | "cell_type": "code", 186 | "execution_count": null, 187 | "metadata": {}, 188 | "outputs": [], 189 | "source": [ 190 | "df.age.corr(df.earn)" 191 | ] 192 | }, 193 | { 194 | "cell_type": "code", 195 | "execution_count": null, 196 | "metadata": {}, 197 | "outputs": [], 198 | "source": [ 199 | "df.age[(df.age<45) & (df.age>15)].corr(df.earn)" 200 | ] 201 | }, 202 | { 203 | "cell_type": "code", 204 | "execution_count": null, 205 | "metadata": {}, 206 | "outputs": [], 207 | "source": [ 208 | "df.age.cov(df.earn)" 209 | ] 210 | }, 211 | { 212 | "cell_type": "code", 213 | "execution_count": null, 214 | "metadata": {}, 215 | "outputs": [], 216 | "source": [ 217 | "df.corr()" 218 | ] 219 | }, 220 | { 221 | "cell_type": "code", 222 | "execution_count": null, 223 | "metadata": {}, 224 | "outputs": [], 225 | "source": [ 226 | "df.corrwith(df.earn)" 227 | ] 228 | }, 229 | { 230 | "cell_type": "code", 231 | "execution_count": null, 232 | "metadata": {}, 233 | "outputs": [], 234 | "source": [ 235 | "df.sex.value_counts(sort=True)" 236 | ] 237 | } 238 | ], 239 | "metadata": { 240 | "anaconda-cloud": {}, 241 | "kernelspec": { 242 | "display_name": "Python 3", 243 | "language": "python", 244 | "name": "python3" 245 | }, 246 | "language_info": { 247 | "codemirror_mode": { 248 | "name": "ipython", 249 | "version": 3 250 | }, 251 | "file_extension": ".py", 252 | "mimetype": "text/x-python", 253 | "name": "python", 254 | "nbconvert_exporter": "python", 255 | "pygments_lexer": "ipython3", 256 | "version": "3.6.3" 257 | } 258 | }, 259 | "nbformat": 4, 260 | "nbformat_minor": 2 261 | } 262 | -------------------------------------------------------------------------------- /code/ch4/data/excel-comp-data.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeamLab/machine_learning_from_scratch_with_python/d52439f9536d98f28164e8c1ecc6e9070843b971/code/ch4/data/excel-comp-data.xlsx -------------------------------------------------------------------------------- /code/ch5/data/AirPassengers.csv: -------------------------------------------------------------------------------- 1 | Month,#Passengers 2 | 1949-01,112 3 | 1949-02,118 4 | 1949-03,132 5 | 1949-04,129 6 | 1949-05,121 7 | 1949-06,135 8 | 1949-07,148 9 | 1949-08,148 10 | 1949-09,136 11 | 1949-10,119 12 | 1949-11,104 13 | 1949-12,118 14 | 1950-01,115 15 | 1950-02,126 16 | 1950-03,141 17 | 1950-04,135 18 | 1950-05,125 19 | 1950-06,149 20 | 1950-07,170 21 | 1950-08,170 22 | 1950-09,158 23 | 1950-10,133 24 | 1950-11,114 25 | 1950-12,140 26 | 1951-01,145 27 | 1951-02,150 28 | 1951-03,178 29 | 1951-04,163 30 | 1951-05,172 31 | 1951-06,178 32 | 1951-07,199 33 | 1951-08,199 34 | 1951-09,184 35 | 1951-10,162 36 | 1951-11,146 37 | 1951-12,166 38 | 1952-01,171 39 | 1952-02,180 40 | 1952-03,193 41 | 1952-04,181 42 | 1952-05,183 43 | 1952-06,218 44 | 1952-07,230 45 | 1952-08,242 46 | 1952-09,209 47 | 1952-10,191 48 | 1952-11,172 49 | 1952-12,194 50 | 1953-01,196 51 | 1953-02,196 52 | 1953-03,236 53 | 1953-04,235 54 | 1953-05,229 55 | 1953-06,243 56 | 1953-07,264 57 | 1953-08,272 58 | 1953-09,237 59 | 1953-10,211 60 | 
1953-11,180 61 | 1953-12,201 62 | 1954-01,204 63 | 1954-02,188 64 | 1954-03,235 65 | 1954-04,227 66 | 1954-05,234 67 | 1954-06,264 68 | 1954-07,302 69 | 1954-08,293 70 | 1954-09,259 71 | 1954-10,229 72 | 1954-11,203 73 | 1954-12,229 74 | 1955-01,242 75 | 1955-02,233 76 | 1955-03,267 77 | 1955-04,269 78 | 1955-05,270 79 | 1955-06,315 80 | 1955-07,364 81 | 1955-08,347 82 | 1955-09,312 83 | 1955-10,274 84 | 1955-11,237 85 | 1955-12,278 86 | 1956-01,284 87 | 1956-02,277 88 | 1956-03,317 89 | 1956-04,313 90 | 1956-05,318 91 | 1956-06,374 92 | 1956-07,413 93 | 1956-08,405 94 | 1956-09,355 95 | 1956-10,306 96 | 1956-11,271 97 | 1956-12,306 98 | 1957-01,315 99 | 1957-02,301 100 | 1957-03,356 101 | 1957-04,348 102 | 1957-05,355 103 | 1957-06,422 104 | 1957-07,465 105 | 1957-08,467 106 | 1957-09,404 107 | 1957-10,347 108 | 1957-11,305 109 | 1957-12,336 110 | 1958-01,340 111 | 1958-02,318 112 | 1958-03,362 113 | 1958-04,348 114 | 1958-05,363 115 | 1958-06,435 116 | 1958-07,491 117 | 1958-08,505 118 | 1958-09,404 119 | 1958-10,359 120 | 1958-11,310 121 | 1958-12,337 122 | 1959-01,360 123 | 1959-02,342 124 | 1959-03,406 125 | 1959-04,396 126 | 1959-05,420 127 | 1959-06,472 128 | 1959-07,548 129 | 1959-08,559 130 | 1959-09,463 131 | 1959-10,407 132 | 1959-11,362 133 | 1959-12,405 134 | 1960-01,417 135 | 1960-02,391 136 | 1960-03,419 137 | 1960-04,461 138 | 1960-05,472 139 | 1960-06,535 140 | 1960-07,622 141 | 1960-08,606 142 | 1960-09,508 143 | 1960-10,461 144 | 1960-11,390 145 | 1960-12,432 146 | -------------------------------------------------------------------------------- /code/ch5/data/customer-status.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeamLab/machine_learning_from_scratch_with_python/d52439f9536d98f28164e8c1ecc6e9070843b971/code/ch5/data/customer-status.xlsx -------------------------------------------------------------------------------- /code/ch5/data/excel-comp-data.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeamLab/machine_learning_from_scratch_with_python/d52439f9536d98f28164e8c1ecc6e9070843b971/code/ch5/data/excel-comp-data.xlsx -------------------------------------------------------------------------------- /code/ch5/data/flights.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeamLab/machine_learning_from_scratch_with_python/d52439f9536d98f28164e8c1ecc6e9070843b971/code/ch5/data/flights.db -------------------------------------------------------------------------------- /code/ch5/data/movie_rating.csv: -------------------------------------------------------------------------------- 1 | critic,title,rating 2 | Jack Matthews,Lady in the Water,3.0 3 | Jack Matthews,Snakes on a Plane,4.0 4 | Jack Matthews,You Me and Dupree,3.5 5 | Jack Matthews,Superman Returns,5.0 6 | Jack Matthews,The Night Listener,3.0 7 | Mick LaSalle,Lady in the Water,3.0 8 | Mick LaSalle,Snakes on a Plane,4.0 9 | Mick LaSalle,Just My Luck,2.0 10 | Mick LaSalle,Superman Returns,3.0 11 | Mick LaSalle,You Me and Dupree,2.0 12 | Mick LaSalle,The Night Listener,3.0 13 | Claudia Puig,Snakes on a Plane,3.5 14 | Claudia Puig,Just My Luck,3.0 15 | Claudia Puig,You Me and Dupree,2.5 16 | Claudia Puig,Superman Returns,4.0 17 | Claudia Puig,The Night Listener,4.5 18 | Lisa Rose,Lady in the Water,2.5 19 | Lisa Rose,Snakes on a Plane,3.5 20 | Lisa Rose,Just My Luck,3.0 21 | Lisa Rose,Superman Returns,3.5 
22 | Lisa Rose,The Night Listener,3.0 23 | Lisa Rose,You Me and Dupree,2.5 24 | Toby,Snakes on a Plane,4.5 25 | Toby,Superman Returns,4.0 26 | Toby,You Me and Dupree,1.0 27 | Gene Seymour,Lady in the Water,3.0 28 | Gene Seymour,Snakes on a Plane,3.5 29 | Gene Seymour,Just My Luck,1.5 30 | Gene Seymour,Superman Returns,5.0 31 | Gene Seymour,You Me and Dupree,3.5 32 | Gene Seymour,The Night Listener,3.0 33 | -------------------------------------------------------------------------------- /code/ch5/data/sales-feb-2014.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeamLab/machine_learning_from_scratch_with_python/d52439f9536d98f28164e8c1ecc6e9070843b971/code/ch5/data/sales-feb-2014.xlsx -------------------------------------------------------------------------------- /code/ch5/data/sales-jan-2014.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeamLab/machine_learning_from_scratch_with_python/d52439f9536d98f28164e8c1ecc6e9070843b971/code/ch5/data/sales-jan-2014.xlsx -------------------------------------------------------------------------------- /code/ch5/data/sales-mar-2014.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeamLab/machine_learning_from_scratch_with_python/d52439f9536d98f28164e8c1ecc6e9070843b971/code/ch5/data/sales-mar-2014.xlsx -------------------------------------------------------------------------------- /code/ch6/9_pipeline_example.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "toc": "true" 7 | }, 8 | "source": [ 9 | "# Table of Contents\n", 10 | "
<div id=\"toc\"></div>
" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 2, 16 | "metadata": { 17 | "ExecuteTime": { 18 | "end_time": "2017-11-14T07:30:27.836809Z", 19 | "start_time": "2017-11-14T07:30:27.648759Z" 20 | }, 21 | "collapsed": true 22 | }, 23 | "outputs": [], 24 | "source": [ 25 | "from sklearn.datasets import load_boston\n", 26 | "import matplotlib.pyplot as plt\n", 27 | "import numpy as np" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": 3, 33 | "metadata": { 34 | "ExecuteTime": { 35 | "end_time": "2017-11-14T07:30:27.846084Z", 36 | "start_time": "2017-11-14T07:30:27.837993Z" 37 | }, 38 | "collapsed": true 39 | }, 40 | "outputs": [], 41 | "source": [ 42 | "boston = load_boston()\n", 43 | "\n", 44 | "x_data = boston.data\n", 45 | "y_data = boston.target.reshape(boston.target.size,1)" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": 4, 51 | "metadata": { 52 | "ExecuteTime": { 53 | "end_time": "2017-11-14T07:30:27.952644Z", 54 | "start_time": "2017-11-14T07:30:27.946769Z" 55 | }, 56 | "collapsed": true 57 | }, 58 | "outputs": [], 59 | "source": [ 60 | "from sklearn.model_selection import train_test_split\n", 61 | "x_train, x_test, y_train, y_test = train_test_split(x_data, y_data, test_size=0.2, random_state=22)" 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": 5, 67 | "metadata": { 68 | "ExecuteTime": { 69 | "end_time": "2017-11-14T07:30:28.402194Z", 70 | "start_time": "2017-11-14T07:30:28.396785Z" 71 | } 72 | }, 73 | "outputs": [ 74 | { 75 | "data": { 76 | "text/plain": [ 77 | "((404, 13), (404, 1), (102, 13), (102, 1))" 78 | ] 79 | }, 80 | "execution_count": 5, 81 | "metadata": {}, 82 | "output_type": "execute_result" 83 | } 84 | ], 85 | "source": [ 86 | "x_train.shape, y_train.shape, x_test.shape, y_test.shape" 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": 7, 92 | "metadata": { 93 | "ExecuteTime": { 94 | "end_time": "2017-11-14T07:31:50.044854Z", 95 | "start_time": "2017-11-14T07:31:50.002449Z" 96 | } 97 | }, 98 | "outputs": [ 99 | { 100 | "data": { 101 | "text/plain": [ 102 | "0.76580205144610103" 103 | ] 104 | }, 105 | "execution_count": 7, 106 | "metadata": {}, 107 | "output_type": "execute_result" 108 | } 109 | ], 110 | "source": [ 111 | "from sklearn.preprocessing import StandardScaler\n", 112 | "from sklearn.linear_model import LinearRegression\n", 113 | "from sklearn.neural_network import MLPRegressor\n", 114 | "\n", 115 | "from sklearn.pipeline import Pipeline\n", 116 | "\n", 117 | "pipe_lr = Pipeline(steps=[\n", 118 | " ('scl', StandardScaler()), ('regr', LinearRegression())\n", 119 | "])\n", 120 | "pipe_mlp = Pipeline(steps=[\n", 121 | " ('scl', StandardScaler()), ('regr', MLPRegressor())\n", 122 | "])\n", 123 | "\n", 124 | "\n", 125 | "pipe_lr.fit(X=x_train, y=y_train)\n", 126 | "pipe_lr.score(x_test, y_test)" 127 | ] 128 | }, 129 | { 130 | "cell_type": "code", 131 | "execution_count": 8, 132 | "metadata": { 133 | "ExecuteTime": { 134 | "end_time": "2017-11-14T07:31:54.034083Z", 135 | "start_time": "2017-11-14T07:31:53.762815Z" 136 | } 137 | }, 138 | "outputs": [ 139 | { 140 | "name": "stderr", 141 | "output_type": "stream", 142 | "text": [ 143 | "/Users/sungchulchoi/miniconda3/envs/ml_python/lib/python3.6/site-packages/sklearn/neural_network/multilayer_perceptron.py:1266: DataConversionWarning: A column-vector y was passed when a 1d array was expected. 
Please change the shape of y to (n_samples, ), for example using ravel().\n", 144 | " y = column_or_1d(y, warn=True)\n", 145 | "/Users/sungchulchoi/miniconda3/envs/ml_python/lib/python3.6/site-packages/sklearn/neural_network/multilayer_perceptron.py:563: ConvergenceWarning: Stochastic Optimizer: Maximum iterations reached and the optimization hasn't converged yet.\n", 146 | " % (), ConvergenceWarning)\n" 147 | ] 148 | }, 149 | { 150 | "data": { 151 | "text/plain": [ 152 | "0.74963877810227975" 153 | ] 154 | }, 155 | "execution_count": 8, 156 | "metadata": {}, 157 | "output_type": "execute_result" 158 | } 159 | ], 160 | "source": [ 161 | "pipe_mlp.fit(X=x_train, y=y_train)\n", 162 | "pipe_mlp.score(x_test, y_test)" 163 | ] 164 | }, 165 | { 166 | "cell_type": "code", 167 | "execution_count": null, 168 | "metadata": { 169 | "collapsed": true 170 | }, 171 | "outputs": [], 172 | "source": [] 173 | } 174 | ], 175 | "metadata": { 176 | "_draft": { 177 | "nbviewer_url": "https://gist.github.com/152eb43422672d8a45ed69dd2e98e256" 178 | }, 179 | "anaconda-cloud": {}, 180 | "gist": { 181 | "data": { 182 | "description": "codes/4_ml/1_linear_regression/9_pipeline_example.ipynb", 183 | "public": true 184 | }, 185 | "id": "152eb43422672d8a45ed69dd2e98e256" 186 | }, 187 | "kernelspec": { 188 | "display_name": "Python 3", 189 | "language": "python", 190 | "name": "python3" 191 | }, 192 | "language_info": { 193 | "codemirror_mode": { 194 | "name": "ipython", 195 | "version": 3 196 | }, 197 | "file_extension": ".py", 198 | "mimetype": "text/x-python", 199 | "name": "python", 200 | "nbconvert_exporter": "python", 201 | "pygments_lexer": "ipython3", 202 | "version": "3.6.2" 203 | }, 204 | "nav_menu": {}, 205 | "toc": { 206 | "navigate_menu": true, 207 | "number_sections": true, 208 | "sideBar": true, 209 | "threshold": 6, 210 | "toc_cell": true, 211 | "toc_section_display": "block", 212 | "toc_window_display": false 213 | }, 214 | "toc_position": { 215 | "height": "830px", 216 | "left": "0px", 217 | "right": "auto", 218 | "top": "106px", 219 | "width": "212px" 220 | } 221 | }, 222 | "nbformat": 4, 223 | "nbformat_minor": 2 224 | } 225 | -------------------------------------------------------------------------------- /code/ch6/test.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeamLab/machine_learning_from_scratch_with_python/d52439f9536d98f28164e8c1ecc6e9070843b971/code/ch6/test.png -------------------------------------------------------------------------------- /code/ch6/titanic/gender_submission.csv: -------------------------------------------------------------------------------- 1 | PassengerId,Survived 2 | 892,0 3 | 893,1 4 | 894,0 5 | 895,0 6 | 896,1 7 | 897,0 8 | 898,1 9 | 899,0 10 | 900,1 11 | 901,0 12 | 902,0 13 | 903,0 14 | 904,1 15 | 905,0 16 | 906,1 17 | 907,1 18 | 908,0 19 | 909,0 20 | 910,1 21 | 911,1 22 | 912,0 23 | 913,0 24 | 914,1 25 | 915,0 26 | 916,1 27 | 917,0 28 | 918,1 29 | 919,0 30 | 920,0 31 | 921,0 32 | 922,0 33 | 923,0 34 | 924,1 35 | 925,1 36 | 926,0 37 | 927,0 38 | 928,1 39 | 929,1 40 | 930,0 41 | 931,0 42 | 932,0 43 | 933,0 44 | 934,0 45 | 935,1 46 | 936,1 47 | 937,0 48 | 938,0 49 | 939,0 50 | 940,1 51 | 941,1 52 | 942,0 53 | 943,0 54 | 944,1 55 | 945,1 56 | 946,0 57 | 947,0 58 | 948,0 59 | 949,0 60 | 950,0 61 | 951,1 62 | 952,0 63 | 953,0 64 | 954,0 65 | 955,1 66 | 956,0 67 | 957,1 68 | 958,1 69 | 959,0 70 | 960,0 71 | 961,1 72 | 962,1 73 | 963,0 74 | 964,1 75 | 965,0 76 | 966,1 77 | 967,0 78 | 968,0 79 | 
969,1 80 | 970,0 81 | 971,1 82 | 972,0 83 | 973,0 84 | 974,0 85 | 975,0 86 | 976,0 87 | 977,0 88 | 978,1 89 | 979,1 90 | 980,1 91 | 981,0 92 | 982,1 93 | 983,0 94 | 984,1 95 | 985,0 96 | 986,0 97 | 987,0 98 | 988,1 99 | 989,0 100 | 990,1 101 | 991,0 102 | 992,1 103 | 993,0 104 | 994,0 105 | 995,0 106 | 996,1 107 | 997,0 108 | 998,0 109 | 999,0 110 | 1000,0 111 | 1001,0 112 | 1002,0 113 | 1003,1 114 | 1004,1 115 | 1005,1 116 | 1006,1 117 | 1007,0 118 | 1008,0 119 | 1009,1 120 | 1010,0 121 | 1011,1 122 | 1012,1 123 | 1013,0 124 | 1014,1 125 | 1015,0 126 | 1016,0 127 | 1017,1 128 | 1018,0 129 | 1019,1 130 | 1020,0 131 | 1021,0 132 | 1022,0 133 | 1023,0 134 | 1024,1 135 | 1025,0 136 | 1026,0 137 | 1027,0 138 | 1028,0 139 | 1029,0 140 | 1030,1 141 | 1031,0 142 | 1032,1 143 | 1033,1 144 | 1034,0 145 | 1035,0 146 | 1036,0 147 | 1037,0 148 | 1038,0 149 | 1039,0 150 | 1040,0 151 | 1041,0 152 | 1042,1 153 | 1043,0 154 | 1044,0 155 | 1045,1 156 | 1046,0 157 | 1047,0 158 | 1048,1 159 | 1049,1 160 | 1050,0 161 | 1051,1 162 | 1052,1 163 | 1053,0 164 | 1054,1 165 | 1055,0 166 | 1056,0 167 | 1057,1 168 | 1058,0 169 | 1059,0 170 | 1060,1 171 | 1061,1 172 | 1062,0 173 | 1063,0 174 | 1064,0 175 | 1065,0 176 | 1066,0 177 | 1067,1 178 | 1068,1 179 | 1069,0 180 | 1070,1 181 | 1071,1 182 | 1072,0 183 | 1073,0 184 | 1074,1 185 | 1075,0 186 | 1076,1 187 | 1077,0 188 | 1078,1 189 | 1079,0 190 | 1080,1 191 | 1081,0 192 | 1082,0 193 | 1083,0 194 | 1084,0 195 | 1085,0 196 | 1086,0 197 | 1087,0 198 | 1088,0 199 | 1089,1 200 | 1090,0 201 | 1091,1 202 | 1092,1 203 | 1093,0 204 | 1094,0 205 | 1095,1 206 | 1096,0 207 | 1097,0 208 | 1098,1 209 | 1099,0 210 | 1100,1 211 | 1101,0 212 | 1102,0 213 | 1103,0 214 | 1104,0 215 | 1105,1 216 | 1106,1 217 | 1107,0 218 | 1108,1 219 | 1109,0 220 | 1110,1 221 | 1111,0 222 | 1112,1 223 | 1113,0 224 | 1114,1 225 | 1115,0 226 | 1116,1 227 | 1117,1 228 | 1118,0 229 | 1119,1 230 | 1120,0 231 | 1121,0 232 | 1122,0 233 | 1123,1 234 | 1124,0 235 | 1125,0 236 | 1126,0 237 | 1127,0 238 | 1128,0 239 | 1129,0 240 | 1130,1 241 | 1131,1 242 | 1132,1 243 | 1133,1 244 | 1134,0 245 | 1135,0 246 | 1136,0 247 | 1137,0 248 | 1138,1 249 | 1139,0 250 | 1140,1 251 | 1141,1 252 | 1142,1 253 | 1143,0 254 | 1144,0 255 | 1145,0 256 | 1146,0 257 | 1147,0 258 | 1148,0 259 | 1149,0 260 | 1150,1 261 | 1151,0 262 | 1152,0 263 | 1153,0 264 | 1154,1 265 | 1155,1 266 | 1156,0 267 | 1157,0 268 | 1158,0 269 | 1159,0 270 | 1160,1 271 | 1161,0 272 | 1162,0 273 | 1163,0 274 | 1164,1 275 | 1165,1 276 | 1166,0 277 | 1167,1 278 | 1168,0 279 | 1169,0 280 | 1170,0 281 | 1171,0 282 | 1172,1 283 | 1173,0 284 | 1174,1 285 | 1175,1 286 | 1176,1 287 | 1177,0 288 | 1178,0 289 | 1179,0 290 | 1180,0 291 | 1181,0 292 | 1182,0 293 | 1183,1 294 | 1184,0 295 | 1185,0 296 | 1186,0 297 | 1187,0 298 | 1188,1 299 | 1189,0 300 | 1190,0 301 | 1191,0 302 | 1192,0 303 | 1193,0 304 | 1194,0 305 | 1195,0 306 | 1196,1 307 | 1197,1 308 | 1198,0 309 | 1199,0 310 | 1200,0 311 | 1201,1 312 | 1202,0 313 | 1203,0 314 | 1204,0 315 | 1205,1 316 | 1206,1 317 | 1207,1 318 | 1208,0 319 | 1209,0 320 | 1210,0 321 | 1211,0 322 | 1212,0 323 | 1213,0 324 | 1214,0 325 | 1215,0 326 | 1216,1 327 | 1217,0 328 | 1218,1 329 | 1219,0 330 | 1220,0 331 | 1221,0 332 | 1222,1 333 | 1223,0 334 | 1224,0 335 | 1225,1 336 | 1226,0 337 | 1227,0 338 | 1228,0 339 | 1229,0 340 | 1230,0 341 | 1231,0 342 | 1232,0 343 | 1233,0 344 | 1234,0 345 | 1235,1 346 | 1236,0 347 | 1237,1 348 | 1238,0 349 | 1239,1 350 | 1240,0 351 | 1241,1 352 | 1242,1 353 | 1243,0 354 | 1244,0 355 | 1245,0 356 | 
1246,1 357 | 1247,0 358 | 1248,1 359 | 1249,0 360 | 1250,0 361 | 1251,1 362 | 1252,0 363 | 1253,1 364 | 1254,1 365 | 1255,0 366 | 1256,1 367 | 1257,1 368 | 1258,0 369 | 1259,1 370 | 1260,1 371 | 1261,0 372 | 1262,0 373 | 1263,1 374 | 1264,0 375 | 1265,0 376 | 1266,1 377 | 1267,1 378 | 1268,1 379 | 1269,0 380 | 1270,0 381 | 1271,0 382 | 1272,0 383 | 1273,0 384 | 1274,1 385 | 1275,1 386 | 1276,0 387 | 1277,1 388 | 1278,0 389 | 1279,0 390 | 1280,0 391 | 1281,0 392 | 1282,0 393 | 1283,1 394 | 1284,0 395 | 1285,0 396 | 1286,0 397 | 1287,1 398 | 1288,0 399 | 1289,1 400 | 1290,0 401 | 1291,0 402 | 1292,1 403 | 1293,0 404 | 1294,1 405 | 1295,0 406 | 1296,0 407 | 1297,0 408 | 1298,0 409 | 1299,0 410 | 1300,1 411 | 1301,1 412 | 1302,1 413 | 1303,1 414 | 1304,1 415 | 1305,0 416 | 1306,1 417 | 1307,0 418 | 1308,0 419 | 1309,0 420 | -------------------------------------------------------------------------------- /code/ch6/titanic/submission_result.csv: -------------------------------------------------------------------------------- 1 | PassengerId,Survived 2 | 892,0 3 | 893,0 4 | 894,0 5 | 895,0 6 | 896,1 7 | 897,0 8 | 898,1 9 | 899,0 10 | 900,1 11 | 901,0 12 | 902,0 13 | 903,0 14 | 904,1 15 | 905,0 16 | 906,1 17 | 907,1 18 | 908,0 19 | 909,0 20 | 910,1 21 | 911,1 22 | 912,0 23 | 913,0 24 | 914,1 25 | 915,1 26 | 916,1 27 | 917,0 28 | 918,1 29 | 919,0 30 | 920,0 31 | 921,0 32 | 922,0 33 | 923,0 34 | 924,0 35 | 925,1 36 | 926,1 37 | 927,0 38 | 928,1 39 | 929,1 40 | 930,0 41 | 931,0 42 | 932,0 43 | 933,0 44 | 934,0 45 | 935,1 46 | 936,1 47 | 937,0 48 | 938,0 49 | 939,0 50 | 940,1 51 | 941,0 52 | 942,0 53 | 943,0 54 | 944,1 55 | 945,1 56 | 946,0 57 | 947,0 58 | 948,0 59 | 949,0 60 | 950,0 61 | 951,1 62 | 952,0 63 | 953,0 64 | 954,0 65 | 955,1 66 | 956,1 67 | 957,1 68 | 958,1 69 | 959,0 70 | 960,1 71 | 961,1 72 | 962,1 73 | 963,0 74 | 964,1 75 | 965,1 76 | 966,1 77 | 967,1 78 | 968,0 79 | 969,1 80 | 970,0 81 | 971,1 82 | 972,0 83 | 973,0 84 | 974,0 85 | 975,0 86 | 976,0 87 | 977,0 88 | 978,1 89 | 979,1 90 | 980,1 91 | 981,0 92 | 982,1 93 | 983,0 94 | 984,1 95 | 985,0 96 | 986,1 97 | 987,0 98 | 988,1 99 | 989,0 100 | 990,1 101 | 991,0 102 | 992,1 103 | 993,0 104 | 994,0 105 | 995,0 106 | 996,1 107 | 997,0 108 | 998,0 109 | 999,0 110 | 1000,0 111 | 1001,0 112 | 1002,0 113 | 1003,1 114 | 1004,1 115 | 1005,1 116 | 1006,1 117 | 1007,0 118 | 1008,0 119 | 1009,1 120 | 1010,1 121 | 1011,1 122 | 1012,1 123 | 1013,0 124 | 1014,1 125 | 1015,0 126 | 1016,0 127 | 1017,1 128 | 1018,0 129 | 1019,1 130 | 1020,0 131 | 1021,0 132 | 1022,0 133 | 1023,0 134 | 1024,1 135 | 1025,0 136 | 1026,0 137 | 1027,0 138 | 1028,0 139 | 1029,0 140 | 1030,1 141 | 1031,0 142 | 1032,0 143 | 1033,1 144 | 1034,0 145 | 1035,0 146 | 1036,0 147 | 1037,0 148 | 1038,0 149 | 1039,0 150 | 1040,0 151 | 1041,0 152 | 1042,1 153 | 1043,0 154 | 1044,0 155 | 1045,0 156 | 1046,0 157 | 1047,0 158 | 1048,1 159 | 1049,1 160 | 1050,0 161 | 1051,1 162 | 1052,1 163 | 1053,0 164 | 1054,1 165 | 1055,0 166 | 1056,0 167 | 1057,1 168 | 1058,0 169 | 1059,0 170 | 1060,1 171 | 1061,1 172 | 1062,0 173 | 1063,0 174 | 1064,0 175 | 1065,0 176 | 1066,0 177 | 1067,1 178 | 1068,1 179 | 1069,0 180 | 1070,1 181 | 1071,1 182 | 1072,0 183 | 1073,0 184 | 1074,1 185 | 1075,0 186 | 1076,1 187 | 1077,0 188 | 1078,1 189 | 1079,0 190 | 1080,0 191 | 1081,0 192 | 1082,0 193 | 1083,0 194 | 1084,0 195 | 1085,0 196 | 1086,0 197 | 1087,0 198 | 1088,1 199 | 1089,1 200 | 1090,0 201 | 1091,1 202 | 1092,1 203 | 1093,0 204 | 1094,0 205 | 1095,1 206 | 1096,0 207 | 1097,0 208 | 1098,1 209 | 1099,0 210 
| 1100,1 211 | 1101,0 212 | 1102,0 213 | 1103,0 214 | 1104,0 215 | 1105,0 216 | 1106,0 217 | 1107,0 218 | 1108,1 219 | 1109,0 220 | 1110,1 221 | 1111,0 222 | 1112,1 223 | 1113,0 224 | 1114,1 225 | 1115,0 226 | 1116,1 227 | 1117,1 228 | 1118,0 229 | 1119,1 230 | 1120,0 231 | 1121,0 232 | 1122,0 233 | 1123,1 234 | 1124,0 235 | 1125,0 236 | 1126,0 237 | 1127,0 238 | 1128,0 239 | 1129,0 240 | 1130,1 241 | 1131,1 242 | 1132,1 243 | 1133,1 244 | 1134,0 245 | 1135,0 246 | 1136,0 247 | 1137,0 248 | 1138,1 249 | 1139,0 250 | 1140,1 251 | 1141,1 252 | 1142,1 253 | 1143,0 254 | 1144,1 255 | 1145,0 256 | 1146,0 257 | 1147,0 258 | 1148,0 259 | 1149,0 260 | 1150,1 261 | 1151,0 262 | 1152,0 263 | 1153,0 264 | 1154,1 265 | 1155,1 266 | 1156,0 267 | 1157,0 268 | 1158,0 269 | 1159,0 270 | 1160,1 271 | 1161,0 272 | 1162,0 273 | 1163,0 274 | 1164,1 275 | 1165,1 276 | 1166,0 277 | 1167,1 278 | 1168,0 279 | 1169,0 280 | 1170,0 281 | 1171,0 282 | 1172,1 283 | 1173,0 284 | 1174,1 285 | 1175,1 286 | 1176,1 287 | 1177,0 288 | 1178,0 289 | 1179,0 290 | 1180,0 291 | 1181,0 292 | 1182,0 293 | 1183,1 294 | 1184,0 295 | 1185,0 296 | 1186,0 297 | 1187,0 298 | 1188,1 299 | 1189,0 300 | 1190,0 301 | 1191,0 302 | 1192,0 303 | 1193,0 304 | 1194,0 305 | 1195,0 306 | 1196,1 307 | 1197,1 308 | 1198,0 309 | 1199,0 310 | 1200,0 311 | 1201,0 312 | 1202,0 313 | 1203,0 314 | 1204,0 315 | 1205,1 316 | 1206,1 317 | 1207,1 318 | 1208,0 319 | 1209,0 320 | 1210,0 321 | 1211,0 322 | 1212,0 323 | 1213,0 324 | 1214,0 325 | 1215,0 326 | 1216,1 327 | 1217,0 328 | 1218,1 329 | 1219,0 330 | 1220,0 331 | 1221,0 332 | 1222,1 333 | 1223,0 334 | 1224,0 335 | 1225,1 336 | 1226,0 337 | 1227,0 338 | 1228,0 339 | 1229,0 340 | 1230,0 341 | 1231,0 342 | 1232,0 343 | 1233,0 344 | 1234,0 345 | 1235,1 346 | 1236,0 347 | 1237,1 348 | 1238,0 349 | 1239,1 350 | 1240,0 351 | 1241,1 352 | 1242,1 353 | 1243,0 354 | 1244,0 355 | 1245,0 356 | 1246,1 357 | 1247,0 358 | 1248,1 359 | 1249,0 360 | 1250,0 361 | 1251,0 362 | 1252,0 363 | 1253,1 364 | 1254,1 365 | 1255,0 366 | 1256,1 367 | 1257,0 368 | 1258,0 369 | 1259,1 370 | 1260,1 371 | 1261,0 372 | 1262,0 373 | 1263,1 374 | 1264,0 375 | 1265,0 376 | 1266,1 377 | 1267,1 378 | 1268,0 379 | 1269,0 380 | 1270,0 381 | 1271,0 382 | 1272,0 383 | 1273,0 384 | 1274,1 385 | 1275,1 386 | 1276,0 387 | 1277,1 388 | 1278,0 389 | 1279,0 390 | 1280,0 391 | 1281,0 392 | 1282,1 393 | 1283,1 394 | 1284,0 395 | 1285,0 396 | 1286,0 397 | 1287,1 398 | 1288,0 399 | 1289,1 400 | 1290,0 401 | 1291,0 402 | 1292,1 403 | 1293,0 404 | 1294,1 405 | 1295,1 406 | 1296,0 407 | 1297,0 408 | 1298,0 409 | 1299,0 410 | 1300,1 411 | 1301,1 412 | 1302,1 413 | 1303,1 414 | 1304,1 415 | 1305,0 416 | 1306,1 417 | 1307,0 418 | 1308,0 419 | 1309,0 420 | -------------------------------------------------------------------------------- /code/ch7/data/slr06.csv: -------------------------------------------------------------------------------- 1 | X,Y 2 | 108,392.5 3 | 19,46.2 4 | 13,15.7 5 | 124,422.2 6 | 40,119.4 7 | 57,170.9 8 | 23,56.9 9 | 14,77.5 10 | 45,214 11 | 10,65.3 12 | 5,20.9 13 | 48,248.1 14 | 11,23.5 15 | 23,39.6 16 | 7,48.8 17 | 2,6.6 18 | 24,134.9 19 | 6,50.9 20 | 3,4.4 21 | 23,113 22 | 6,14.8 23 | 9,48.7 24 | 9,52.1 25 | 3,13.2 26 | 29,103.9 27 | 7,77.5 28 | 4,11.8 29 | 20,98.1 30 | 7,27.9 31 | 4,38.1 32 | 0,0 33 | 25,69.2 34 | 6,14.6 35 | 5,40.3 36 | 22,161.5 37 | 11,57.2 38 | 61,217.6 39 | 12,58.1 40 | 4,12.6 41 | 16,59.6 42 | 13,89.9 43 | 60,202.4 44 | 41,181.3 45 | 37,152.8 46 | 55,162.8 47 | 41,73.4 48 | 11,21.3 49 | 27,92.6 50 | 8,76.1 51 | 
3,39.9 52 | 17,142.1 53 | 13,93 54 | 13,31.9 55 | 15,32.1 56 | 8,55.6 57 | 29,133.3 58 | 30,194.5 59 | 24,137.9 60 | 9,87.4 61 | 31,209.8 62 | 14,95.5 63 | 53,244.6 64 | 26,187.5 -------------------------------------------------------------------------------- /code/ch8/yield.csv: -------------------------------------------------------------------------------- 1 | i Temp Yield 2 | 1 50 3.3 3 | 2 50 2.8 4 | 3 50 2.9 5 | 4 70 2.3 6 | 5 70 2.6 7 | 6 70 2.1 8 | 7 80 2.5 9 | 8 80 2.9 10 | 9 80 2.4 11 | 10 90 3 12 | 11 90 3.1 13 | 12 90 2.8 14 | 13 100 3.3 15 | 14 100 3.5 16 | 15 100 3 17 | -------------------------------------------------------------------------------- /code/ch9/data/generator.csv: -------------------------------------------------------------------------------- 1 | ID,RPM,VIBRATION,STATUS 2 | 1,568,585,good 3 | 2,586,565,good 4 | 3,609,536,good 5 | 4,616,492,good 6 | 5,632,465,good 7 | 6,652,528,good 8 | 7,655,496,good 9 | 8,660,471,good 10 | 9,688,408,good 11 | 10,696,399,good 12 | 11,708,387,good 13 | 12,701,434,good 14 | 13,715,506,good 15 | 14,732,485,good 16 | 15,731,395,good 17 | 16,749,398,good 18 | 17,759,512,good 19 | 18,773,431,good 20 | 19,782,456,good 21 | 20,797,476,good 22 | 21,794,421,good 23 | 22,824,452,good 24 | 23,835,441,good 25 | 24,862,372,good 26 | 25,879,340,good 27 | 26,892,370,good 28 | 27,913,373,good 29 | 28,933,330,good 30 | 29,562,309,faulty 31 | 30,578,346,faulty 32 | 31,593,357,faulty 33 | 32,626,341,faulty 34 | 33,635,252,faulty 35 | 34,658,235,faulty 36 | 35,663,299,faulty 37 | 36,677,223,faulty 38 | 37,685,303,faulty 39 | 38,698,197,faulty 40 | 39,699,311,faulty 41 | 40,712,257,faulty 42 | 41,722,193,faulty 43 | 42,735,259,faulty 44 | 43,738,314,faulty 45 | 44,753,113,faulty 46 | 45,767,286,faulty 47 | 46,771,264,faulty 48 | 47,780,137,faulty 49 | 48,784,131,faulty 50 | 49,798,132,faulty 51 | 50,820,152,faulty 52 | 51,834,157,faulty 53 | 52,858,163,faulty 54 | 53,888,91,faulty 55 | 54,891,156,faulty 56 | 55,911,79,faulty 57 | 56,939,99,faulty -------------------------------------------------------------------------------- /code/ch9/data/generators.csv: -------------------------------------------------------------------------------- 1 | ID RPM VIBRATION STATUS 2 | 1 568 585 good 3 | 2 586 565 good 4 | 3 609 536 good 5 | 4 616 492 good 6 | 5 632 465 good 7 | 6 652 528 good 8 | 7 655 496 good 9 | 8 660 471 good 10 | 9 688 408 good 11 | 10 696 399 good 12 | 11 708 387 good 13 | 12 701 434 good 14 | 13 715 506 good 15 | 14 732 485 good 16 | 15 731 395 good 17 | 16 749 398 good 18 | 17 759 512 good 19 | 18 773 431 good 20 | 19 782 456 good 21 | 20 797 476 good 22 | 21 794 421 good 23 | 22 824 452 good 24 | 23 835 441 good 25 | 24 862 372 good 26 | 25 879 340 good 27 | 26 892 370 good 28 | 27 913 373 good 29 | 28 933 330 good 30 | 29 562 309 faulty 31 | 30 578 346 faulty 32 | 31 593 357 faulty 33 | 32 626 341 faulty 34 | 33 635 252 faulty 35 | 34 658 235 faulty 36 | 35 663 299 faulty 37 | 36 677 223 faulty 38 | 37 685 303 faulty 39 | 38 698 197 faulty 40 | 39 699 311 faulty 41 | 40 712 257 faulty 42 | 41 722 193 faulty 43 | 42 735 259 faulty 44 | 43 738 314 faulty 45 | 44 753 113 faulty 46 | 45 767 286 faulty 47 | 46 771 264 faulty 48 | 47 780 137 faulty 49 | 48 784 131 faulty 50 | 49 798 132 faulty 51 | 50 820 152 faulty 52 | 51 834 157 faulty 53 | 52 858 163 faulty 54 | 53 888 91 faulty 55 | 54 891 156 faulty 56 | 55 911 79 faulty 57 | 56 939 99 faulty 58 | -------------------------------------------------------------------------------- /code/ch99/teamlab_classifier.py: -------------------------------------------------------------------------------- 1 | class SoftmaxRegressionClassifier(object): 2 | 3 | def softmax(self): 4 | pass 5 | 6 | def loss(self): 7 | pass 8 | 9 | def fit(self): 10 | pass 11 | 12 | 
def predict(self): 13 | pass 14 | -------------------------------------------------------------------------------- /code/test.md: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /documents/How_to_use_spark-sklearn_using_Google_Dataproc(kor).ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "nbpresent": { 7 | "id": "1a8b9591-680e-4d1d-ba06-08bd8d2e77cf" 8 | } 9 | }, 10 | "source": [ 11 | "## Tutorial: How to use spark-sklearn using Google Dataproc\n", 12 | "-------------------------------\n", 13 | "\n", 14 | "### Overview\n", 15 | "- This document is a tutorial on using Google Dataproc, Google's managed Hadoop/Spark cluster cloud service, to run grid search on Spark for scikit-learn hyperparameter optimization.\n", 16 | "- This tutorial covers the following topics:\n", 17 | " - Setting up a Google Dataproc cluster with gcloud\n", 18 | " - Setting up and connecting a Jupyter Notebook coding environment on the cluster\n", 19 | " - Installing spark-sklearn\n", 20 | " - An example of grid search with spark-sklearn" 21 | ] 22 | }, 23 | { 24 | "cell_type": "markdown", 25 | "metadata": {}, 26 | "source": [ 27 | "### 1. Preparation - Signing up for Google Cloud" 28 | ] 29 | }, 30 | { 31 | "cell_type": "markdown", 32 | "metadata": {}, 33 | "source": [ 34 | "### 2. Installing the gcloud utility" 35 | ] 36 | }, 37 | { 38 | "cell_type": "markdown", 39 | "metadata": {}, 40 | "source": [ 41 | "### 3. Setting up a Google Dataproc cluster" 42 | ] 43 | }, 44 | { 45 | "cell_type": "markdown", 46 | "metadata": {}, 47 | "source": [ 48 | "### 4. Connecting to the Dataproc cluster with Jupyter Notebook" 49 | ] 50 | }, 51 | { 52 | "cell_type": "markdown", 53 | "metadata": {}, 54 | "source": [ 55 | "### 5. Installing spark-sklearn and running a grid search" 56 | ] 57 | }, 58 | { 59 | "cell_type": "markdown", 60 | "metadata": {}, 61 | "source": [ 62 | "### References\n", 63 | "- https://databricks.com/blog/2016/02/08/auto-scaling-scikit-learn-with-apache-spark.html\n", 64 | "- https://github.com/databricks/spark-sklearn\n", 65 | "- https://blog.sourced.tech/post/dataproc_jupyter/\n", 66 | "- https://github.com/GoogleCloudPlatform/dataproc-initialization-actions/tree/master/jupyter" 67 | ] 68 | } 69 | ], 70 | "metadata": { 71 | "anaconda-cloud": {}, 72 | "kernelspec": { 73 | "display_name": "Python [default]", 74 | "language": "python", 75 | "name": "python3" 76 | }, 77 | "language_info": { 78 | "codemirror_mode": { 79 | "name": "ipython", 80 | "version": 3.0 81 | }, 82 | "file_extension": ".py", 83 | "mimetype": "text/x-python", 84 | "name": "python", 85 | "nbconvert_exporter": "python", 86 | "pygments_lexer": "ipython3", 87 | "version": "3.5.3" 88 | }, 89 | "nbpresent": { 90 | "slides": { 91 | "042a2c47-7c36-4f55-be80-69d72de25b27": { 92 | "id": "042a2c47-7c36-4f55-be80-69d72de25b27", 93 | "prev": "57a506bf-e894-4f47-bc2a-0550e1371f8e", 94 | "regions": {} 95 | }, 96 | "57a506bf-e894-4f47-bc2a-0550e1371f8e": { 97 | "id": "57a506bf-e894-4f47-bc2a-0550e1371f8e", 98 | "prev": null, 99 | "regions": {} 100 | } 101 | }, 102 | "themes": {} 103 | } 104 | }, 105 | "nbformat": 4, 106 | "nbformat_minor": 0 107 | } -------------------------------------------------------------------------------- /documents/test.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeamLab/machine_learning_from_scratch_with_python/d52439f9536d98f28164e8c1ecc6e9070843b971/documents/test.md 
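Section 5 of the tutorial above is only a heading with no body. As a reference, here is a minimal sketch of the grid-search cell that section promises, adapted from the Databricks blog post and the spark-sklearn repository cited in the tutorial's References. It assumes spark-sklearn is already installed on the cluster (for example with `pip install spark-sklearn`) and that `sc` is the SparkContext provided by the Dataproc/Jupyter setup.

```python
from sklearn import datasets, svm
from spark_sklearn import GridSearchCV  # drop-in replacement for sklearn's GridSearchCV

iris = datasets.load_iris()
param_grid = {"kernel": ("linear", "rbf"), "C": [1, 10]}

# Same API as scikit-learn except that the SparkContext is passed first;
# each parameter combination is then fitted as a separate Spark task.
clf = GridSearchCV(sc, svm.SVC(), param_grid)
clf.fit(iris.data, iris.target)
print(clf.best_params_)
```

Because only the search itself is distributed, this pattern fits the many-small-models case (hyperparameter search over data that fits in memory on each worker) rather than training one large model.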
-------------------------------------------------------------------------------- /lab_asssigment/1_lab_numpy/linux_mac/install.sh: -------------------------------------------------------------------------------- 1 | pip install -U backend.ai-client 2 | -------------------------------------------------------------------------------- /lab_asssigment/1_lab_numpy/linux_mac/numpy_lab.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def n_size_ndarray_creation(n, dtype=np.int): 5 | pass 6 | 7 | 8 | def zero_or_one_or_empty_ndarray(shape, type=0, dtype=np.int): 9 | pass 10 | 11 | 12 | def change_shape_of_ndarray(X, n_row): 13 | pass 14 | 15 | 16 | def concat_ndarray(X_1, X_2, axis): 17 | pass 18 | 19 | 20 | def normalize_ndarray(X, axis=99, dtype=np.float32): 21 | pass 22 | 23 | 24 | def save_ndarray(X, filename="test.npy"): 25 | pass 26 | 27 | 28 | def boolean_index(X, condition): 29 | pass 30 | 31 | 32 | def find_nearest_value(X, target_value): 33 | pass 34 | 35 | 36 | def get_n_largest_values(X, n): 37 | pass 38 | -------------------------------------------------------------------------------- /lab_asssigment/1_lab_numpy/linux_mac/submit.sh: -------------------------------------------------------------------------------- 1 | export BACKEND_ACCESS_KEY=AKIAQWZD6A6Y5ZVOHSJR 2 | export BACKEND_SECRET_KEY=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 3 | if [ $# -eq 0 ] 4 | then 5 | echo "Please give hash key as argument." 6 | else 7 | backend.ai run --exec "python test.py numpy_lab.py $1" python3 test.py numpy_lab.py 8 | fi 9 | -------------------------------------------------------------------------------- /lab_asssigment/1_lab_numpy/linux_mac/test.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import urllib.parse 3 | import urllib.request 4 | import json 5 | import argparse 6 | import os 7 | import types 8 | import sys 9 | 10 | 11 | parser = argparse.ArgumentParser(description="Autoscorer") 12 | parser.add_argument("filename", help="File to submit") 13 | parser.add_argument("hash", help="Hash key") 14 | 15 | args = parser.parse_args() 16 | if args.hash: 17 | hashkey = args.hash 18 | if args.filename: 19 | filename = args.filename 20 | 21 | 22 | class TestResult(unittest.TextTestResult): 23 | _previousTestClass = None 24 | _testRunEntered = False 25 | _moduleSetUpFailed = False 26 | 27 | def __init__(self, stream=None, descriptions=None, verbosity=1): 28 | super().__init__( 29 | stream=stream, descriptions=descriptions, verbosity=verbosity) 30 | self.tests_run = [] 31 | 32 | def getTestsReport(self): 33 | """Returns the run tests as a list of the form [test_id, result]""" 34 | return self.tests_run 35 | 36 | def addError(self, test, err): 37 | """Called when an error has occurred. 'err' is a tuple of values as 38 | returned by sys.exc_info(). 39 | """ 40 | super().addError(test, err) 41 | self.errors.append((test, self._exc_info_to_string(err, test))) 42 | self._mirrorOutput = True 43 | self.tests_run.append([test.id(), 0]) 44 | 45 | def addFailure(self, test, err): 46 | """Called when an error has occurred. 
'err' is a tuple of values as 47 | returned by sys.exc_info().""" 48 | super().addFailure(test, err) 49 | self.failures.append((test, self._exc_info_to_string(err, test))) 50 | self._mirrorOutput = True 51 | self.tests_run.append([test.id(), 0]) 52 | 53 | def addSuccess(self, test): 54 | "Called when a test has completed successfully" 55 | super().addSuccess(test) 56 | self.tests_run.append([test.id(), 1]) 57 | 58 | 59 | with urllib.request.urlopen('http://datasets.lablup.ai/private/python-tests/unit_test_numpy_lab.py') as response: 60 | test_code = response.read() 61 | 62 | test_module = types.ModuleType( 63 | 'test_code', 64 | doc='Test case') 65 | 66 | exec(test_code, test_module.__dict__) 67 | sys.modules['test_code'] = test_module 68 | 69 | import test_code as tc 70 | loader = unittest.loader.defaultTestLoader 71 | null_stream = open(os.devnull, "w") 72 | test_suite = loader.loadTestsFromModule(tc) 73 | result = unittest.TextTestRunner( 74 | stream=null_stream, verbosity=2, resultclass=TestResult).run(test_suite) 75 | 76 | print("Generating result sheet...") 77 | print("-------------------------------------------------------------------") 78 | print(" Test Case | Passed? | Feedback") 79 | print("-------------------------------------------------------------------") 80 | for c, r in result.tests_run: 81 | print("{0:s} | {1:s} | {2} ".format( 82 | c.rsplit('.', 1)[1].rjust(26), 83 | "PASSED" if r == 1 else "FAILED", 84 | "Good Job".rjust(10) if r == 1 else "Failed".rjust(10))) 85 | 86 | # print(json.dumps(result.tests_run)) 87 | print("Reading source file...") 88 | 89 | file = open(filename, "r") 90 | print("Transferring results to server...") 91 | payload = { 92 | 'hashkey': hashkey, 93 | 'result': result.tests_run, 94 | 'code': file.read() 95 | } 96 | try: 97 | data = urllib.parse.urlencode(payload) 98 | data = data.encode('ascii') 99 | req = urllib.request.Request('http://report.inflearn.com/submit', data) 100 | with urllib.request.urlopen(req) as response: 101 | resp = response.read() 102 | 103 | if json.loads(resp)['result'] == 0: 104 | print("Transfer failed: hash key is already used.") 105 | else: 106 | print("Transfer completed.") 107 | 108 | except Exception as e: 109 | print("Error occurred on transferring.", e) 110 | -------------------------------------------------------------------------------- /lab_asssigment/1_lab_numpy/numpy_lab.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeamLab/machine_learning_from_scratch_with_python/d52439f9536d98f28164e8c1ecc6e9070843b971/lab_asssigment/1_lab_numpy/numpy_lab.pdf -------------------------------------------------------------------------------- /lab_asssigment/1_lab_numpy/numpy_lab.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def n_size_ndarray_creation(n, dtype=np.int): 5 | pass 6 | 7 | 8 | def zero_or_one_or_empty_ndarray(shape, type=0, dtype=np.int): 9 | pass 10 | 11 | 12 | def change_shape_of_ndarray(X, n_row): 13 | pass 14 | 15 | 16 | def concat_ndarray(X_1, X_2, axis): 17 | pass 18 | 19 | 20 | def normalize_ndarray(X, axis=99, dtype=np.float32): 21 | pass 22 | 23 | 24 | def save_ndarray(X, filename="test.npy"): 25 | pass 26 | 27 | 28 | def boolean_index(X, condition): 29 | pass 30 | 31 | 32 | def find_nearest_value(X, target_value): 33 | pass 34 | 35 | 36 | def get_n_largest_values(X, n): 37 | pass 38 | 
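The `numpy_lab.py` stubs above are graded against unit tests that `test.py` downloads at submission time, so the repository itself never states the expected behavior. Purely as an illustration, here is one plausible reading of two of the simpler stubs. The exact semantics (1-D input, ordering, tie-breaking) are guesses from the function names, not the official spec, so do not treat this as a reference solution.

```python
import numpy as np

def find_nearest_value(X, target_value):
    # Value of the 1-D array X whose absolute difference
    # from target_value is smallest (first match wins on ties).
    return X[np.argmin(np.abs(X - target_value))]

def get_n_largest_values(X, n):
    # The n largest values of X in descending order,
    # regardless of X's original shape.
    return np.sort(X.flatten())[::-1][:n]
```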
-------------------------------------------------------------------------------- /lab_asssigment/1_lab_numpy/windows/install.bat: -------------------------------------------------------------------------------- 1 | pip install -U backend.ai-client 2 | -------------------------------------------------------------------------------- /lab_asssigment/1_lab_numpy/windows/numpy_lab.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def n_size_ndarray_creation(n, dtype=np.int): 5 | pass 6 | 7 | 8 | def zero_or_one_or_empty_ndarray(shape, type=0, dtype=np.int): 9 | pass 10 | 11 | 12 | def change_shape_of_ndarray(X, n_row): 13 | pass 14 | 15 | 16 | def concat_ndarray(X_1, X_2, axis): 17 | pass 18 | 19 | 20 | def normalize_ndarray(X, axis=99, dtype=np.float32): 21 | pass 22 | 23 | 24 | def save_ndarray(X, filename="test.npy"): 25 | pass 26 | 27 | 28 | def boolean_index(X, condition): 29 | pass 30 | 31 | 32 | def find_nearest_value(X, target_value): 33 | pass 34 | 35 | 36 | def get_n_largest_values(X, n): 37 | pass 38 | -------------------------------------------------------------------------------- /lab_asssigment/1_lab_numpy/windows/submit.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | set BACKEND_ACCESS_KEY=AKIAQWZD6A6Y5ZVOHSJR 4 | set BACKEND_SECRET_KEY=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 5 | 6 | set tmp="%1" 7 | if "%tmp:"=.%"==".." ( 8 | echo "Please give hash key as argument." 9 | ) else ( 10 | backend.ai run --exec "python test.py numpy_lab.py %tmp%" python3 test.py numpy_lab.py 11 | ) 12 | -------------------------------------------------------------------------------- /lab_asssigment/1_lab_numpy/windows/test.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import urllib.parse 3 | import urllib.request 4 | import json 5 | import argparse 6 | import os 7 | import types 8 | import sys 9 | 10 | 11 | parser = argparse.ArgumentParser(description="Autoscorer") 12 | parser.add_argument("filename", help="File to submit") 13 | parser.add_argument("hash", help="Hash key") 14 | 15 | args = parser.parse_args() 16 | if args.hash: 17 | hashkey = args.hash 18 | if args.filename: 19 | filename = args.filename 20 | 21 | 22 | class TestResult(unittest.TextTestResult): 23 | _previousTestClass = None 24 | _testRunEntered = False 25 | _moduleSetUpFailed = False 26 | 27 | def __init__(self, stream=None, descriptions=None, verbosity=1): 28 | super().__init__( 29 | stream=stream, descriptions=descriptions, verbosity=verbosity) 30 | self.tests_run = [] 31 | 32 | def getTestsReport(self): 33 | """Returns the run tests as a list of the form [test_id, result]""" 34 | return self.tests_run 35 | 36 | def addError(self, test, err): 37 | """Called when an error has occurred. 'err' is a tuple of values as 38 | returned by sys.exc_info(). 39 | """ 40 | super().addError(test, err) 41 | self.errors.append((test, self._exc_info_to_string(err, test))) 42 | self._mirrorOutput = True 43 | self.tests_run.append([test.id(), 0]) 44 | 45 | def addFailure(self, test, err): 46 | """Called when an error has occurred. 
'err' is a tuple of values as 47 | returned by sys.exc_info().""" 48 | super().addFailure(test, err) 49 | self.failures.append((test, self._exc_info_to_string(err, test))) 50 | self._mirrorOutput = True 51 | self.tests_run.append([test.id(), 0]) 52 | 53 | def addSuccess(self, test): 54 | "Called when a test has completed successfully" 55 | super().addSuccess(test) 56 | self.tests_run.append([test.id(), 1]) 57 | 58 | 59 | with urllib.request.urlopen('http://datasets.lablup.ai/private/python-tests/unit_test_numpy_lab.py') as response: 60 | test_code = response.read() 61 | 62 | test_module = types.ModuleType( 63 | 'test_code', 64 | doc='Test case') 65 | 66 | exec(test_code, test_module.__dict__) 67 | sys.modules['test_code'] = test_module 68 | 69 | import test_code as tc 70 | loader = unittest.loader.defaultTestLoader 71 | null_stream = open(os.devnull, "w") 72 | test_suite = loader.loadTestsFromModule(tc) 73 | result = unittest.TextTestRunner( 74 | stream=null_stream, verbosity=2, resultclass=TestResult).run(test_suite) 75 | 76 | print("Generating result sheet...") 77 | print("-------------------------------------------------------------------") 78 | print(" Test Case | Passed? | Feedback") 79 | print("-------------------------------------------------------------------") 80 | for c, r in result.tests_run: 81 | print("{0:s} | {1:s} | {2} ".format( 82 | c.rsplit('.', 1)[1].rjust(26), 83 | "PASSED" if r == 1 else "FAILED", 84 | "Good Job".rjust(10) if r == 1 else "Failed".rjust(10))) 85 | 86 | # print(json.dumps(result.tests_run)) 87 | print("Reading source file...") 88 | 89 | file = open(filename, "r") 90 | print("Transferring results to server...") 91 | payload = { 92 | 'hashkey': hashkey, 93 | 'result': result.tests_run, 94 | 'code': file.read() 95 | } 96 | try: 97 | data = urllib.parse.urlencode(payload) 98 | data = data.encode('ascii') 99 | req = urllib.request.Request('http://report.inflearn.com/submit', data) 100 | with urllib.request.urlopen(req) as response: 101 | resp = response.read() 102 | 103 | if json.loads(resp)['result'] == 0: 104 | print("Transfer failed: hash key is already used.") 105 | else: 106 | print("Transfer completed.") 107 | 108 | except Exception as e: 109 | print("Error occurred on transferring.", e) 110 | -------------------------------------------------------------------------------- /lab_asssigment/2_lab_build_matrix/README.md: -------------------------------------------------------------------------------- 1 | Lab - Build a matrix 2 | =============================== 3 | Copyright 2018 © document created by teamLab.gachon@gmail.com 4 | 5 | ## Introduction 6 | 7 | [Download the PDF](https://s3.ap-northeast-2.amazonaws.com/teamlab-gachon/mooc_pic/build_matrix.pdf) 8 | 9 | The second machine learning lab is to build a rating matrix or a frequent matrix using Pandas and Numpy. In machine learning work such as recommender system development, you often need to analyze who used and rated which products (or services), and how much, by reshaping that information into matrix form, and this calls for a preprocessing step. A common example is expressing the star ratings that users gave the movies they watched as a matrix. For storage efficiency, a database generally does not store data in matrix form; it stores the information of each event as a row, as the events occur. Converting the information accumulated in the DB this way into a matrix is the goal of this lab. The actual transformation of the data looks like the figure below. 10 | 11 | ![matrix](images/2018/01/matrix.png) 12 | 13 | ## Installing backend.ai 14 | Before submitting the homework, you need to install [Lablup](http://www.lablup.com/)'s backend.ai into your Python environment. Installation is very easy: just type the following in a terminal or cmd window. 15 | 16 | ```bash 17 | pip install backend.ai-client 18 | ``` 19 | 20 | ## Downloading the homework file (2_lab_build_matrix.zip) 21 | The first thing to do is download the homework file. 
Download it from the link below, or type the address below into the address bar of a web browser such as Chrome or Internet Explorer. 22 | 23 | - Link: [2_lab_build_matrix.zip](https://s3.ap-northeast-2.amazonaws.com/teamlab-gachon/mooc_pic/2_lab_build_matrix.zip) 24 | - https://s3.ap-northeast-2.amazonaws.com/teamlab-gachon/mooc_pic/2_lab_build_matrix.zip 25 | 26 | On Mac OS you can also use the command below. 27 | ```bash 28 | wget https://s3.ap-northeast-2.amazonaws.com/teamlab-gachon/mooc_pic/2_lab_bulid_matrix.zip 29 | ``` 30 | 31 | Move the downloaded `2_lab_build_matrix.zip` file into your working directory and unzip it before starting work. 32 | Once unzipped, it contains a `linux_mac` folder and a `windows` folder; move into the folder that matches your OS and edit the code there. 33 | 34 | 35 | ## build_matrix.py code structure 36 | This lab combines basic Pandas operations with Numpy to convert data accumulated in the usual row-per-event form into matrix form. Two kinds of matrix are produced, and this lab aims to support both. 37 | 38 | #### get_rating_matrix 39 | The first function builds a rating matrix. A rating matrix expresses users' ratings of products, such as movies or books, in matrix form. We use the file `movie_rating.csv` to build the rating matrix; `movie_rating.csv` is structured as follows. 40 | 41 | source |target | rating 42 | --|---|-- 43 | Mick LaSalle|Superman Returns|3.0 44 | Mick LaSalle|The Night Listener|3.0 45 | Claudia Puig|Snakes on a Plane|3.5 46 | Claudia Puig|Just My Luck|3.0 47 | Claudia Puig|The Night Listener|4.5 48 | Lisa Rose|Lady in the Water|2.5 49 | Lisa Rose|Snakes on a Plane|3.5 50 | 51 | Every csv file handled in this lab has source and target columns: source supplies the row index and target supplies the column index. The rating column is used only by `get_rating_matrix` and holds each user's rating of a movie. 52 | 53 | The goal of this lab is to convert information laid out as in the table above into matrix form. The conversion rules are as follows. 54 | - source keys the rows and target keys the columns. 55 | - Indices are assigned from the sorted source and target values; in the table above, `Claudia Puig` therefore becomes row index 0. 56 | - Each rating value is assigned to the corresponding element of the matrix. 57 | - The resulting matrix is an ndarray. 58 | - Python built-in modules such as dict and collections may be used, but for loops may not. 59 | 60 | The template for the function is shown below; the only input is the name of the csv file to process. 61 | ```python 62 | def get_rating_matrix(filename): 63 | pass 64 | ``` 65 | An actual implementation and its output look like this. 66 | ```python 67 | >>> import numpy as np 68 | >>> import build_matrix as test_code 69 | >>> test_code.get_rating_matrix("movie_rating.csv") 70 | array([[ 3. , 0. , 3.5, 0. , 4.5, 0. ], 71 | [ 0. , 3. , 3.5, 0. , 3. , 3.5], 72 | [ 0. , 3. , 4. , 5. , 3. , 3.5], 73 | [ 3. , 2.5, 3.5, 3.5, 3. , 2.5], 74 | [ 2. , 3. , 4. , 3. , 3. , 0. ], 75 | [ 0. , 0. , 4.5, 4. , 0. , 0. ]], dtype=float32) 76 | ``` 77 | 78 | #### get_frequent_matrix 79 | The second function builds a frequent matrix, which expresses how frequently each product was purchased. A frequent matrix records the number of times a user bought a given product. The csv file we provide, `1000i.csv`, is structured as follows. 80 | 81 | source |target 82 | --|--- 83 | 3|7 84 | 4|15 85 | 2|49 86 | 5|44 87 | 1|1 88 | 2|19 89 | 4|22 90 | 4|34 91 | 4|40 92 | 5|31 93 | 4|17 94 | 5|16 95 | 2|43 96 | 5|20 97 | 3|48 98 | 99 | 100 | Unlike the previous function, there is no rating column here. Instead, a given source-target combination can occur more than once, and those repetitions must be treated as frequencies. In other words, the rating is not explicit; the goal is to derive the frequency from the data. The rules for converting to matrix form are as follows. 101 | - source keys the rows and target keys the columns. 102 | - Indices are assigned from the sorted source and target values; in the table above, `1` therefore becomes row index 0. 103 | - The number of times each source-target pair occurs is assigned as the frequency to the corresponding element of the matrix. 104 | - The resulting matrix is an ndarray with dtype np.float32. 105 | - Python built-in modules such as dict and collections may be used, but for loops may not. 106 | 107 | 108 | The template for the function is shown below; the only input is the name of the csv file to process.
109 | ```python 110 | def get_frequent_matrix(filename): 111 | pass 112 | ``` 113 | An actual implementation and its output look like this. 114 | ```python 115 | >>> import numpy as np 116 | >>> import build_matrix as test_code 117 | >>> test_code.get_frequent_matrix("1000i.csv") 118 | array([[ 19., 17., 14., 11., 17., 25., 7., 22., 5., 18., 10., 119 | 13., 13., 8., 20., 10., 9., 10., 16., 15., 9., 11., 120 | 17., 15., 14., 8., 6., 12., 18., 12., 6., 18., 9., 121 | 24., 7., 19., 14., 6., 4., 12., 15., 14., 20., 9., 122 | 12., 16., 11., 9., 11., 12.], 123 | [ 20., 16., 10., 15., 17., 18., 10., 13., 5., 19., 8., 124 | 14., 14., 9., 15., 14., 13., 8., 12., 9., 5., 10., 125 | 28., 18., 7., 8., 6., 19., 14., 13., 11., 12., 18., 126 | 15., 7., 11., 17., 9., 5., 5., 13., 12., 15., 9., 127 | 13., 16., 16., 10., 16., 9.], 128 | [ 12., 16., 13., 19., 23., 19., 5., 14., 5., 18., 7., 129 | 6., 14., 8., 20., 17., 14., 11., 16., 12., 7., 9., 130 | 23., 12., 12., 8., 7., 23., 26., 10., 9., 20., 16., 131 | 11., 4., 19., 12., 12., 5., 10., 10., 14., 10., 17., 132 | 15., 16., 11., 17., 9., 11.], 133 | [ 14., 14., 19., 11., 11., 18., 7., 16., 7., 17., 6., 134 | 19., 18., 12., 13., 13., 14., 9., 21., 16., 6., 6., 135 | 19., 14., 19., 5., 12., 14., 18., 11., 11., 21., 15., 136 | 10., 11., 14., 17., 21., 6., 14., 9., 16., 18., 12., 137 | 16., 16., 26., 16., 12., 20.], 138 | [ 13., 7., 8., 15., 13., 16., 3., 19., 11., 12., 7., 139 | 10., 13., 14., 16., 14., 23., 9., 13., 10., 11., 3., 140 | 11., 14., 9., 6., 11., 16., 18., 11., 5., 14., 10., 141 | 16., 10., 5., 14., 11., 3., 9., 11., 10., 16., 8., 142 | 13., 20., 14., 18., 21., 3.]], dtype=float32) 143 | ``` 144 | 145 | ### Submitting the homework template file (Windows) 146 | 1. Press Windows+R, type cmd, and click OK. 147 | 2. Move to the folder where you did the work. 148 | 3. Enter the commands below in the cmd window. 149 | ```bash 150 | install.bat 151 | submit.bat [YOUR_HASH_KEY] 152 | ``` 153 | 154 | ### Submitting the homework template file (Mac or Linux) 155 | 1. Launch a terminal. 156 | 2. Move to the directory where you did the work. 157 | 3. Enter the commands below in a bash shell. 158 | ```bash 159 | bash install.sh 160 | bash submit.sh [YOUR_HASH_KEY] 161 | ``` 162 | > Because of updates to the backend.ai service, be sure to run `bash install.sh` or `install.bat` before submitting. 163 | 164 | ## Next Work 165 | Well done. This is a lab you can only complete by using Numpy and Pandas together. If you are not yet comfortable handling matrix and vector data, you probably found it quite difficult. Even so, we must keep moving forward. May the code be with you... 
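For readers who want to sanity-check their approach, here is one possible implementation of the `get_rating_matrix` / `get_frequent_matrix` spec above. This is a sketch only: it assumes that pandas `pivot_table` and `crosstab` are acceptable under the no-for-loop rule, it uses the stub signatures from `build_matrix.py`, and it has not been run against the course's hidden unit tests.

```python
import numpy as np
import pandas as pd

def get_rating_matrix(filename, dtype=np.float32):
    # source keys the rows, target keys the columns; pivot_table sorts
    # both axes, which matches the "sorted values become the index" rule.
    df = pd.read_csv(filename)
    table = df.pivot_table(values="rating", index="source",
                           columns="target", fill_value=0)
    return table.values.astype(dtype)

def get_frequent_matrix(filename, dtype=np.float32):
    # crosstab counts how often each (source, target) pair occurs,
    # which is exactly the frequency the spec asks for.
    df = pd.read_csv(filename)
    table = pd.crosstab(df["source"], df["target"])
    return table.values.astype(dtype)
```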
166 | 167 | > **Human knowledge belongs to the world** - from movie 'Password' - 168 | -------------------------------------------------------------------------------- /lab_asssigment/2_lab_build_matrix/build_matrix.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeamLab/machine_learning_from_scratch_with_python/d52439f9536d98f28164e8c1ecc6e9070843b971/lab_asssigment/2_lab_build_matrix/build_matrix.pdf -------------------------------------------------------------------------------- /lab_asssigment/2_lab_build_matrix/build_matrix.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | 4 | 5 | def get_rating_matrix(filename, dtype=np.float32): 6 | pass 7 | 8 | 9 | def get_frequent_matrix(filename, dtype=np.float32): 10 | pass 11 | -------------------------------------------------------------------------------- /lab_asssigment/2_lab_build_matrix/images/2018/01/matrix.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeamLab/machine_learning_from_scratch_with_python/d52439f9536d98f28164e8c1ecc6e9070843b971/lab_asssigment/2_lab_build_matrix/images/2018/01/matrix.png -------------------------------------------------------------------------------- /lab_asssigment/2_lab_build_matrix/linux_mac/build_matrix.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | 4 | 5 | def get_rating_matrix(filename, dtype=np.float32): 6 | pass 7 | 8 | 9 | def get_frequent_matrix(filename, dtype=np.float32): 10 | pass 11 | -------------------------------------------------------------------------------- /lab_asssigment/2_lab_build_matrix/linux_mac/install.sh: -------------------------------------------------------------------------------- 1 | pip install -U backend.ai-client 2 | -------------------------------------------------------------------------------- /lab_asssigment/2_lab_build_matrix/linux_mac/submit.sh: -------------------------------------------------------------------------------- 1 | export BACKEND_ACCESS_KEY=AKIAQWZD6A6Y5ZVOHSJR 2 | export BACKEND_SECRET_KEY=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 3 | if [ $# -eq 0 ] 4 | then 5 | echo "Please give hash key as argument." 
6 | else 7 | backend.ai run --exec "python test.py build_matrix.py $1" python3 test.py build_matrix.py 8 | fi 9 | -------------------------------------------------------------------------------- /lab_asssigment/2_lab_build_matrix/linux_mac/test.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import urllib.parse 3 | import urllib.request 4 | import json 5 | import argparse 6 | import os 7 | import types 8 | import sys 9 | 10 | 11 | parser = argparse.ArgumentParser(description="Autoscorer") 12 | parser.add_argument("filename", help="File to submit") 13 | parser.add_argument("hash", help="Hash key") 14 | 15 | args = parser.parse_args() 16 | if args.hash: 17 | hashkey = args.hash 18 | if args.filename: 19 | filename = args.filename 20 | 21 | 22 | class TestResult(unittest.TextTestResult): 23 | _previousTestClass = None 24 | _testRunEntered = False 25 | _moduleSetUpFailed = False 26 | 27 | def __init__(self, stream=None, descriptions=None, verbosity=1): 28 | super().__init__( 29 | stream=stream, descriptions=descriptions, verbosity=verbosity) 30 | self.tests_run = [] 31 | 32 | def getTestsReport(self): 33 | """Returns the run tests as a list of the form [test_id, result]""" 34 | return self.tests_run 35 | 36 | def addError(self, test, err): 37 | """Called when an error has occurred. 'err' is a tuple of values as 38 | returned by sys.exc_info(). 39 | """ 40 | super().addError(test, err) 41 | self.errors.append((test, self._exc_info_to_string(err, test))) 42 | self._mirrorOutput = True 43 | self.tests_run.append([test.id(), 0]) 44 | 45 | def addFailure(self, test, err): 46 | """Called when an error has occurred. 'err' is a tuple of values as 47 | returned by sys.exc_info().""" 48 | super().addFailure(test, err) 49 | self.failures.append((test, self._exc_info_to_string(err, test))) 50 | self._mirrorOutput = True 51 | self.tests_run.append([test.id(), 0]) 52 | 53 | def addSuccess(self, test): 54 | "Called when a test has completed successfully" 55 | super().addSuccess(test) 56 | self.tests_run.append([test.id(), 1]) 57 | 58 | 59 | with urllib.request.urlopen('http://datasets.lablup.ai/private/python-tests/unit_test_build_matrix.py') as response: 60 | test_code = response.read() 61 | 62 | with urllib.request.urlopen('http://datasets.lablup.ai/private/python-tests/movie_rating.csv') as response: 63 | example_txt = response.read().decode('utf-8') 64 | f = open("movie_rating.csv", "w") 65 | f.write(example_txt) 66 | f.close() 67 | 68 | with urllib.request.urlopen('http://datasets.lablup.ai/private/python-tests/1000i.csv') as response: 69 | example_txt = response.read().decode('utf-8') 70 | f = open("1000i.csv", "w") 71 | f.write(example_txt) 72 | f.close() 73 | 74 | 75 | test_module = types.ModuleType( 76 | 'test_code', 77 | doc='Test case') 78 | 79 | exec(test_code, test_module.__dict__) 80 | sys.modules['test_code'] = test_module 81 | 82 | import test_code as tc 83 | loader = unittest.loader.defaultTestLoader 84 | null_stream = open(os.devnull, "w") 85 | test_suite = loader.loadTestsFromModule(tc) 86 | result = unittest.TextTestRunner( 87 | stream=null_stream, verbosity=2, resultclass=TestResult).run(test_suite) 88 | 89 | print("Generating result sheet...") 90 | print("-------------------------------------------------------------------") 91 | print(" Test Case | Passed? 
| Feedback") 92 | print("-------------------------------------------------------------------") 93 | for c, r in result.tests_run: 94 | print("{0:s} | {1:s} | {2} ".format( 95 | c.rsplit('.', 1)[1].rjust(26), 96 | "PASSED" if r == 1 else "FAILED", 97 | "Good Job".rjust(10) if r == 1 else "Failed".rjust(10))) 98 | 99 | # print(json.dumps(result.tests_run)) 100 | print("Reading source file...") 101 | 102 | file = open(filename, "r") 103 | print("Transferring results to server...") 104 | payload = { 105 | 'hashkey': hashkey, 106 | 'result': result.tests_run, 107 | 'code': file.read() 108 | } 109 | try: 110 | data = urllib.parse.urlencode(payload) 111 | data = data.encode('ascii') 112 | req = urllib.request.Request('http://report.inflearn.com/submit', data) 113 | with urllib.request.urlopen(req) as response: 114 | resp = response.read() 115 | 116 | if json.loads(resp)['result'] == 0: 117 | print("Transfer failed: hash key is already used.") 118 | else: 119 | print("Transfer completed.") 120 | 121 | except Exception as e: 122 | print("Error occurred on transferring.", e) 123 | -------------------------------------------------------------------------------- /lab_asssigment/2_lab_build_matrix/movie_rating.csv: -------------------------------------------------------------------------------- 1 | source,target,rating 2 | Jack Matthews,Lady in the Water,3.0 3 | Jack Matthews,Snakes on a Plane,4.0 4 | Jack Matthews,You Me and Dupree,3.5 5 | Jack Matthews,Superman Returns,5.0 6 | Jack Matthews,The Night Listener,3.0 7 | Mick LaSalle,Lady in the Water,3.0 8 | Mick LaSalle,Snakes on a Plane,4.0 9 | Mick LaSalle,Just My Luck,2.0 10 | Mick LaSalle,Superman Returns,3.0 11 | Mick LaSalle,The Night Listener,3.0 12 | Claudia Puig,Snakes on a Plane,3.5 13 | Claudia Puig,Just My Luck,3.0 14 | Claudia Puig,The Night Listener,4.5 15 | Lisa Rose,Lady in the Water,2.5 16 | Lisa Rose,Snakes on a Plane,3.5 17 | Lisa Rose,Just My Luck,3.0 18 | Lisa Rose,Superman Returns,3.5 19 | Lisa Rose,The Night Listener,3.0 20 | Lisa Rose,You Me and Dupree,2.5 21 | Toby,Snakes on a Plane,4.5 22 | Toby,Superman Returns,4.0 23 | Gene Seymour,Lady in the Water,3.0 24 | Gene Seymour,Snakes on a Plane,3.5 25 | Gene Seymour,You Me and Dupree,3.5 26 | Gene Seymour,The Night Listener,3.0 27 | -------------------------------------------------------------------------------- /lab_asssigment/2_lab_build_matrix/windows/build_matrix.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | 4 | 5 | def get_rating_matrix(filename, dtype=np.float32): 6 | pass 7 | 8 | 9 | def get_frequent_matrix(filename, dtype=np.float32): 10 | pass 11 | -------------------------------------------------------------------------------- /lab_asssigment/2_lab_build_matrix/windows/install.bat: -------------------------------------------------------------------------------- 1 | pip install -U backend.ai-client 2 | -------------------------------------------------------------------------------- /lab_asssigment/2_lab_build_matrix/windows/submit.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | set BACKEND_ACCESS_KEY=AKIAQWZD6A6Y5ZVOHSJR 4 | set BACKEND_SECRET_KEY=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 5 | 6 | set tmp="%1" 7 | if "%tmp:"=.%"==".." ( 8 | echo "Please give hash key as argument." 
9 | ) else ( 10 | backend.ai run --exec "python test.py build_matrix.py %tmp%" python3 test.py build_matrix.py 11 | ) 12 | -------------------------------------------------------------------------------- /lab_asssigment/2_lab_build_matrix/windows/test.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import urllib.parse 3 | import urllib.request 4 | import json 5 | import argparse 6 | import os 7 | import types 8 | import sys 9 | 10 | 11 | parser = argparse.ArgumentParser(description="Autoscorer") 12 | parser.add_argument("filename", help="File to submit") 13 | parser.add_argument("hash", help="Hash key") 14 | 15 | args = parser.parse_args() 16 | if args.hash: 17 | hashkey = args.hash 18 | if args.filename: 19 | filename = args.filename 20 | 21 | 22 | class TestResult(unittest.TextTestResult): 23 | _previousTestClass = None 24 | _testRunEntered = False 25 | _moduleSetUpFailed = False 26 | 27 | def __init__(self, stream=None, descriptions=None, verbosity=1): 28 | super().__init__( 29 | stream=stream, descriptions=descriptions, verbosity=verbosity) 30 | self.tests_run = [] 31 | 32 | def getTestsReport(self): 33 | """Returns the run tests as a list of the form [test_id, result]""" 34 | return self.tests_run 35 | 36 | def addError(self, test, err): 37 | """Called when an error has occurred. 'err' is a tuple of values as 38 | returned by sys.exc_info(). 39 | """ 40 | super().addError(test, err) 41 | self.errors.append((test, self._exc_info_to_string(err, test))) 42 | self._mirrorOutput = True 43 | self.tests_run.append([test.id(), 0]) 44 | 45 | def addFailure(self, test, err): 46 | """Called when an error has occurred. 'err' is a tuple of values as 47 | returned by sys.exc_info().""" 48 | super().addFailure(test, err) 49 | self.failures.append((test, self._exc_info_to_string(err, test))) 50 | self._mirrorOutput = True 51 | self.tests_run.append([test.id(), 0]) 52 | 53 | def addSuccess(self, test): 54 | "Called when a test has completed successfully" 55 | super().addSuccess(test) 56 | self.tests_run.append([test.id(), 1]) 57 | 58 | 59 | with urllib.request.urlopen('http://datasets.lablup.ai/private/python-tests/unit_test_build_matrix.py') as response: 60 | test_code = response.read() 61 | 62 | with urllib.request.urlopen('http://datasets.lablup.ai/private/python-tests/movie_rating.csv') as response: 63 | example_txt = response.read().decode('utf-8') 64 | f = open("movie_rating.csv", "w") 65 | f.write(example_txt) 66 | f.close() 67 | 68 | with urllib.request.urlopen('http://datasets.lablup.ai/private/python-tests/1000i.csv') as response: 69 | example_txt = response.read().decode('utf-8') 70 | f = open("1000i.csv", "w") 71 | f.write(example_txt) 72 | f.close() 73 | 74 | 75 | test_module = types.ModuleType( 76 | 'test_code', 77 | doc='Test case') 78 | 79 | exec(test_code, test_module.__dict__) 80 | sys.modules['test_code'] = test_module 81 | 82 | import test_code as tc 83 | loader = unittest.loader.defaultTestLoader 84 | null_stream = open(os.devnull, "w") 85 | test_suite = loader.loadTestsFromModule(tc) 86 | result = unittest.TextTestRunner( 87 | stream=null_stream, verbosity=2, resultclass=TestResult).run(test_suite) 88 | 89 | print("Generating result sheet...") 90 | print("-------------------------------------------------------------------") 91 | print(" Test Case | Passed? 
| Feedback") 92 | print("-------------------------------------------------------------------") 93 | for c, r in result.tests_run: 94 | print("{0:s} | {1:s} | {2} ".format( 95 | c.rsplit('.', 1)[1].rjust(26), 96 | "PASSED" if r == 1 else "FAILED", 97 | "Good Job".rjust(10) if r == 1 else "Failed".rjust(10))) 98 | 99 | # print(json.dumps(result.tests_run)) 100 | print("Reading source file...") 101 | 102 | file = open(filename, "r") 103 | print("Transferring results to server...") 104 | payload = { 105 | 'hashkey': hashkey, 106 | 'result': result.tests_run, 107 | 'code': file.read() 108 | } 109 | try: 110 | data = urllib.parse.urlencode(payload) 111 | data = data.encode('ascii') 112 | req = urllib.request.Request('http://report.inflearn.com/submit', data) 113 | with urllib.request.urlopen(req) as response: 114 | resp = response.read() 115 | 116 | if json.loads(resp)['result'] == 0: 117 | print("Transfer failed: hash key is already used.") 118 | else: 119 | print("Transfer completed.") 120 | 121 | except Exception as e: 122 | print("Error occurred on transferring.", e) 123 | -------------------------------------------------------------------------------- /lab_asssigment/5_normal_equation/lab_linear_model.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeamLab/machine_learning_from_scratch_with_python/d52439f9536d98f28164e8c1ecc6e9070843b971/lab_asssigment/5_normal_equation/lab_linear_model.pdf -------------------------------------------------------------------------------- /lab_asssigment/5_normal_equation/linear_model.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | class LinearRegression(object): 4 | def __init__(self, fit_intercept=True, copy_X=True): 5 | self.fit_intercept = fit_intercept 6 | self.copy_X = copy_X 7 | 8 | self._coef = None 9 | self._intercept = None 10 | self._new_X = None 11 | 12 | def fit(self, X, y): 13 | pass 14 | 15 | def predict(self, X): 16 | pass 17 | 18 | @property 19 | def coef(self): 20 | return self._coef 21 | 22 | @property 23 | def intercept(self): 24 | return self._intercept 25 | -------------------------------------------------------------------------------- /lab_asssigment/5_normal_equation/linux_mac/install.sh: -------------------------------------------------------------------------------- 1 | pip install -U backend.ai-client 2 | -------------------------------------------------------------------------------- /lab_asssigment/5_normal_equation/linux_mac/linear_model.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | class LinearRegression(object): 5 | def __init__(self, fit_intercept=True, copy_X=True): 6 | self.fit_intercept = fit_intercept 7 | self.copy_X = copy_X 8 | 9 | self._coef = None 10 | self._intercept = None 11 | self._new_X = None 12 | 13 | def fit(self, X, y): 14 | pass 15 | 16 | def predict(self, X): 17 | pass 18 | 19 | @property 20 | def coef(self): 21 | return self._coef 22 | 23 | @property 24 | def intercept(self): 25 | return self._intercept 26 | -------------------------------------------------------------------------------- /lab_asssigment/5_normal_equation/linux_mac/mlr09.csv: -------------------------------------------------------------------------------- 1 | height_in_feet,weight_in_pounds,successful_field_goals,percent_of_successful_free_throws,average_points_scored 2 | 6.8,225,0.442,0.672,9.2 6.3,180,0.435,0.797,11.7 
6.4,190,0.456,0.761,15.8 6.2,180,0.416,0.651,8.6 6.9,205,0.449,0.9,23.2 6.4,225,0.431,0.78,27.4 6.3,185,0.487,0.771,9.3 6.8,235,0.469,0.75,16 6.9,235,0.435,0.818,4.7 6.7,210,0.48,0.825,12.5 6.9,245,0.516,0.632,20.1 6.9,245,0.493,0.757,9.1 6.3,185,0.374,0.709,8.1 6.1,185,0.424,0.782,8.6 6.2,180,0.441,0.775,20.3 6.8,220,0.503,0.88,25 6.5,194,0.503,0.833,19.2 7.6,225,0.425,0.571,3.3 6.3,210,0.371,0.816,11.2 7.1,240,0.504,0.714,10.5 6.8,225,0.4,0.765,10.1 7.3,263,0.482,0.655,7.2 6.4,210,0.475,0.244,13.6 6.8,235,0.428,0.728,9 7.2,230,0.559,0.721,24.6 6.4,190,0.441,0.757,12.6 6.6,220,0.492,0.747,5.6 6.8,210,0.402,0.739,8.7 6.1,180,0.415,0.713,7.7 6.5,235,0.492,0.742,24.1 6.4,185,0.484,0.861,11.7 6,175,0.387,0.721,7.7 6,192,0.436,0.785,9.6 7.3,263,0.482,0.655,7.2 6.1,180,0.34,0.821,12.3 6.7,240,0.516,0.728,8.9 6.4,210,0.475,0.846,13.6 5.8,160,0.412,0.813,11.2 6.9,230,0.411,0.595,2.8 7,245,0.407,0.573,3.2 7.3,228,0.445,0.726,9.4 5.9,155,0.291,0.707,11.9 6.2,200,0.449,0.804,15.4 6.8,235,0.546,0.784,7.4 7,235,0.48,0.744,18.9 5.9,105,0.359,0.839,7.9 6.1,180,0.528,0.79,12.2 5.7,185,0.352,0.701,11 7.1,245,0.414,0.778,2.8 5.8,180,0.425,0.872,11.8 7.4,240,0.599,0.713,17.1 6.8,225,0.482,0.701,11.6 6.8,215,0.457,0.734,5.8 7,230,0.435,0.764,8.3 3 | -------------------------------------------------------------------------------- /lab_asssigment/5_normal_equation/linux_mac/submit.sh: -------------------------------------------------------------------------------- 1 | export BACKEND_ACCESS_KEY=AKIAQWZD6A6Y5ZVOHSJR 2 | export BACKEND_SECRET_KEY=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 3 | if [ $# -eq 0 ] 4 | then 5 | echo "Please give hash key as argument." 6 | else 7 | backend.ai run --exec "python test.py linear_model.py $1" python3 test.py linear_model.py test.csv train.csv mlr09.csv 8 | fi 9 | -------------------------------------------------------------------------------- /lab_asssigment/5_normal_equation/linux_mac/test.csv: -------------------------------------------------------------------------------- 1 | x,y 2 | 77,79.77515201 3 | 21,23.17727887 4 | 22,25.60926156 5 | 20,17.85738813 6 | 36,41.84986439 7 | 15,9.805234876 8 | 62,58.87465933 9 | 95,97.61793701 10 | 20,18.39512747 11 | 5,8.746747654 12 | 4,2.811415826 13 | 19,17.09537241 14 | 96,95.14907176 15 | 62,61.38800663 16 | 36,40.24701716 17 | 15,14.82248589 18 | 65,66.95806869 19 | 14,16.63507984 20 | 87,90.65513736 21 | 69,77.22982636 22 | 89,92.11906278 23 | 51,46.91387709 24 | 89,89.82634442 25 | 27,21.71380347 26 | 97,97.41206981 27 | 58,57.01631363 28 | 79,78.31056542 29 | 21,19.1315097 30 | 93,93.03483388 31 | 27,26.59112396 32 | 99,97.55155344 33 | 31,31.43524822 34 | 33,35.12724777 35 | 80,78.61042432 36 | 28,33.07112825 37 | 47,51.69967172 38 | 53,53.62235225 39 | 69,69.46306072 40 | 28,27.42497237 41 | 33,36.34644189 42 | 91,95.06140858 43 | 71,68.16724757 44 | 50,50.96155532 45 | 76,78.04237454 46 | 4,5.607664865 47 | 37,36.11334779 48 | 70,67.2352155 49 | 68,65.01324035 50 | 40,38.14753871 51 | 35,34.31141446 52 | 94,95.28503937 53 | 88,87.84749912 54 | 52,54.08170635 55 | 31,31.93063515 56 | 59,59.61247085 57 | 0,-1.040114209 58 | 39,47.49374765 59 | 64,62.60089773 60 | 69,70.9146434 61 | 57,56.14834113 62 | 13,14.05572877 63 | 72,68.11367147 64 | 76,75.59701346 65 | 61,59.225745 66 | 82,85.45504157 67 | 18,17.76197116 68 | 41,38.68888682 69 | 50,50.96343637 70 | 55,51.83503872 71 | 13,17.0761107 72 | 46,46.56141773 73 | 13,10.34754461 74 | 79,77.91032969 75 | 53,50.17008622 76 | 15,13.25690647 77 | 28,31.32274932 78 | 81,73.9308764 
79 | 69,74.45114379 80 | 52,52.01932286 81 | 84,83.68820499 82 | 68,70.3698748 83 | 27,23.44479161 84 | 56,49.83051801 85 | 48,49.88226593 86 | 40,41.04525583 87 | 39,33.37834391 88 | 82,81.29750133 89 | 100,105.5918375 90 | 59,56.82457013 91 | 43,48.67252645 92 | 67,67.02150613 93 | 38,38.43076389 94 | 63,58.61466887 95 | 91,89.12377509 96 | 60,60.9105427 97 | 14,13.83959878 98 | 21,16.89085185 99 | 87,84.06676818 100 | 73,70.34969772 101 | 32,33.38474138 102 | 2,-1.63296825 103 | 82,88.54475895 104 | 19,17.44047622 105 | 74,75.69298554 106 | 42,41.97607107 107 | 12,12.59244741 108 | 1,0.275307261 109 | 90,98.13258005 110 | 89,87.45721555 111 | 0,-2.344738542 112 | 41,39.3294153 113 | 16,16.68715211 114 | 94,96.58888601 115 | 97,97.70342201 116 | 66,67.01715955 117 | 24,25.63476257 118 | 17,13.41310757 119 | 90,95.15647284 120 | 13,9.744164258 121 | 0,-3.467883789 122 | 64,62.82816355 123 | 96,97.27405461 124 | 98,95.58017185 125 | 12,7.468501839 126 | 41,45.44599591 127 | 47,46.69013968 128 | 78,74.4993599 129 | 20,21.63500655 130 | 89,91.59548851 131 | 29,26.49487961 132 | 64,67.38654703 133 | 75,74.25362837 134 | 12,12.07991648 135 | 25,21.32273728 136 | 28,29.31770045 137 | 30,26.48713683 138 | 65,68.94699774 139 | 59,59.10598995 140 | 64,64.37521087 141 | 53,60.20758349 142 | 71,70.34329706 143 | 97,97.1082562 144 | 73,75.7584178 145 | 9,10.80462727 146 | 12,12.11219941 147 | 63,63.28312382 148 | 99,98.03017721 149 | 60,63.19354354 150 | 35,34.8534823 151 | 2,-2.819913974 152 | 60,59.8313966 153 | 32,29.38505024 154 | 94,97.00148372 155 | 84,85.18657275 156 | 63,61.74063192 157 | 22,18.84798163 158 | 81,78.79008525 159 | 93,95.12400481 160 | 33,30.48881287 161 | 7,10.41468095 162 | 42,38.98317436 163 | 46,46.11021062 164 | 54,52.45103628 165 | 16,21.16523945 166 | 49,52.28620611 167 | 43,44.18863945 168 | 95,97.13832018 169 | 66,67.22008001 170 | 21,18.98322306 171 | 35,24.3884599 172 | 80,79.44769523 173 | 37,40.03504862 174 | 54,53.32005764 175 | 56,54.55446979 176 | 1,-2.761182595 177 | 32,37.80182795 178 | 58,57.48741435 179 | 32,36.06292994 180 | 46,49.83538167 181 | 72,74.68953276 182 | 17,14.86159401 183 | 97,101.0697879 184 | 93,99.43577876 185 | 91,91.69240746 186 | 37,34.12473248 187 | 4,6.079390073 188 | 54,59.07247174 189 | 51,56.43046022 190 | 27,30.49412933 191 | 46,48.35172635 192 | 92,89.73153611 193 | 73,72.86282528 194 | 77,80.97144285 195 | 91,91.36566374 196 | 61,60.07137496 197 | 99,99.87382707 198 | 4,8.655714172 199 | 72,69.39858505 200 | 19,19.38780134 201 | 57,53.11628433 202 | 78,78.39683006 203 | 26,25.75612514 204 | 74,75.07484683 205 | 90,92.88772282 206 | 66,69.45498498 207 | 13,13.12109842 208 | 40,48.09843134 209 | 77,79.3142548 210 | 67,68.48820749 211 | 75,73.2300846 212 | 23,24.68362712 213 | 45,41.90368917 214 | 59,62.22635684 215 | 44,45.96396877 216 | 23,23.52647153 217 | 55,51.80035866 218 | 55,51.10774273 219 | 95,95.79747345 220 | 12,9.241138977 221 | 4,7.646529763 222 | 7,9.281699753 223 | 100,103.5266162 224 | 48,47.41006725 225 | 42,42.03835773 226 | 96,96.11982476 227 | 39,38.05766408 228 | 100,105.4503788 229 | 87,88.80306911 230 | 14,15.49301141 231 | 14,12.42624606 232 | 37,40.00709598 233 | 5,5.634030902 234 | 88,87.36938931 235 | 91,89.73951993 236 | 65,66.61499643 237 | 74,72.9138853 238 | 56,57.19103506 239 | 16,11.21710477 240 | 5,0.676076749 241 | 28,28.15668543 242 | 92,95.3958003 243 | 46,52.05490703 244 | 54,59.70864577 245 | 39,36.79224762 246 | 44,37.08457698 247 | 31,24.18437976 248 | 68,67.28725332 249 | 86,82.870594 250 | 
90,89.899991 251 | 38,36.94173178 252 | 21,19.87562242 253 | 95,90.71481654 254 | 56,61.09367762 255 | 60,60.11134958 256 | 65,64.83296316 257 | 78,81.40381769 258 | 89,92.40217686 259 | 6,2.576625376 260 | 67,63.80768172 261 | 36,38.67780759 262 | 16,16.82839701 263 | 100,99.78687252 264 | 45,44.68913433 265 | 73,71.00377824 266 | 57,51.57326718 267 | 20,19.87846479 268 | 76,79.50341495 269 | 34,34.58876491 270 | 55,55.7383467 271 | 72,68.19721905 272 | 55,55.81628509 273 | 8,9.391416798 274 | 56,56.01448111 275 | 72,77.9969477 276 | 58,55.37049953 277 | 6,11.89457829 278 | 96,94.79081712 279 | 23,25.69041546 280 | 58,53.52042319 281 | 23,18.31396758 282 | 19,21.42637785 283 | 25,30.41303282 284 | 64,67.68142149 285 | 21,17.0854783 286 | 59,60.91792707 287 | 19,14.99514319 288 | 16,16.74923937 289 | 42,41.46923883 290 | 43,42.84526108 291 | 61,59.12912974 292 | 92,91.30863673 293 | 11,8.673336357 294 | 41,39.31485292 295 | 1,5.313686205 296 | 8,5.405220518 297 | 71,68.5458879 298 | 46,47.33487629 299 | 55,54.09063686 300 | 62,63.29717058 301 | 47,52.45946688 -------------------------------------------------------------------------------- /lab_asssigment/5_normal_equation/linux_mac/test.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import urllib.parse 3 | import urllib.request 4 | import json 5 | import argparse 6 | import os 7 | import types 8 | import sys 9 | 10 | 11 | parser = argparse.ArgumentParser(description="Autoscorer") 12 | parser.add_argument("filename", help="File to submit") 13 | parser.add_argument("hash", help="Hash key") 14 | 15 | args = parser.parse_args() 16 | if args.hash: 17 | hashkey = args.hash 18 | if args.filename: 19 | filename = args.filename 20 | 21 | 22 | class TestResult(unittest.TextTestResult): 23 | _previousTestClass = None 24 | _testRunEntered = False 25 | _moduleSetUpFailed = False 26 | 27 | def __init__(self, stream=None, descriptions=None, verbosity=1): 28 | super().__init__( 29 | stream=stream, descriptions=descriptions, verbosity=verbosity) 30 | self.tests_run = [] 31 | 32 | def getTestsReport(self): 33 | """Returns the run tests as a list of the form [test_id, result]""" 34 | return self.tests_run 35 | 36 | def addError(self, test, err): 37 | """Called when an error has occurred. 'err' is a tuple of values as 38 | returned by sys.exc_info(). 39 | """ 40 | super().addError(test, err) 41 | self.errors.append((test, self._exc_info_to_string(err, test))) 42 | self._mirrorOutput = True 43 | self.tests_run.append([test.id(), 0]) 44 | 45 | def addFailure(self, test, err): 46 | """Called when an error has occurred. 
'err' is a tuple of values as 47 | returned by sys.exc_info().""" 48 | super().addFailure(test, err) 49 | # super().addFailure() has already appended this failure to self.failures 50 | self._mirrorOutput = True 51 | self.tests_run.append([test.id(), 0]) 52 | 53 | def addSuccess(self, test): 54 | "Called when a test has completed successfully" 55 | super().addSuccess(test) 56 | self.tests_run.append([test.id(), 1]) 57 | 58 | 59 | with urllib.request.urlopen('http://datasets.lablup.ai/private/python-tests/unit_test_linear_model.py') as response: 60 | test_code = response.read() 61 | 62 | 63 | test_module = types.ModuleType( 64 | 'test_code', 65 | doc='Test case') 66 | 67 | exec(test_code, test_module.__dict__)  # runs remotely fetched test code; the source is assumed trusted 68 | sys.modules['test_code'] = test_module 69 | 70 | import test_code as tc 71 | loader = unittest.loader.defaultTestLoader 72 | null_stream = open(os.devnull, "w") 73 | test_suite = loader.loadTestsFromModule(tc) 74 | result = unittest.TextTestRunner( 75 | stream=null_stream, verbosity=2, resultclass=TestResult).run(test_suite) 76 | 77 | print("Generating result sheet...") 78 | print("-------------------------------------------------------------------") 79 | print(" Test Case | Passed? | Feedback") 80 | print("-------------------------------------------------------------------") 81 | for c, r in result.tests_run: 82 | print("{0:s} | {1:s} | {2} ".format( 83 | c.rsplit('.', 1)[1].rjust(26), 84 | "PASSED" if r == 1 else "FAILED", 85 | "Good Job".rjust(10) if r == 1 else "Failed".rjust(10))) 86 | 87 | # print(json.dumps(result.tests_run)) 88 | print("Reading source file...") 89 | 90 | file = open(filename, "r") 91 | print("Transferring results to server...") 92 | payload = { 93 | 'hashkey': hashkey, 94 | 'result': result.tests_run, 95 | 'code': file.read() 96 | } 97 | try: 98 | data = urllib.parse.urlencode(payload) 99 | data = data.encode('ascii') 100 | req = urllib.request.Request('http://report.inflearn.com/submit', data) 101 | with urllib.request.urlopen(req) as response: 102 | resp = response.read() 103 | 104 | if json.loads(resp)['result'] == 0: 105 | print("Transfer failed: hash key is already used.") 106 | else: 107 | print("Transfer completed.") 108 | 109 | except Exception as e: 110 | print("Error occurred on transferring.", e) 111 | -------------------------------------------------------------------------------- /lab_asssigment/5_normal_equation/windows/install.bat: -------------------------------------------------------------------------------- 1 | pip install -U backend.ai-client 2 | -------------------------------------------------------------------------------- /lab_asssigment/5_normal_equation/windows/linear_model.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | class LinearRegression(object): 5 | def __init__(self, fit_intercept=True, copy_X=True): 6 | self.fit_intercept = fit_intercept 7 | self.copy_X = copy_X 8 | 9 | self._coef = None 10 | self._intercept = None 11 | self._new_X = None 12 | 13 | def fit(self, X, y): 14 | pass  # to be implemented: solve the normal equation (see the sketch below) 15 | 16 | def predict(self, X): 17 | pass  # to be implemented: apply the learned coefficients 18 | 19 | @property 20 | def coef(self): 21 | return self._coef 22 | 23 | @property 24 | def intercept(self): 25 | return self._intercept 26 | 
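A minimal sketch of one way the fit and predict stubs in linear_model.py above could be completed, using the closed-form normal equation theta = (X^T X)^(-1) X^T y. This is one illustrative solution written for this listing, not the official answer key; np.linalg.pinv stands in for a plain inverse so that a singular X^T X does not break fit.

import numpy as np


class LinearRegression(object):
    """One possible completion of the skeleton above (normal equation)."""

    def __init__(self, fit_intercept=True, copy_X=True):
        self.fit_intercept = fit_intercept
        self.copy_X = copy_X
        self._coef = None
        self._intercept = None
        self._new_X = None

    def fit(self, X, y):
        X = np.asarray(X, dtype=float)
        if self.copy_X:
            X = X.copy()
        if X.ndim == 1:
            X = X.reshape(-1, 1)  # promote a single feature to a column vector
        if self.fit_intercept:
            X = np.hstack((np.ones((X.shape[0], 1)), X))  # prepend bias column
        self._new_X = X
        y = np.asarray(y, dtype=float).reshape(-1)
        # Normal equation; pinv tolerates a singular X^T X
        theta = np.linalg.pinv(X.T @ X) @ X.T @ y
        if self.fit_intercept:
            self._intercept = theta[0]
            self._coef = theta[1:]
        else:
            self._intercept = 0.0
            self._coef = theta
        return self

    def predict(self, X):
        X = np.asarray(X, dtype=float)
        if X.ndim == 1:
            X = X.reshape(-1, 1)
        return X @ self._coef + self._intercept

    @property
    def coef(self):
        return self._coef

    @property
    def intercept(self):
        return self._intercept

Because the normal equation is exact, a fit like this should recover a slope close to 1 and an intercept close to 0 on the x,y pairs in this assignment's test.csv, consistent with how that data appears to have been generated.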
-------------------------------------------------------------------------------- /lab_asssigment/5_normal_equation/windows/mlr09.csv: -------------------------------------------------------------------------------- 1 | height_in_feet,weight_in_pounds,successful_field_goals,percent_of_successful_free_throws,average_points_scored 2 | 6.8,225,0.442,0.672,9.2 6.3,180,0.435,0.797,11.7 6.4,190,0.456,0.761,15.8 6.2,180,0.416,0.651,8.6 6.9,205,0.449,0.9,23.2 6.4,225,0.431,0.78,27.4 6.3,185,0.487,0.771,9.3 6.8,235,0.469,0.75,16 6.9,235,0.435,0.818,4.7 6.7,210,0.48,0.825,12.5 6.9,245,0.516,0.632,20.1 6.9,245,0.493,0.757,9.1 6.3,185,0.374,0.709,8.1 6.1,185,0.424,0.782,8.6 6.2,180,0.441,0.775,20.3 6.8,220,0.503,0.88,25 6.5,194,0.503,0.833,19.2 7.6,225,0.425,0.571,3.3 6.3,210,0.371,0.816,11.2 7.1,240,0.504,0.714,10.5 6.8,225,0.4,0.765,10.1 7.3,263,0.482,0.655,7.2 6.4,210,0.475,0.244,13.6 6.8,235,0.428,0.728,9 7.2,230,0.559,0.721,24.6 6.4,190,0.441,0.757,12.6 6.6,220,0.492,0.747,5.6 6.8,210,0.402,0.739,8.7 6.1,180,0.415,0.713,7.7 6.5,235,0.492,0.742,24.1 6.4,185,0.484,0.861,11.7 6,175,0.387,0.721,7.7 6,192,0.436,0.785,9.6 7.3,263,0.482,0.655,7.2 6.1,180,0.34,0.821,12.3 6.7,240,0.516,0.728,8.9 6.4,210,0.475,0.846,13.6 5.8,160,0.412,0.813,11.2 6.9,230,0.411,0.595,2.8 7,245,0.407,0.573,3.2 7.3,228,0.445,0.726,9.4 5.9,155,0.291,0.707,11.9 6.2,200,0.449,0.804,15.4 6.8,235,0.546,0.784,7.4 7,235,0.48,0.744,18.9 5.9,105,0.359,0.839,7.9 6.1,180,0.528,0.79,12.2 5.7,185,0.352,0.701,11 7.1,245,0.414,0.778,2.8 5.8,180,0.425,0.872,11.8 7.4,240,0.599,0.713,17.1 6.8,225,0.482,0.701,11.6 6.8,215,0.457,0.734,5.8 7,230,0.435,0.764,8.3 3 | -------------------------------------------------------------------------------- /lab_asssigment/5_normal_equation/windows/submit.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | set BACKEND_ACCESS_KEY=AKIAQWZD6A6Y5ZVOHSJR 4 | set BACKEND_SECRET_KEY=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 5 | 6 | set tmp="%1" 7 | if "%tmp:"=.%"==".." ( 8 | echo "Please give hash key as argument." 
9 | ) else ( 10 | backend.ai run --exec "python test.py linear_model.py %tmp%" python3 test.py linear_model.py test.csv train.csv mlr09.csv 11 | ) 12 | -------------------------------------------------------------------------------- /lab_asssigment/5_normal_equation/windows/test.csv: -------------------------------------------------------------------------------- 1 | x,y 2 | 77,79.77515201 3 | 21,23.17727887 4 | 22,25.60926156 5 | 20,17.85738813 6 | 36,41.84986439 7 | 15,9.805234876 8 | 62,58.87465933 9 | 95,97.61793701 10 | 20,18.39512747 11 | 5,8.746747654 12 | 4,2.811415826 13 | 19,17.09537241 14 | 96,95.14907176 15 | 62,61.38800663 16 | 36,40.24701716 17 | 15,14.82248589 18 | 65,66.95806869 19 | 14,16.63507984 20 | 87,90.65513736 21 | 69,77.22982636 22 | 89,92.11906278 23 | 51,46.91387709 24 | 89,89.82634442 25 | 27,21.71380347 26 | 97,97.41206981 27 | 58,57.01631363 28 | 79,78.31056542 29 | 21,19.1315097 30 | 93,93.03483388 31 | 27,26.59112396 32 | 99,97.55155344 33 | 31,31.43524822 34 | 33,35.12724777 35 | 80,78.61042432 36 | 28,33.07112825 37 | 47,51.69967172 38 | 53,53.62235225 39 | 69,69.46306072 40 | 28,27.42497237 41 | 33,36.34644189 42 | 91,95.06140858 43 | 71,68.16724757 44 | 50,50.96155532 45 | 76,78.04237454 46 | 4,5.607664865 47 | 37,36.11334779 48 | 70,67.2352155 49 | 68,65.01324035 50 | 40,38.14753871 51 | 35,34.31141446 52 | 94,95.28503937 53 | 88,87.84749912 54 | 52,54.08170635 55 | 31,31.93063515 56 | 59,59.61247085 57 | 0,-1.040114209 58 | 39,47.49374765 59 | 64,62.60089773 60 | 69,70.9146434 61 | 57,56.14834113 62 | 13,14.05572877 63 | 72,68.11367147 64 | 76,75.59701346 65 | 61,59.225745 66 | 82,85.45504157 67 | 18,17.76197116 68 | 41,38.68888682 69 | 50,50.96343637 70 | 55,51.83503872 71 | 13,17.0761107 72 | 46,46.56141773 73 | 13,10.34754461 74 | 79,77.91032969 75 | 53,50.17008622 76 | 15,13.25690647 77 | 28,31.32274932 78 | 81,73.9308764 79 | 69,74.45114379 80 | 52,52.01932286 81 | 84,83.68820499 82 | 68,70.3698748 83 | 27,23.44479161 84 | 56,49.83051801 85 | 48,49.88226593 86 | 40,41.04525583 87 | 39,33.37834391 88 | 82,81.29750133 89 | 100,105.5918375 90 | 59,56.82457013 91 | 43,48.67252645 92 | 67,67.02150613 93 | 38,38.43076389 94 | 63,58.61466887 95 | 91,89.12377509 96 | 60,60.9105427 97 | 14,13.83959878 98 | 21,16.89085185 99 | 87,84.06676818 100 | 73,70.34969772 101 | 32,33.38474138 102 | 2,-1.63296825 103 | 82,88.54475895 104 | 19,17.44047622 105 | 74,75.69298554 106 | 42,41.97607107 107 | 12,12.59244741 108 | 1,0.275307261 109 | 90,98.13258005 110 | 89,87.45721555 111 | 0,-2.344738542 112 | 41,39.3294153 113 | 16,16.68715211 114 | 94,96.58888601 115 | 97,97.70342201 116 | 66,67.01715955 117 | 24,25.63476257 118 | 17,13.41310757 119 | 90,95.15647284 120 | 13,9.744164258 121 | 0,-3.467883789 122 | 64,62.82816355 123 | 96,97.27405461 124 | 98,95.58017185 125 | 12,7.468501839 126 | 41,45.44599591 127 | 47,46.69013968 128 | 78,74.4993599 129 | 20,21.63500655 130 | 89,91.59548851 131 | 29,26.49487961 132 | 64,67.38654703 133 | 75,74.25362837 134 | 12,12.07991648 135 | 25,21.32273728 136 | 28,29.31770045 137 | 30,26.48713683 138 | 65,68.94699774 139 | 59,59.10598995 140 | 64,64.37521087 141 | 53,60.20758349 142 | 71,70.34329706 143 | 97,97.1082562 144 | 73,75.7584178 145 | 9,10.80462727 146 | 12,12.11219941 147 | 63,63.28312382 148 | 99,98.03017721 149 | 60,63.19354354 150 | 35,34.8534823 151 | 2,-2.819913974 152 | 60,59.8313966 153 | 32,29.38505024 154 | 94,97.00148372 155 | 84,85.18657275 156 | 63,61.74063192 157 | 22,18.84798163 158 | 81,78.79008525 159 | 
93,95.12400481 160 | 33,30.48881287 161 | 7,10.41468095 162 | 42,38.98317436 163 | 46,46.11021062 164 | 54,52.45103628 165 | 16,21.16523945 166 | 49,52.28620611 167 | 43,44.18863945 168 | 95,97.13832018 169 | 66,67.22008001 170 | 21,18.98322306 171 | 35,24.3884599 172 | 80,79.44769523 173 | 37,40.03504862 174 | 54,53.32005764 175 | 56,54.55446979 176 | 1,-2.761182595 177 | 32,37.80182795 178 | 58,57.48741435 179 | 32,36.06292994 180 | 46,49.83538167 181 | 72,74.68953276 182 | 17,14.86159401 183 | 97,101.0697879 184 | 93,99.43577876 185 | 91,91.69240746 186 | 37,34.12473248 187 | 4,6.079390073 188 | 54,59.07247174 189 | 51,56.43046022 190 | 27,30.49412933 191 | 46,48.35172635 192 | 92,89.73153611 193 | 73,72.86282528 194 | 77,80.97144285 195 | 91,91.36566374 196 | 61,60.07137496 197 | 99,99.87382707 198 | 4,8.655714172 199 | 72,69.39858505 200 | 19,19.38780134 201 | 57,53.11628433 202 | 78,78.39683006 203 | 26,25.75612514 204 | 74,75.07484683 205 | 90,92.88772282 206 | 66,69.45498498 207 | 13,13.12109842 208 | 40,48.09843134 209 | 77,79.3142548 210 | 67,68.48820749 211 | 75,73.2300846 212 | 23,24.68362712 213 | 45,41.90368917 214 | 59,62.22635684 215 | 44,45.96396877 216 | 23,23.52647153 217 | 55,51.80035866 218 | 55,51.10774273 219 | 95,95.79747345 220 | 12,9.241138977 221 | 4,7.646529763 222 | 7,9.281699753 223 | 100,103.5266162 224 | 48,47.41006725 225 | 42,42.03835773 226 | 96,96.11982476 227 | 39,38.05766408 228 | 100,105.4503788 229 | 87,88.80306911 230 | 14,15.49301141 231 | 14,12.42624606 232 | 37,40.00709598 233 | 5,5.634030902 234 | 88,87.36938931 235 | 91,89.73951993 236 | 65,66.61499643 237 | 74,72.9138853 238 | 56,57.19103506 239 | 16,11.21710477 240 | 5,0.676076749 241 | 28,28.15668543 242 | 92,95.3958003 243 | 46,52.05490703 244 | 54,59.70864577 245 | 39,36.79224762 246 | 44,37.08457698 247 | 31,24.18437976 248 | 68,67.28725332 249 | 86,82.870594 250 | 90,89.899991 251 | 38,36.94173178 252 | 21,19.87562242 253 | 95,90.71481654 254 | 56,61.09367762 255 | 60,60.11134958 256 | 65,64.83296316 257 | 78,81.40381769 258 | 89,92.40217686 259 | 6,2.576625376 260 | 67,63.80768172 261 | 36,38.67780759 262 | 16,16.82839701 263 | 100,99.78687252 264 | 45,44.68913433 265 | 73,71.00377824 266 | 57,51.57326718 267 | 20,19.87846479 268 | 76,79.50341495 269 | 34,34.58876491 270 | 55,55.7383467 271 | 72,68.19721905 272 | 55,55.81628509 273 | 8,9.391416798 274 | 56,56.01448111 275 | 72,77.9969477 276 | 58,55.37049953 277 | 6,11.89457829 278 | 96,94.79081712 279 | 23,25.69041546 280 | 58,53.52042319 281 | 23,18.31396758 282 | 19,21.42637785 283 | 25,30.41303282 284 | 64,67.68142149 285 | 21,17.0854783 286 | 59,60.91792707 287 | 19,14.99514319 288 | 16,16.74923937 289 | 42,41.46923883 290 | 43,42.84526108 291 | 61,59.12912974 292 | 92,91.30863673 293 | 11,8.673336357 294 | 41,39.31485292 295 | 1,5.313686205 296 | 8,5.405220518 297 | 71,68.5458879 298 | 46,47.33487629 299 | 55,54.09063686 300 | 62,63.29717058 301 | 47,52.45946688 -------------------------------------------------------------------------------- /lab_asssigment/5_normal_equation/windows/test.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import urllib.parse 3 | import urllib.request 4 | import json 5 | import argparse 6 | import os 7 | import types 8 | import sys 9 | 10 | 11 | parser = argparse.ArgumentParser(description="Autoscorer") 12 | parser.add_argument("filename", help="File to submit") 13 | parser.add_argument("hash", help="Hash key") 14 | 15 | args = 
parser.parse_args() 16 | if args.hash: 17 | hashkey = args.hash 18 | if args.filename: 19 | filename = args.filename 20 | 21 | 22 | class TestResult(unittest.TextTestResult): 23 | _previousTestClass = None 24 | _testRunEntered = False 25 | _moduleSetUpFailed = False 26 | 27 | def __init__(self, stream=None, descriptions=None, verbosity=1): 28 | super().__init__( 29 | stream=stream, descriptions=descriptions, verbosity=verbosity) 30 | self.tests_run = [] 31 | 32 | def getTestsReport(self): 33 | """Returns the run tests as a list of the form [test_id, result]""" 34 | return self.tests_run 35 | 36 | def addError(self, test, err): 37 | """Called when an error has occurred. 'err' is a tuple of values as 38 | returned by sys.exc_info(). 39 | """ 40 | super().addError(test, err) 41 | # super().addError() has already appended this error to self.errors 42 | self._mirrorOutput = True 43 | self.tests_run.append([test.id(), 0]) 44 | 45 | def addFailure(self, test, err): 46 | """Called when a test failure has occurred. 'err' is a tuple of values as 47 | returned by sys.exc_info().""" 48 | super().addFailure(test, err) 49 | # super().addFailure() has already appended this failure to self.failures 50 | self._mirrorOutput = True 51 | self.tests_run.append([test.id(), 0]) 52 | 53 | def addSuccess(self, test): 54 | "Called when a test has completed successfully" 55 | super().addSuccess(test) 56 | self.tests_run.append([test.id(), 1]) 57 | 58 | 59 | with urllib.request.urlopen('http://datasets.lablup.ai/private/python-tests/unit_test_linear_model.py') as response: 60 | test_code = response.read() 61 | 62 | 63 | test_module = types.ModuleType( 64 | 'test_code', 65 | doc='Test case') 66 | 67 | exec(test_code, test_module.__dict__)  # runs remotely fetched test code; the source is assumed trusted 68 | sys.modules['test_code'] = test_module 69 | 70 | import test_code as tc 71 | loader = unittest.loader.defaultTestLoader 72 | null_stream = open(os.devnull, "w") 73 | test_suite = loader.loadTestsFromModule(tc) 74 | result = unittest.TextTestRunner( 75 | stream=null_stream, verbosity=2, resultclass=TestResult).run(test_suite) 76 | 77 | print("Generating result sheet...") 78 | print("-------------------------------------------------------------------") 79 | print(" Test Case | Passed? 
| Feedback") 80 | print("-------------------------------------------------------------------") 81 | for c, r in result.tests_run: 82 | print("{0:s} | {1:s} | {2} ".format( 83 | c.rsplit('.', 1)[1].rjust(26), 84 | "PASSED" if r == 1 else "FAILED", 85 | "Good Job".rjust(10) if r == 1 else "Failed".rjust(10))) 86 | 87 | # print(json.dumps(result.tests_run)) 88 | print("Reading source file...") 89 | 90 | file = open(filename, "r") 91 | print("Transferring results to server...") 92 | payload = { 93 | 'hashkey': hashkey, 94 | 'result': result.tests_run, 95 | 'code': file.read() 96 | } 97 | try: 98 | data = urllib.parse.urlencode(payload) 99 | data = data.encode('ascii') 100 | req = urllib.request.Request('http://report.inflearn.com/submit', data) 101 | with urllib.request.urlopen(req) as response: 102 | resp = response.read() 103 | 104 | if json.loads(resp)['result'] == 0: 105 | print("Transfer failed: hash key is already used.") 106 | else: 107 | print("Transfer completed.") 108 | 109 | except Exception as e: 110 | print("Error occurred on transferring.", e) 111 | -------------------------------------------------------------------------------- /lab_asssigment/6_gradient_descent/linear_model.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | class LinearRegressionGD(object): 5 | def __init__(self, fit_intercept=True, copy_X=True, 6 | eta0=0.001, epochs=1000, weight_decay=0.9): 7 | self.fit_intercept = fit_intercept 8 | self.copy_X = copy_X 9 | self._eta0 = eta0 10 | self._epochs = epochs 11 | 12 | self._cost_history = [] 13 | 14 | self._coef = None 15 | self._intercept = None 16 | self._new_X = None 17 | self._w_history = None 18 | self._weight_decay = weight_decay 19 | 20 | def cost(self, h, y): 21 | pass 22 | 23 | def hypothesis_function(self, X, theta): 24 | pass 25 | 26 | def gradient(self, X, y, theta): 27 | pass 28 | 29 | def fit(self, X, y): 30 | # Write your code 31 | 32 | for epoch in range(self._epochs): 33 | # 아래 코드를 반드시 활용할 것 34 | gradient = self.gradient(self._new_X, y, theta).flatten() 35 | 36 | # Write your code 37 | 38 | if epoch % 100 == 0: 39 | self._w_history.append(theta) 40 | cost = self.cost( 41 | self.hypothesis_function(self._new_X, theta), y) 42 | self._cost_history.append(cost) 43 | self._eta0 = self._eta0 * self._weight_decay 44 | 45 | # Write your code 46 | 47 | def predict(self, X): 48 | pass 49 | 50 | @property 51 | def coef(self): 52 | return self._coef 53 | 54 | @property 55 | def intercept(self): 56 | return self._intercept 57 | 58 | @property 59 | def weights_history(self): 60 | return np.array(self._w_history) 61 | 62 | @property 63 | def cost_history(self): 64 | return self._cost_history 65 | -------------------------------------------------------------------------------- /lab_asssigment/6_gradient_descent/linux_mac/install.sh: -------------------------------------------------------------------------------- 1 | pip install -U backend.ai-client 2 | -------------------------------------------------------------------------------- /lab_asssigment/6_gradient_descent/linux_mac/linear_model.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | class LinearRegressionGD(object): 5 | def __init__(self, fit_intercept=True, copy_X=True, 6 | eta0=0.001, epochs=1000, weight_decay=0.9): 7 | self.fit_intercept = fit_intercept 8 | self.copy_X = copy_X 9 | self._eta0 = eta0 10 | self._epochs = epochs 11 | 12 | self._cost_history = [] 13 | 14 | 
-------------------------------------------------------------------------------- /lab_asssigment/6_gradient_descent/linux_mac/install.sh: -------------------------------------------------------------------------------- 1 | pip install -U backend.ai-client 2 | -------------------------------------------------------------------------------- /lab_asssigment/6_gradient_descent/linux_mac/linear_model.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | class LinearRegressionGD(object): 5 | def __init__(self, fit_intercept=True, copy_X=True, 6 | eta0=0.001, epochs=1000, weight_decay=0.9): 7 | self.fit_intercept = fit_intercept 8 | self.copy_X = copy_X 9 | self._eta0 = eta0 10 | self._epochs = epochs 11 | 12 | self._cost_history = [] 13 | 14 | self._coef = None 15 | self._intercept = None 16 | self._new_X = None 17 | self._w_history = None 18 | self._weight_decay = weight_decay 19 | 20 | def cost(self, h, y): 21 | pass 22 | 23 | def hypothesis_function(self, X, theta): 24 | pass 25 | 26 | def gradient(self, X, y, theta): 27 | pass 28 | 29 | def fit(self, X, y): 30 | # Write your code (set up self._new_X and initialize theta here) 31 | 32 | for epoch in range(self._epochs): 33 | # Be sure to use the line of code below 34 | gradient = self.gradient(self._new_X, y, theta).flatten() 35 | 36 | # Write your code 37 | 38 | if epoch % 100 == 0: 39 | self._w_history.append(theta) 40 | cost = self.cost( 41 | self.hypothesis_function(self._new_X, theta), y) 42 | self._cost_history.append(cost) 43 | self._eta0 = self._eta0 * self._weight_decay 44 | 45 | # Write your code 46 | 47 | def predict(self, X): 48 | pass 49 | 50 | @property 51 | def coef(self): 52 | return self._coef 53 | 54 | @property 55 | def intercept(self): 56 | return self._intercept 57 | 58 | @property 59 | def weights_history(self): 60 | return np.array(self._w_history) 61 | 62 | @property 63 | def cost_history(self): 64 | return self._cost_history 65 | -------------------------------------------------------------------------------- /lab_asssigment/6_gradient_descent/linux_mac/mlr09.csv: -------------------------------------------------------------------------------- 1 | height_in_feet,weight_in_pounds,successful_field_goals,percent_of_successful_free_throws,average_points_scored 2 | 6.8,225,0.442,0.672,9.2 6.3,180,0.435,0.797,11.7 6.4,190,0.456,0.761,15.8 6.2,180,0.416,0.651,8.6 6.9,205,0.449,0.9,23.2 6.4,225,0.431,0.78,27.4 6.3,185,0.487,0.771,9.3 6.8,235,0.469,0.75,16 6.9,235,0.435,0.818,4.7 6.7,210,0.48,0.825,12.5 6.9,245,0.516,0.632,20.1 6.9,245,0.493,0.757,9.1 6.3,185,0.374,0.709,8.1 6.1,185,0.424,0.782,8.6 6.2,180,0.441,0.775,20.3 6.8,220,0.503,0.88,25 6.5,194,0.503,0.833,19.2 7.6,225,0.425,0.571,3.3 6.3,210,0.371,0.816,11.2 7.1,240,0.504,0.714,10.5 6.8,225,0.4,0.765,10.1 7.3,263,0.482,0.655,7.2 6.4,210,0.475,0.244,13.6 6.8,235,0.428,0.728,9 7.2,230,0.559,0.721,24.6 6.4,190,0.441,0.757,12.6 6.6,220,0.492,0.747,5.6 6.8,210,0.402,0.739,8.7 6.1,180,0.415,0.713,7.7 6.5,235,0.492,0.742,24.1 6.4,185,0.484,0.861,11.7 6,175,0.387,0.721,7.7 6,192,0.436,0.785,9.6 7.3,263,0.482,0.655,7.2 6.1,180,0.34,0.821,12.3 6.7,240,0.516,0.728,8.9 6.4,210,0.475,0.846,13.6 5.8,160,0.412,0.813,11.2 6.9,230,0.411,0.595,2.8 7,245,0.407,0.573,3.2 7.3,228,0.445,0.726,9.4 5.9,155,0.291,0.707,11.9 6.2,200,0.449,0.804,15.4 6.8,235,0.546,0.784,7.4 7,235,0.48,0.744,18.9 5.9,105,0.359,0.839,7.9 6.1,180,0.528,0.79,12.2 5.7,185,0.352,0.701,11 7.1,245,0.414,0.778,2.8 5.8,180,0.425,0.872,11.8 7.4,240,0.599,0.713,17.1 6.8,225,0.482,0.701,11.6 6.8,215,0.457,0.734,5.8 7,230,0.435,0.764,8.3 3 | -------------------------------------------------------------------------------- /lab_asssigment/6_gradient_descent/linux_mac/submit.sh: -------------------------------------------------------------------------------- 1 | export BACKEND_ACCESS_KEY=AKIAQWZD6A6Y5ZVOHSJR 2 | export BACKEND_SECRET_KEY=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 3 | if [ $# -eq 0 ] 4 | then 5 | echo "Please give hash key as argument." 
6 | else 7 | backend.ai run --exec "python test.py linear_model.py $1" python3 test.py linear_model.py test.csv train.csv mlr09.csv 8 | fi 9 | -------------------------------------------------------------------------------- /lab_asssigment/6_gradient_descent/linux_mac/test.csv: -------------------------------------------------------------------------------- 1 | x,y 2 | 77,79.77515201 3 | 21,23.17727887 4 | 22,25.60926156 5 | 20,17.85738813 6 | 36,41.84986439 7 | 15,9.805234876 8 | 62,58.87465933 9 | 95,97.61793701 10 | 20,18.39512747 11 | 5,8.746747654 12 | 4,2.811415826 13 | 19,17.09537241 14 | 96,95.14907176 15 | 62,61.38800663 16 | 36,40.24701716 17 | 15,14.82248589 18 | 65,66.95806869 19 | 14,16.63507984 20 | 87,90.65513736 21 | 69,77.22982636 22 | 89,92.11906278 23 | 51,46.91387709 24 | 89,89.82634442 25 | 27,21.71380347 26 | 97,97.41206981 27 | 58,57.01631363 28 | 79,78.31056542 29 | 21,19.1315097 30 | 93,93.03483388 31 | 27,26.59112396 32 | 99,97.55155344 33 | 31,31.43524822 34 | 33,35.12724777 35 | 80,78.61042432 36 | 28,33.07112825 37 | 47,51.69967172 38 | 53,53.62235225 39 | 69,69.46306072 40 | 28,27.42497237 41 | 33,36.34644189 42 | 91,95.06140858 43 | 71,68.16724757 44 | 50,50.96155532 45 | 76,78.04237454 46 | 4,5.607664865 47 | 37,36.11334779 48 | 70,67.2352155 49 | 68,65.01324035 50 | 40,38.14753871 51 | 35,34.31141446 52 | 94,95.28503937 53 | 88,87.84749912 54 | 52,54.08170635 55 | 31,31.93063515 56 | 59,59.61247085 57 | 0,-1.040114209 58 | 39,47.49374765 59 | 64,62.60089773 60 | 69,70.9146434 61 | 57,56.14834113 62 | 13,14.05572877 63 | 72,68.11367147 64 | 76,75.59701346 65 | 61,59.225745 66 | 82,85.45504157 67 | 18,17.76197116 68 | 41,38.68888682 69 | 50,50.96343637 70 | 55,51.83503872 71 | 13,17.0761107 72 | 46,46.56141773 73 | 13,10.34754461 74 | 79,77.91032969 75 | 53,50.17008622 76 | 15,13.25690647 77 | 28,31.32274932 78 | 81,73.9308764 79 | 69,74.45114379 80 | 52,52.01932286 81 | 84,83.68820499 82 | 68,70.3698748 83 | 27,23.44479161 84 | 56,49.83051801 85 | 48,49.88226593 86 | 40,41.04525583 87 | 39,33.37834391 88 | 82,81.29750133 89 | 100,105.5918375 90 | 59,56.82457013 91 | 43,48.67252645 92 | 67,67.02150613 93 | 38,38.43076389 94 | 63,58.61466887 95 | 91,89.12377509 96 | 60,60.9105427 97 | 14,13.83959878 98 | 21,16.89085185 99 | 87,84.06676818 100 | 73,70.34969772 101 | 32,33.38474138 102 | 2,-1.63296825 103 | 82,88.54475895 104 | 19,17.44047622 105 | 74,75.69298554 106 | 42,41.97607107 107 | 12,12.59244741 108 | 1,0.275307261 109 | 90,98.13258005 110 | 89,87.45721555 111 | 0,-2.344738542 112 | 41,39.3294153 113 | 16,16.68715211 114 | 94,96.58888601 115 | 97,97.70342201 116 | 66,67.01715955 117 | 24,25.63476257 118 | 17,13.41310757 119 | 90,95.15647284 120 | 13,9.744164258 121 | 0,-3.467883789 122 | 64,62.82816355 123 | 96,97.27405461 124 | 98,95.58017185 125 | 12,7.468501839 126 | 41,45.44599591 127 | 47,46.69013968 128 | 78,74.4993599 129 | 20,21.63500655 130 | 89,91.59548851 131 | 29,26.49487961 132 | 64,67.38654703 133 | 75,74.25362837 134 | 12,12.07991648 135 | 25,21.32273728 136 | 28,29.31770045 137 | 30,26.48713683 138 | 65,68.94699774 139 | 59,59.10598995 140 | 64,64.37521087 141 | 53,60.20758349 142 | 71,70.34329706 143 | 97,97.1082562 144 | 73,75.7584178 145 | 9,10.80462727 146 | 12,12.11219941 147 | 63,63.28312382 148 | 99,98.03017721 149 | 60,63.19354354 150 | 35,34.8534823 151 | 2,-2.819913974 152 | 60,59.8313966 153 | 32,29.38505024 154 | 94,97.00148372 155 | 84,85.18657275 156 | 63,61.74063192 157 | 22,18.84798163 158 | 81,78.79008525 159 | 
93,95.12400481 160 | 33,30.48881287 161 | 7,10.41468095 162 | 42,38.98317436 163 | 46,46.11021062 164 | 54,52.45103628 165 | 16,21.16523945 166 | 49,52.28620611 167 | 43,44.18863945 168 | 95,97.13832018 169 | 66,67.22008001 170 | 21,18.98322306 171 | 35,24.3884599 172 | 80,79.44769523 173 | 37,40.03504862 174 | 54,53.32005764 175 | 56,54.55446979 176 | 1,-2.761182595 177 | 32,37.80182795 178 | 58,57.48741435 179 | 32,36.06292994 180 | 46,49.83538167 181 | 72,74.68953276 182 | 17,14.86159401 183 | 97,101.0697879 184 | 93,99.43577876 185 | 91,91.69240746 186 | 37,34.12473248 187 | 4,6.079390073 188 | 54,59.07247174 189 | 51,56.43046022 190 | 27,30.49412933 191 | 46,48.35172635 192 | 92,89.73153611 193 | 73,72.86282528 194 | 77,80.97144285 195 | 91,91.36566374 196 | 61,60.07137496 197 | 99,99.87382707 198 | 4,8.655714172 199 | 72,69.39858505 200 | 19,19.38780134 201 | 57,53.11628433 202 | 78,78.39683006 203 | 26,25.75612514 204 | 74,75.07484683 205 | 90,92.88772282 206 | 66,69.45498498 207 | 13,13.12109842 208 | 40,48.09843134 209 | 77,79.3142548 210 | 67,68.48820749 211 | 75,73.2300846 212 | 23,24.68362712 213 | 45,41.90368917 214 | 59,62.22635684 215 | 44,45.96396877 216 | 23,23.52647153 217 | 55,51.80035866 218 | 55,51.10774273 219 | 95,95.79747345 220 | 12,9.241138977 221 | 4,7.646529763 222 | 7,9.281699753 223 | 100,103.5266162 224 | 48,47.41006725 225 | 42,42.03835773 226 | 96,96.11982476 227 | 39,38.05766408 228 | 100,105.4503788 229 | 87,88.80306911 230 | 14,15.49301141 231 | 14,12.42624606 232 | 37,40.00709598 233 | 5,5.634030902 234 | 88,87.36938931 235 | 91,89.73951993 236 | 65,66.61499643 237 | 74,72.9138853 238 | 56,57.19103506 239 | 16,11.21710477 240 | 5,0.676076749 241 | 28,28.15668543 242 | 92,95.3958003 243 | 46,52.05490703 244 | 54,59.70864577 245 | 39,36.79224762 246 | 44,37.08457698 247 | 31,24.18437976 248 | 68,67.28725332 249 | 86,82.870594 250 | 90,89.899991 251 | 38,36.94173178 252 | 21,19.87562242 253 | 95,90.71481654 254 | 56,61.09367762 255 | 60,60.11134958 256 | 65,64.83296316 257 | 78,81.40381769 258 | 89,92.40217686 259 | 6,2.576625376 260 | 67,63.80768172 261 | 36,38.67780759 262 | 16,16.82839701 263 | 100,99.78687252 264 | 45,44.68913433 265 | 73,71.00377824 266 | 57,51.57326718 267 | 20,19.87846479 268 | 76,79.50341495 269 | 34,34.58876491 270 | 55,55.7383467 271 | 72,68.19721905 272 | 55,55.81628509 273 | 8,9.391416798 274 | 56,56.01448111 275 | 72,77.9969477 276 | 58,55.37049953 277 | 6,11.89457829 278 | 96,94.79081712 279 | 23,25.69041546 280 | 58,53.52042319 281 | 23,18.31396758 282 | 19,21.42637785 283 | 25,30.41303282 284 | 64,67.68142149 285 | 21,17.0854783 286 | 59,60.91792707 287 | 19,14.99514319 288 | 16,16.74923937 289 | 42,41.46923883 290 | 43,42.84526108 291 | 61,59.12912974 292 | 92,91.30863673 293 | 11,8.673336357 294 | 41,39.31485292 295 | 1,5.313686205 296 | 8,5.405220518 297 | 71,68.5458879 298 | 46,47.33487629 299 | 55,54.09063686 300 | 62,63.29717058 301 | 47,52.45946688 -------------------------------------------------------------------------------- /lab_asssigment/6_gradient_descent/linux_mac/test.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import urllib.parse 3 | import urllib.request 4 | import json 5 | import argparse 6 | import os 7 | import types 8 | import sys 9 | 10 | 11 | parser = argparse.ArgumentParser(description="Autoscorer") 12 | parser.add_argument("filename", help="File to submit") 13 | parser.add_argument("hash", help="Hash key") 14 | 15 | args = 
parser.parse_args() 16 | if args.hash: 17 | hashkey = args.hash 18 | if args.filename: 19 | filename = args.filename 20 | 21 | 22 | class TestResult(unittest.TextTestResult): 23 | _previousTestClass = None 24 | _testRunEntered = False 25 | _moduleSetUpFailed = False 26 | 27 | def __init__(self, stream=None, descriptions=None, verbosity=1): 28 | super().__init__( 29 | stream=stream, descriptions=descriptions, verbosity=verbosity) 30 | self.tests_run = [] 31 | 32 | def getTestsReport(self): 33 | """Returns the run tests as a list of the form [test_id, result]""" 34 | return self.tests_run 35 | 36 | def addError(self, test, err): 37 | """Called when an error has occurred. 'err' is a tuple of values as 38 | returned by sys.exc_info(). 39 | """ 40 | super().addError(test, err) 41 | # super().addError() has already appended this error to self.errors 42 | self._mirrorOutput = True 43 | self.tests_run.append([test.id(), 0]) 44 | 45 | def addFailure(self, test, err): 46 | """Called when a test failure has occurred. 'err' is a tuple of values as 47 | returned by sys.exc_info().""" 48 | super().addFailure(test, err) 49 | # super().addFailure() has already appended this failure to self.failures 50 | self._mirrorOutput = True 51 | self.tests_run.append([test.id(), 0]) 52 | 53 | def addSuccess(self, test): 54 | "Called when a test has completed successfully" 55 | super().addSuccess(test) 56 | self.tests_run.append([test.id(), 1]) 57 | 58 | 59 | with urllib.request.urlopen('http://datasets.lablup.ai/private/python-tests/unit_test_linear_model_gd.py') as response: 60 | test_code = response.read() 61 | 62 | 63 | test_module = types.ModuleType( 64 | 'test_code', 65 | doc='Test case') 66 | 67 | exec(test_code, test_module.__dict__)  # runs remotely fetched test code; the source is assumed trusted 68 | sys.modules['test_code'] = test_module 69 | 70 | import test_code as tc 71 | loader = unittest.loader.defaultTestLoader 72 | null_stream = open(os.devnull, "w") 73 | test_suite = loader.loadTestsFromModule(tc) 74 | result = unittest.TextTestRunner( 75 | stream=null_stream, verbosity=2, resultclass=TestResult).run(test_suite) 76 | 77 | print("Generating result sheet...") 78 | print("-------------------------------------------------------------------") 79 | print(" Test Case | Passed? 
| Feedback") 80 | print("-------------------------------------------------------------------") 81 | for c, r in result.tests_run: 82 | print("{0:s} | {1:s} | {2} ".format( 83 | c.rsplit('.', 1)[1].rjust(26), 84 | "PASSED" if r == 1 else "FAILED", 85 | "Good Job".rjust(10) if r == 1 else "Failed".rjust(10))) 86 | 87 | # print(json.dumps(result.tests_run)) 88 | print("Reading source file...") 89 | 90 | file = open(filename, "r") 91 | print("Transferring results to server...") 92 | payload = { 93 | 'hashkey': hashkey, 94 | 'result': result.tests_run, 95 | 'code': file.read() 96 | } 97 | try: 98 | data = urllib.parse.urlencode(payload) 99 | data = data.encode('ascii') 100 | req = urllib.request.Request('http://report.inflearn.com/submit', data) 101 | with urllib.request.urlopen(req) as response: 102 | resp = response.read() 103 | 104 | if json.loads(resp)['result'] == 0: 105 | print("Transfer failed: hash key is already used.") 106 | else: 107 | print("Transfer completed.") 108 | 109 | except Exception as e: 110 | print("Error occurred on transferring.", e) 111 | -------------------------------------------------------------------------------- /lab_asssigment/6_gradient_descent/mlr09.csv: -------------------------------------------------------------------------------- 1 | height_in_feet,weight_in_pounds,successful_field_goals,percent_of_successful_free_throws,average_points_scored 2 | 6.8,225,0.442,0.672,9.2 6.3,180,0.435,0.797,11.7 6.4,190,0.456,0.761,15.8 6.2,180,0.416,0.651,8.6 6.9,205,0.449,0.9,23.2 6.4,225,0.431,0.78,27.4 6.3,185,0.487,0.771,9.3 6.8,235,0.469,0.75,16 6.9,235,0.435,0.818,4.7 6.7,210,0.48,0.825,12.5 6.9,245,0.516,0.632,20.1 6.9,245,0.493,0.757,9.1 6.3,185,0.374,0.709,8.1 6.1,185,0.424,0.782,8.6 6.2,180,0.441,0.775,20.3 6.8,220,0.503,0.88,25 6.5,194,0.503,0.833,19.2 7.6,225,0.425,0.571,3.3 6.3,210,0.371,0.816,11.2 7.1,240,0.504,0.714,10.5 6.8,225,0.4,0.765,10.1 7.3,263,0.482,0.655,7.2 6.4,210,0.475,0.244,13.6 6.8,235,0.428,0.728,9 7.2,230,0.559,0.721,24.6 6.4,190,0.441,0.757,12.6 6.6,220,0.492,0.747,5.6 6.8,210,0.402,0.739,8.7 6.1,180,0.415,0.713,7.7 6.5,235,0.492,0.742,24.1 6.4,185,0.484,0.861,11.7 6,175,0.387,0.721,7.7 6,192,0.436,0.785,9.6 7.3,263,0.482,0.655,7.2 6.1,180,0.34,0.821,12.3 6.7,240,0.516,0.728,8.9 6.4,210,0.475,0.846,13.6 5.8,160,0.412,0.813,11.2 6.9,230,0.411,0.595,2.8 7,245,0.407,0.573,3.2 7.3,228,0.445,0.726,9.4 5.9,155,0.291,0.707,11.9 6.2,200,0.449,0.804,15.4 6.8,235,0.546,0.784,7.4 7,235,0.48,0.744,18.9 5.9,105,0.359,0.839,7.9 6.1,180,0.528,0.79,12.2 5.7,185,0.352,0.701,11 7.1,245,0.414,0.778,2.8 5.8,180,0.425,0.872,11.8 7.4,240,0.599,0.713,17.1 6.8,225,0.482,0.701,11.6 6.8,215,0.457,0.734,5.8 7,230,0.435,0.764,8.3 3 | -------------------------------------------------------------------------------- /lab_asssigment/6_gradient_descent/test.csv: -------------------------------------------------------------------------------- 1 | x,y 2 | 77,79.77515201 3 | 21,23.17727887 4 | 22,25.60926156 5 | 20,17.85738813 6 | 36,41.84986439 7 | 15,9.805234876 8 | 62,58.87465933 9 | 95,97.61793701 10 | 20,18.39512747 11 | 5,8.746747654 12 | 4,2.811415826 13 | 19,17.09537241 14 | 96,95.14907176 15 | 62,61.38800663 16 | 36,40.24701716 17 | 15,14.82248589 18 | 65,66.95806869 19 | 14,16.63507984 20 | 87,90.65513736 21 | 69,77.22982636 22 | 89,92.11906278 23 | 51,46.91387709 24 | 89,89.82634442 25 | 27,21.71380347 26 | 97,97.41206981 27 | 58,57.01631363 28 | 79,78.31056542 29 | 21,19.1315097 30 | 93,93.03483388 31 | 27,26.59112396 32 | 99,97.55155344 33 | 31,31.43524822 34 | 
33,35.12724777 35 | 80,78.61042432 36 | 28,33.07112825 37 | 47,51.69967172 38 | 53,53.62235225 39 | 69,69.46306072 40 | 28,27.42497237 41 | 33,36.34644189 42 | 91,95.06140858 43 | 71,68.16724757 44 | 50,50.96155532 45 | 76,78.04237454 46 | 4,5.607664865 47 | 37,36.11334779 48 | 70,67.2352155 49 | 68,65.01324035 50 | 40,38.14753871 51 | 35,34.31141446 52 | 94,95.28503937 53 | 88,87.84749912 54 | 52,54.08170635 55 | 31,31.93063515 56 | 59,59.61247085 57 | 0,-1.040114209 58 | 39,47.49374765 59 | 64,62.60089773 60 | 69,70.9146434 61 | 57,56.14834113 62 | 13,14.05572877 63 | 72,68.11367147 64 | 76,75.59701346 65 | 61,59.225745 66 | 82,85.45504157 67 | 18,17.76197116 68 | 41,38.68888682 69 | 50,50.96343637 70 | 55,51.83503872 71 | 13,17.0761107 72 | 46,46.56141773 73 | 13,10.34754461 74 | 79,77.91032969 75 | 53,50.17008622 76 | 15,13.25690647 77 | 28,31.32274932 78 | 81,73.9308764 79 | 69,74.45114379 80 | 52,52.01932286 81 | 84,83.68820499 82 | 68,70.3698748 83 | 27,23.44479161 84 | 56,49.83051801 85 | 48,49.88226593 86 | 40,41.04525583 87 | 39,33.37834391 88 | 82,81.29750133 89 | 100,105.5918375 90 | 59,56.82457013 91 | 43,48.67252645 92 | 67,67.02150613 93 | 38,38.43076389 94 | 63,58.61466887 95 | 91,89.12377509 96 | 60,60.9105427 97 | 14,13.83959878 98 | 21,16.89085185 99 | 87,84.06676818 100 | 73,70.34969772 101 | 32,33.38474138 102 | 2,-1.63296825 103 | 82,88.54475895 104 | 19,17.44047622 105 | 74,75.69298554 106 | 42,41.97607107 107 | 12,12.59244741 108 | 1,0.275307261 109 | 90,98.13258005 110 | 89,87.45721555 111 | 0,-2.344738542 112 | 41,39.3294153 113 | 16,16.68715211 114 | 94,96.58888601 115 | 97,97.70342201 116 | 66,67.01715955 117 | 24,25.63476257 118 | 17,13.41310757 119 | 90,95.15647284 120 | 13,9.744164258 121 | 0,-3.467883789 122 | 64,62.82816355 123 | 96,97.27405461 124 | 98,95.58017185 125 | 12,7.468501839 126 | 41,45.44599591 127 | 47,46.69013968 128 | 78,74.4993599 129 | 20,21.63500655 130 | 89,91.59548851 131 | 29,26.49487961 132 | 64,67.38654703 133 | 75,74.25362837 134 | 12,12.07991648 135 | 25,21.32273728 136 | 28,29.31770045 137 | 30,26.48713683 138 | 65,68.94699774 139 | 59,59.10598995 140 | 64,64.37521087 141 | 53,60.20758349 142 | 71,70.34329706 143 | 97,97.1082562 144 | 73,75.7584178 145 | 9,10.80462727 146 | 12,12.11219941 147 | 63,63.28312382 148 | 99,98.03017721 149 | 60,63.19354354 150 | 35,34.8534823 151 | 2,-2.819913974 152 | 60,59.8313966 153 | 32,29.38505024 154 | 94,97.00148372 155 | 84,85.18657275 156 | 63,61.74063192 157 | 22,18.84798163 158 | 81,78.79008525 159 | 93,95.12400481 160 | 33,30.48881287 161 | 7,10.41468095 162 | 42,38.98317436 163 | 46,46.11021062 164 | 54,52.45103628 165 | 16,21.16523945 166 | 49,52.28620611 167 | 43,44.18863945 168 | 95,97.13832018 169 | 66,67.22008001 170 | 21,18.98322306 171 | 35,24.3884599 172 | 80,79.44769523 173 | 37,40.03504862 174 | 54,53.32005764 175 | 56,54.55446979 176 | 1,-2.761182595 177 | 32,37.80182795 178 | 58,57.48741435 179 | 32,36.06292994 180 | 46,49.83538167 181 | 72,74.68953276 182 | 17,14.86159401 183 | 97,101.0697879 184 | 93,99.43577876 185 | 91,91.69240746 186 | 37,34.12473248 187 | 4,6.079390073 188 | 54,59.07247174 189 | 51,56.43046022 190 | 27,30.49412933 191 | 46,48.35172635 192 | 92,89.73153611 193 | 73,72.86282528 194 | 77,80.97144285 195 | 91,91.36566374 196 | 61,60.07137496 197 | 99,99.87382707 198 | 4,8.655714172 199 | 72,69.39858505 200 | 19,19.38780134 201 | 57,53.11628433 202 | 78,78.39683006 203 | 26,25.75612514 204 | 74,75.07484683 205 | 90,92.88772282 206 | 66,69.45498498 207 | 
13,13.12109842 208 | 40,48.09843134 209 | 77,79.3142548 210 | 67,68.48820749 211 | 75,73.2300846 212 | 23,24.68362712 213 | 45,41.90368917 214 | 59,62.22635684 215 | 44,45.96396877 216 | 23,23.52647153 217 | 55,51.80035866 218 | 55,51.10774273 219 | 95,95.79747345 220 | 12,9.241138977 221 | 4,7.646529763 222 | 7,9.281699753 223 | 100,103.5266162 224 | 48,47.41006725 225 | 42,42.03835773 226 | 96,96.11982476 227 | 39,38.05766408 228 | 100,105.4503788 229 | 87,88.80306911 230 | 14,15.49301141 231 | 14,12.42624606 232 | 37,40.00709598 233 | 5,5.634030902 234 | 88,87.36938931 235 | 91,89.73951993 236 | 65,66.61499643 237 | 74,72.9138853 238 | 56,57.19103506 239 | 16,11.21710477 240 | 5,0.676076749 241 | 28,28.15668543 242 | 92,95.3958003 243 | 46,52.05490703 244 | 54,59.70864577 245 | 39,36.79224762 246 | 44,37.08457698 247 | 31,24.18437976 248 | 68,67.28725332 249 | 86,82.870594 250 | 90,89.899991 251 | 38,36.94173178 252 | 21,19.87562242 253 | 95,90.71481654 254 | 56,61.09367762 255 | 60,60.11134958 256 | 65,64.83296316 257 | 78,81.40381769 258 | 89,92.40217686 259 | 6,2.576625376 260 | 67,63.80768172 261 | 36,38.67780759 262 | 16,16.82839701 263 | 100,99.78687252 264 | 45,44.68913433 265 | 73,71.00377824 266 | 57,51.57326718 267 | 20,19.87846479 268 | 76,79.50341495 269 | 34,34.58876491 270 | 55,55.7383467 271 | 72,68.19721905 272 | 55,55.81628509 273 | 8,9.391416798 274 | 56,56.01448111 275 | 72,77.9969477 276 | 58,55.37049953 277 | 6,11.89457829 278 | 96,94.79081712 279 | 23,25.69041546 280 | 58,53.52042319 281 | 23,18.31396758 282 | 19,21.42637785 283 | 25,30.41303282 284 | 64,67.68142149 285 | 21,17.0854783 286 | 59,60.91792707 287 | 19,14.99514319 288 | 16,16.74923937 289 | 42,41.46923883 290 | 43,42.84526108 291 | 61,59.12912974 292 | 92,91.30863673 293 | 11,8.673336357 294 | 41,39.31485292 295 | 1,5.313686205 296 | 8,5.405220518 297 | 71,68.5458879 298 | 46,47.33487629 299 | 55,54.09063686 300 | 62,63.29717058 301 | 47,52.45946688 -------------------------------------------------------------------------------- /lab_asssigment/6_gradient_descent/windows/install.bat: -------------------------------------------------------------------------------- 1 | pip install -U backend.ai-client 2 | -------------------------------------------------------------------------------- /lab_asssigment/6_gradient_descent/windows/linear_model.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | class LinearRegressionGD(object): 5 | def __init__(self, fit_intercept=True, copy_X=True, 6 | eta0=0.001, epochs=1000, weight_decay=0.9): 7 | self.fit_intercept = fit_intercept 8 | self.copy_X = copy_X 9 | self._eta0 = eta0 10 | self._epochs = epochs 11 | 12 | self._cost_history = [] 13 | 14 | self._coef = None 15 | self._intercept = None 16 | self._new_X = None 17 | self._w_history = None 18 | self._weight_decay = weight_decay 19 | 20 | def cost(self, h, y): 21 | pass 22 | 23 | def hypothesis_function(self, X, theta): 24 | pass 25 | 26 | def gradient(self, X, y, theta): 27 | pass 28 | 29 | def fit(self, X, y): 30 | # Write your code (set up self._new_X and initialize theta here) 31 | 32 | for epoch in range(self._epochs): 33 | # Be sure to use the line of code below 34 | gradient = self.gradient(self._new_X, y, theta).flatten() 35 | 36 | # Write your code 37 | 38 | if epoch % 100 == 0: 39 | self._w_history.append(theta) 40 | cost = self.cost( 41 | self.hypothesis_function(self._new_X, theta), y) 42 | self._cost_history.append(cost) 43 | self._eta0 = self._eta0 * self._weight_decay 44 | 45 | # Write your code 46 | 47 
| def predict(self, X): 48 | pass 49 | 50 | @property 51 | def coef(self): 52 | return self._coef 53 | 54 | @property 55 | def intercept(self): 56 | return self._intercept 57 | 58 | @property 59 | def weights_history(self): 60 | return np.array(self._w_history) 61 | 62 | @property 63 | def cost_history(self): 64 | return self._cost_history 65 | -------------------------------------------------------------------------------- /lab_asssigment/6_gradient_descent/windows/mlr09.csv: -------------------------------------------------------------------------------- 1 | height_in_feet,weight_in_pounds,successful_field_goals,percent_of_successful_free_throws,average_points_scored 2 | 6.8,225,0.442,0.672,9.2 6.3,180,0.435,0.797,11.7 6.4,190,0.456,0.761,15.8 6.2,180,0.416,0.651,8.6 6.9,205,0.449,0.9,23.2 6.4,225,0.431,0.78,27.4 6.3,185,0.487,0.771,9.3 6.8,235,0.469,0.75,16 6.9,235,0.435,0.818,4.7 6.7,210,0.48,0.825,12.5 6.9,245,0.516,0.632,20.1 6.9,245,0.493,0.757,9.1 6.3,185,0.374,0.709,8.1 6.1,185,0.424,0.782,8.6 6.2,180,0.441,0.775,20.3 6.8,220,0.503,0.88,25 6.5,194,0.503,0.833,19.2 7.6,225,0.425,0.571,3.3 6.3,210,0.371,0.816,11.2 7.1,240,0.504,0.714,10.5 6.8,225,0.4,0.765,10.1 7.3,263,0.482,0.655,7.2 6.4,210,0.475,0.244,13.6 6.8,235,0.428,0.728,9 7.2,230,0.559,0.721,24.6 6.4,190,0.441,0.757,12.6 6.6,220,0.492,0.747,5.6 6.8,210,0.402,0.739,8.7 6.1,180,0.415,0.713,7.7 6.5,235,0.492,0.742,24.1 6.4,185,0.484,0.861,11.7 6,175,0.387,0.721,7.7 6,192,0.436,0.785,9.6 7.3,263,0.482,0.655,7.2 6.1,180,0.34,0.821,12.3 6.7,240,0.516,0.728,8.9 6.4,210,0.475,0.846,13.6 5.8,160,0.412,0.813,11.2 6.9,230,0.411,0.595,2.8 7,245,0.407,0.573,3.2 7.3,228,0.445,0.726,9.4 5.9,155,0.291,0.707,11.9 6.2,200,0.449,0.804,15.4 6.8,235,0.546,0.784,7.4 7,235,0.48,0.744,18.9 5.9,105,0.359,0.839,7.9 6.1,180,0.528,0.79,12.2 5.7,185,0.352,0.701,11 7.1,245,0.414,0.778,2.8 5.8,180,0.425,0.872,11.8 7.4,240,0.599,0.713,17.1 6.8,225,0.482,0.701,11.6 6.8,215,0.457,0.734,5.8 7,230,0.435,0.764,8.3 3 | -------------------------------------------------------------------------------- /lab_asssigment/6_gradient_descent/windows/submit.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | set BACKEND_ACCESS_KEY=AKIAQWZD6A6Y5ZVOHSJR 4 | set BACKEND_SECRET_KEY=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 5 | 6 | set tmp="%1" 7 | if "%tmp:"=.%"==".." ( 8 | echo "Please give hash key as argument." 
9 | ) else ( 10 | backend.ai run --exec "python test.py linear_model.py %tmp%" python3 test.py linear_model.py test.csv train.csv mlr09.csv 11 | ) 12 | -------------------------------------------------------------------------------- /lab_asssigment/6_gradient_descent/windows/test.csv: -------------------------------------------------------------------------------- 1 | x,y 2 | 77,79.77515201 3 | 21,23.17727887 4 | 22,25.60926156 5 | 20,17.85738813 6 | 36,41.84986439 7 | 15,9.805234876 8 | 62,58.87465933 9 | 95,97.61793701 10 | 20,18.39512747 11 | 5,8.746747654 12 | 4,2.811415826 13 | 19,17.09537241 14 | 96,95.14907176 15 | 62,61.38800663 16 | 36,40.24701716 17 | 15,14.82248589 18 | 65,66.95806869 19 | 14,16.63507984 20 | 87,90.65513736 21 | 69,77.22982636 22 | 89,92.11906278 23 | 51,46.91387709 24 | 89,89.82634442 25 | 27,21.71380347 26 | 97,97.41206981 27 | 58,57.01631363 28 | 79,78.31056542 29 | 21,19.1315097 30 | 93,93.03483388 31 | 27,26.59112396 32 | 99,97.55155344 33 | 31,31.43524822 34 | 33,35.12724777 35 | 80,78.61042432 36 | 28,33.07112825 37 | 47,51.69967172 38 | 53,53.62235225 39 | 69,69.46306072 40 | 28,27.42497237 41 | 33,36.34644189 42 | 91,95.06140858 43 | 71,68.16724757 44 | 50,50.96155532 45 | 76,78.04237454 46 | 4,5.607664865 47 | 37,36.11334779 48 | 70,67.2352155 49 | 68,65.01324035 50 | 40,38.14753871 51 | 35,34.31141446 52 | 94,95.28503937 53 | 88,87.84749912 54 | 52,54.08170635 55 | 31,31.93063515 56 | 59,59.61247085 57 | 0,-1.040114209 58 | 39,47.49374765 59 | 64,62.60089773 60 | 69,70.9146434 61 | 57,56.14834113 62 | 13,14.05572877 63 | 72,68.11367147 64 | 76,75.59701346 65 | 61,59.225745 66 | 82,85.45504157 67 | 18,17.76197116 68 | 41,38.68888682 69 | 50,50.96343637 70 | 55,51.83503872 71 | 13,17.0761107 72 | 46,46.56141773 73 | 13,10.34754461 74 | 79,77.91032969 75 | 53,50.17008622 76 | 15,13.25690647 77 | 28,31.32274932 78 | 81,73.9308764 79 | 69,74.45114379 80 | 52,52.01932286 81 | 84,83.68820499 82 | 68,70.3698748 83 | 27,23.44479161 84 | 56,49.83051801 85 | 48,49.88226593 86 | 40,41.04525583 87 | 39,33.37834391 88 | 82,81.29750133 89 | 100,105.5918375 90 | 59,56.82457013 91 | 43,48.67252645 92 | 67,67.02150613 93 | 38,38.43076389 94 | 63,58.61466887 95 | 91,89.12377509 96 | 60,60.9105427 97 | 14,13.83959878 98 | 21,16.89085185 99 | 87,84.06676818 100 | 73,70.34969772 101 | 32,33.38474138 102 | 2,-1.63296825 103 | 82,88.54475895 104 | 19,17.44047622 105 | 74,75.69298554 106 | 42,41.97607107 107 | 12,12.59244741 108 | 1,0.275307261 109 | 90,98.13258005 110 | 89,87.45721555 111 | 0,-2.344738542 112 | 41,39.3294153 113 | 16,16.68715211 114 | 94,96.58888601 115 | 97,97.70342201 116 | 66,67.01715955 117 | 24,25.63476257 118 | 17,13.41310757 119 | 90,95.15647284 120 | 13,9.744164258 121 | 0,-3.467883789 122 | 64,62.82816355 123 | 96,97.27405461 124 | 98,95.58017185 125 | 12,7.468501839 126 | 41,45.44599591 127 | 47,46.69013968 128 | 78,74.4993599 129 | 20,21.63500655 130 | 89,91.59548851 131 | 29,26.49487961 132 | 64,67.38654703 133 | 75,74.25362837 134 | 12,12.07991648 135 | 25,21.32273728 136 | 28,29.31770045 137 | 30,26.48713683 138 | 65,68.94699774 139 | 59,59.10598995 140 | 64,64.37521087 141 | 53,60.20758349 142 | 71,70.34329706 143 | 97,97.1082562 144 | 73,75.7584178 145 | 9,10.80462727 146 | 12,12.11219941 147 | 63,63.28312382 148 | 99,98.03017721 149 | 60,63.19354354 150 | 35,34.8534823 151 | 2,-2.819913974 152 | 60,59.8313966 153 | 32,29.38505024 154 | 94,97.00148372 155 | 84,85.18657275 156 | 63,61.74063192 157 | 22,18.84798163 158 | 81,78.79008525 159 | 
93,95.12400481 160 | 33,30.48881287 161 | 7,10.41468095 162 | 42,38.98317436 163 | 46,46.11021062 164 | 54,52.45103628 165 | 16,21.16523945 166 | 49,52.28620611 167 | 43,44.18863945 168 | 95,97.13832018 169 | 66,67.22008001 170 | 21,18.98322306 171 | 35,24.3884599 172 | 80,79.44769523 173 | 37,40.03504862 174 | 54,53.32005764 175 | 56,54.55446979 176 | 1,-2.761182595 177 | 32,37.80182795 178 | 58,57.48741435 179 | 32,36.06292994 180 | 46,49.83538167 181 | 72,74.68953276 182 | 17,14.86159401 183 | 97,101.0697879 184 | 93,99.43577876 185 | 91,91.69240746 186 | 37,34.12473248 187 | 4,6.079390073 188 | 54,59.07247174 189 | 51,56.43046022 190 | 27,30.49412933 191 | 46,48.35172635 192 | 92,89.73153611 193 | 73,72.86282528 194 | 77,80.97144285 195 | 91,91.36566374 196 | 61,60.07137496 197 | 99,99.87382707 198 | 4,8.655714172 199 | 72,69.39858505 200 | 19,19.38780134 201 | 57,53.11628433 202 | 78,78.39683006 203 | 26,25.75612514 204 | 74,75.07484683 205 | 90,92.88772282 206 | 66,69.45498498 207 | 13,13.12109842 208 | 40,48.09843134 209 | 77,79.3142548 210 | 67,68.48820749 211 | 75,73.2300846 212 | 23,24.68362712 213 | 45,41.90368917 214 | 59,62.22635684 215 | 44,45.96396877 216 | 23,23.52647153 217 | 55,51.80035866 218 | 55,51.10774273 219 | 95,95.79747345 220 | 12,9.241138977 221 | 4,7.646529763 222 | 7,9.281699753 223 | 100,103.5266162 224 | 48,47.41006725 225 | 42,42.03835773 226 | 96,96.11982476 227 | 39,38.05766408 228 | 100,105.4503788 229 | 87,88.80306911 230 | 14,15.49301141 231 | 14,12.42624606 232 | 37,40.00709598 233 | 5,5.634030902 234 | 88,87.36938931 235 | 91,89.73951993 236 | 65,66.61499643 237 | 74,72.9138853 238 | 56,57.19103506 239 | 16,11.21710477 240 | 5,0.676076749 241 | 28,28.15668543 242 | 92,95.3958003 243 | 46,52.05490703 244 | 54,59.70864577 245 | 39,36.79224762 246 | 44,37.08457698 247 | 31,24.18437976 248 | 68,67.28725332 249 | 86,82.870594 250 | 90,89.899991 251 | 38,36.94173178 252 | 21,19.87562242 253 | 95,90.71481654 254 | 56,61.09367762 255 | 60,60.11134958 256 | 65,64.83296316 257 | 78,81.40381769 258 | 89,92.40217686 259 | 6,2.576625376 260 | 67,63.80768172 261 | 36,38.67780759 262 | 16,16.82839701 263 | 100,99.78687252 264 | 45,44.68913433 265 | 73,71.00377824 266 | 57,51.57326718 267 | 20,19.87846479 268 | 76,79.50341495 269 | 34,34.58876491 270 | 55,55.7383467 271 | 72,68.19721905 272 | 55,55.81628509 273 | 8,9.391416798 274 | 56,56.01448111 275 | 72,77.9969477 276 | 58,55.37049953 277 | 6,11.89457829 278 | 96,94.79081712 279 | 23,25.69041546 280 | 58,53.52042319 281 | 23,18.31396758 282 | 19,21.42637785 283 | 25,30.41303282 284 | 64,67.68142149 285 | 21,17.0854783 286 | 59,60.91792707 287 | 19,14.99514319 288 | 16,16.74923937 289 | 42,41.46923883 290 | 43,42.84526108 291 | 61,59.12912974 292 | 92,91.30863673 293 | 11,8.673336357 294 | 41,39.31485292 295 | 1,5.313686205 296 | 8,5.405220518 297 | 71,68.5458879 298 | 46,47.33487629 299 | 55,54.09063686 300 | 62,63.29717058 301 | 47,52.45946688 -------------------------------------------------------------------------------- /lab_asssigment/6_gradient_descent/windows/test.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import urllib.parse 3 | import urllib.request 4 | import json 5 | import argparse 6 | import os 7 | import types 8 | import sys 9 | 10 | 11 | parser = argparse.ArgumentParser(description="Autoscorer") 12 | parser.add_argument("filename", help="File to submit") 13 | parser.add_argument("hash", help="Hash key") 14 | 15 | args = 
parser.parse_args() 16 | if args.hash: 17 | hashkey = args.hash 18 | if args.filename: 19 | filename = args.filename 20 | 21 | 22 | class TestResult(unittest.TextTestResult): 23 | _previousTestClass = None 24 | _testRunEntered = False 25 | _moduleSetUpFailed = False 26 | 27 | def __init__(self, stream=None, descriptions=None, verbosity=1): 28 | super().__init__( 29 | stream=stream, descriptions=descriptions, verbosity=verbosity) 30 | self.tests_run = [] 31 | 32 | def getTestsReport(self): 33 | """Returns the run tests as a list of the form [test_id, result]""" 34 | return self.tests_run 35 | 36 | def addError(self, test, err): 37 | """Called when an error has occurred. 'err' is a tuple of values as 38 | returned by sys.exc_info(). 39 | """ 40 | super().addError(test, err) 41 | # super().addError() has already appended this error to self.errors 42 | self._mirrorOutput = True 43 | self.tests_run.append([test.id(), 0]) 44 | 45 | def addFailure(self, test, err): 46 | """Called when a test failure has occurred. 'err' is a tuple of values as 47 | returned by sys.exc_info().""" 48 | super().addFailure(test, err) 49 | # super().addFailure() has already appended this failure to self.failures 50 | self._mirrorOutput = True 51 | self.tests_run.append([test.id(), 0]) 52 | 53 | def addSuccess(self, test): 54 | "Called when a test has completed successfully" 55 | super().addSuccess(test) 56 | self.tests_run.append([test.id(), 1]) 57 | 58 | 59 | with urllib.request.urlopen('http://datasets.lablup.ai/private/python-tests/unit_test_linear_model_gd.py') as response: 60 | test_code = response.read() 61 | 62 | 63 | test_module = types.ModuleType( 64 | 'test_code', 65 | doc='Test case') 66 | 67 | exec(test_code, test_module.__dict__)  # runs remotely fetched test code; the source is assumed trusted 68 | sys.modules['test_code'] = test_module 69 | 70 | import test_code as tc 71 | loader = unittest.loader.defaultTestLoader 72 | null_stream = open(os.devnull, "w") 73 | test_suite = loader.loadTestsFromModule(tc) 74 | result = unittest.TextTestRunner( 75 | stream=null_stream, verbosity=2, resultclass=TestResult).run(test_suite) 76 | 77 | print("Generating result sheet...") 78 | print("-------------------------------------------------------------------") 79 | print(" Test Case | Passed? | Feedback") 80 | print("-------------------------------------------------------------------") 81 | for c, r in result.tests_run: 82 | print("{0:s} | {1:s} | {2} ".format( 83 | c.rsplit('.', 1)[1].rjust(26), 84 | "PASSED" if r == 1 else "FAILED", 85 | "Good Job".rjust(10) if r == 1 else "Failed".rjust(10))) 86 | 87 | # print(json.dumps(result.tests_run)) 88 | print("Reading source file...") 89 | 90 | file = open(filename, "r") 91 | print("Transferring results to server...") 92 | payload = { 93 | 'hashkey': hashkey, 94 | 'result': result.tests_run, 95 | 'code': file.read() 96 | } 97 | try: 98 | data = urllib.parse.urlencode(payload) 99 | data = data.encode('ascii') 100 | req = urllib.request.Request('http://report.inflearn.com/submit', data) 101 | with urllib.request.urlopen(req) as response: 102 | resp = response.read() 103 | 104 | if json.loads(resp)['result'] == 0: 105 | print("Transfer failed: hash key is already used.") 106 | else: 107 | print("Transfer completed.") 108 | 109 | except Exception as e: 110 | print("Error occurred on transferring.", e) 111 | --------------------------------------------------------------------------------
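Both labs are scored the same way. A hypothetical session, where YOUR_HASH_KEY stands in for the per-student hash issued with the assignment:

python test.py linear_model.py YOUR_HASH_KEY

The submit wrappers do the same thing through backend.ai: ./submit.sh YOUR_HASH_KEY on Linux/macOS and submit.bat YOUR_HASH_KEY on Windows both forward the hash to test.py.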