├── .gitignore ├── README.md ├── code ├── ch10 │ ├── 1_softmax_classifier_with_numpy.ipynb │ ├── 2_multiclass_with_sklearn.ipynb │ ├── 3_classification_service.ipynb │ ├── 5_pipeline_example.ipynb │ ├── 7_grid_search.ipynb │ ├── logicmodel.pkl │ ├── min_max.npy │ ├── my.png │ └── theta_bin.npy ├── ch100 │ ├── -log_2.png │ ├── AllElectronics.csv │ ├── log_2.png │ ├── rental_price.csv │ ├── titanic.csv │ ├── titanic.png │ └── vegeterianl_dataset.csv ├── ch11 │ ├── 1_simple_naive_bayes.ipynb │ ├── 2_german_credit_application.ipynb │ ├── 3_nb_classifier_with_sklearn.ipynb │ ├── 4_news_groups_analysis.ipynb │ ├── 99_bag_of_words.ipynb │ ├── 99_old_news_groups_analysis.ipynb │ ├── 99_spam_filter_with_sklearn.ipynb │ ├── data │ │ ├── 20021010_easy_ham.tar.bz2 │ │ ├── 20021010_hard_ham.tar.bz2 │ │ └── 20021010_spam.tar.bz2 │ ├── fraud.csv │ └── imbalanced_dataset.ipynb ├── ch12 │ ├── 1_entropy.ipynb │ ├── 2_DT_algorithm.ipynb │ ├── 3_Gini_Index.ipynb │ ├── 4_decision_tree_w_sklearn.ipynb │ ├── 5_split_continuous_attribute.ipynb │ ├── 6_regress_tree_with_sklearn.ipynb │ ├── AllElectronics.csv │ ├── rental_price.csv │ ├── titanic.png │ ├── titanic │ │ ├── test.csv │ │ └── train.csv │ └── vegeterianl_dataset.csv ├── ch13 │ ├── 1_ensemble.ipynb │ ├── 2_bagging.ipynb │ ├── 3_random_forest.ipynb │ ├── 4_adaboost.ipynb │ ├── 5_gradient_boosting.ipynb │ ├── 6_xgboost.ipynb │ ├── 6_xgboost.py │ ├── 7_lightgbm.ipynb │ ├── 8_sklearn_style.ipynb │ ├── 9_stacking.ipynb │ ├── README.md │ ├── hourse_price_preprocessor.py │ ├── house_price │ │ ├── test.csv │ │ └── train.csv │ ├── stacking.py │ ├── tatanic_X_train.npy │ ├── tatanic_test.npy │ ├── tatanic_y_train.npy │ ├── titanic │ │ ├── test.csv │ │ └── train.csv │ ├── titanic_data_preprocessor.py │ └── xgboost_installation_guide.md ├── ch14 │ ├── 1_log_transformation.ipynb │ ├── 2_PCA_example.ipynb │ ├── 3_univariate_select.ipynb │ ├── 4_model_based_feature_select.ipynb │ ├── 5_RFE.ipynb │ ├── 6_imbalanced_dataset.ipynb │ ├── 7_distributed_training.ipynb │ ├── 8_bigcon_analysis.ipynb │ ├── 9_automl_example.py │ ├── hourse_price_preprocessor.py │ ├── house_price │ │ ├── test.csv │ │ └── train.csv │ └── result.csv ├── ch2 │ ├── 1_load_boston_house_price.ipynb │ ├── 2_numpy_example.ipynb │ ├── housing.data │ └── housing.names ├── ch3 │ ├── 1_numpy_ndarray.ipynb │ ├── 20160901_20160930_public_list.csv │ ├── 2_numpy_reshape.ipynb │ ├── 3_indexing_slicing.ipynb │ ├── 4_numpy_creation_functions.ipynb │ ├── 5_ndarray_operation_functions.ipynb │ ├── 6_ndarray_operations.ipynb │ ├── 7_numpy_comparison.ipynb │ ├── 8_boolean_fancy_index.ipynb │ ├── 9_numpy_data_io.ipynb │ ├── int_data.csv │ ├── npy_test.npy │ └── populations.txt ├── ch4 │ ├── 1_data_loading.ipynb │ ├── 2_model_representation.ipynb │ ├── 3_pandas_series.ipynb │ ├── 4_pandas_dataframe.ipynb │ ├── 5_data_selection.ipynb │ ├── 6_dataframe_basic_operation.ipynb │ ├── 7_map_apply_lambda.ipynb │ ├── 8_built_in_functions.ipynb │ ├── data │ │ ├── excel-comp-data.xlsx │ │ └── wages.csv │ └── wages.csv ├── ch5 │ ├── 1_groupby_hierarchical_index.ipynb │ ├── 2_pivot_crosstab.ipynb │ ├── 3_merge_concat.ipynb │ ├── 4_db_persistence.ipynb │ └── data │ │ ├── AirPassengers.csv │ │ ├── customer-status.xlsx │ │ ├── excel-comp-data.xlsx │ │ ├── flights.db │ │ ├── movie_rating.csv │ │ ├── phone_data.csv │ │ ├── sales-feb-2014.xlsx │ │ ├── sales-jan-2014.xlsx │ │ └── sales-mar-2014.xlsx ├── ch6 │ ├── 10_bike_prorblem.ipynb │ ├── 1_basic_plot.ipynb │ ├── 1_watcha_plotting.ipynb │ ├── 2_cost_function.ipynb │ ├── 2_data_plot.ipynb │ 
├── 3_gradient_descent.ipynb │ ├── 3_missing_value.ipynb │ ├── 4_categorical_data.ipynb │ ├── 4_linear_regression_implementation.ipynb │ ├── 5_1_sgd.ipynb │ ├── 5_feature_scaling.ipynb │ ├── 5_multiple_regression_w_gd_example.ipynb │ ├── 6_data_normalization_viz.ipynb │ ├── 6_multiple_linear_regression_with_sklearn.ipynb │ ├── 7_house_price_easy.ipynb │ ├── 8_house_price_hard.ipynb │ ├── 9_pipeline_example.ipynb │ ├── test.csv │ ├── test.png │ ├── titanic │ │ ├── gender_submission.csv │ │ ├── submission_result.csv │ │ ├── test.csv │ │ ├── titanic_solution.ipynb │ │ └── train.csv │ └── train.csv ├── ch7 │ ├── 1_gradient_descent.ipynb │ ├── 2_watcha_plotting.ipynb │ ├── 3_Linear_Regression_with_gradient_descent.ipynb │ ├── 4_multiple_regression_w_gd_example.ipynb │ ├── 5_multiple_linear_regression_with_sklearn.ipynb │ └── data │ │ └── slr06.csv ├── ch8 │ ├── 1_optimization_examples.ipynb │ ├── 2_sklearn_lr.ipynb │ ├── 3_polynomial_regression .ipynb │ ├── 4_cross_validation.ipynb │ ├── 5_bike.ipynb │ ├── data │ │ ├── sampleSubmission.csv │ │ ├── test.csv │ │ └── train.csv │ ├── submission_data.csv │ ├── submission_lasso_data.csv │ └── yield.csv ├── ch9 │ ├── 1_classification_problem_overview.ipynb │ ├── 2_sigmoid_function_overview.ipynb │ ├── 3_logistic_regression_with_numpy.ipynb │ ├── 4_Logistic_Regression_with_sklearn.ipynb │ ├── 5_performacne_metrics_for_classification.ipynb │ ├── 6_roc_curve.ipynb │ ├── data │ │ ├── generator.csv │ │ └── generators.csv │ └── uva.txt ├── ch99 │ └── teamlab_classifier.py ├── kaggle │ ├── Untitled.ipynb │ ├── test.csv │ └── train.csv └── test.md ├── documents ├── How_to_use_spark-sklearn_using_Google_Dataproc(kor).ipynb ├── test.html └── test.md └── lab_asssigment ├── 1_lab_numpy ├── README.md ├── linux_mac │ ├── install.sh │ ├── numpy_lab.py │ ├── submit.sh │ └── test.py ├── numpy_lab.pdf ├── numpy_lab.py └── windows │ ├── install.bat │ ├── numpy_lab.py │ ├── submit.bat │ └── test.py ├── 2_lab_build_matrix ├── 1000i.csv ├── README.md ├── build_matrix.pdf ├── build_matrix.py ├── images │ └── 2018 │ │ └── 01 │ │ └── matrix.png ├── linux_mac │ ├── build_matrix.py │ ├── install.sh │ ├── submit.sh │ └── test.py ├── movie_rating.csv └── windows │ ├── build_matrix.py │ ├── install.bat │ ├── submit.bat │ └── test.py ├── 5_normal_equation ├── README.md ├── lab_linear_model.pdf ├── linear_model.py ├── linear_regression_example.ipynb ├── linux_mac │ ├── install.sh │ ├── linear_model.py │ ├── linear_regression_example.ipynb │ ├── mlr09.csv │ ├── submit.sh │ ├── test.csv │ ├── test.py │ └── train.csv └── windows │ ├── install.bat │ ├── linear_model.py │ ├── linear_regression_example.ipynb │ ├── mlr09.csv │ ├── submit.bat │ ├── test.csv │ ├── test.py │ └── train.csv └── 6_gradient_descent ├── README.md ├── linear_model.py ├── linear_regression_example.ipynb ├── linux_mac ├── install.sh ├── linear_model.py ├── linear_regression_example.ipynb ├── mlr09.csv ├── submit.sh ├── test.csv ├── test.py └── train.csv ├── mlr09.csv ├── test.csv ├── train.csv └── windows ├── install.bat ├── linear_model.py ├── linear_regression_example.ipynb ├── mlr09.csv ├── submit.bat ├── test.csv ├── test.py └── train.csv /.gitignore: -------------------------------------------------------------------------------- 1 | # Created by .ignore support plugin (hsz.mobi) 2 | ### Python template 3 | # Byte-compiled / optimized / DLL files 4 | __pycache__/ 5 | *.py[cod] 6 | *$py.class 7 | 8 | # C extensions 9 | *.so 10 | 11 | # Distribution / packaging 12 | .Python 13 | env/ 14 | build/ 15 | develop-eggs/ 
16 | dist/ 17 | downloads/ 18 | eggs/ 19 | .eggs/ 20 | lib/ 21 | lib64/ 22 | parts/ 23 | sdist/ 24 | var/ 25 | wheels/ 26 | *.egg-info/ 27 | .installed.cfg 28 | *.egg 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *,cover 49 | .hypothesis/ 50 | 51 | # Translations 52 | *.mo 53 | *.pot 54 | 55 | # Django stuff: 56 | *.log 57 | local_settings.py 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # dotenv 85 | .env 86 | 87 | # virtualenv 88 | .venv 89 | venv/ 90 | ENV/ 91 | 92 | # Spyder project settings 93 | .spyderproject 94 | 95 | # Rope project settings 96 | .ropeproject 97 | ### JetBrains template 98 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and Webstorm 99 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 100 | 101 | # User-specific stuff: 102 | .idea/**/workspace.xml 103 | .idea/**/tasks.xml 104 | .idea/dictionaries 105 | .idea 106 | 107 | # Sensitive or high-churn files: 108 | .idea/**/dataSources/ 109 | .idea/**/dataSources.ids 110 | .idea/**/dataSources.xml 111 | .idea/**/dataSources.local.xml 112 | .idea/**/sqlDataSources.xml 113 | .idea/**/dynamic.xml 114 | .idea/**/uiDesigner.xml 115 | 116 | # Gradle: 117 | .idea/**/gradle.xml 118 | .idea/**/libraries 119 | 120 | # Mongo Explorer plugin: 121 | .idea/**/mongoSettings.xml 122 | 123 | ## File-based project format: 124 | *.iws 125 | 126 | ## Plugin-specific files: 127 | 128 | # IntelliJ 129 | /out/ 130 | 131 | # mpeltonen/sbt-idea plugin 132 | .idea_modules/ 133 | 134 | # JIRA plugin 135 | atlassian-ide-plugin.xml 136 | 137 | # Crashlytics plugin (for Android Studio and IntelliJ) 138 | com_crashlytics_export_strings.xml 139 | crashlytics.properties 140 | crashlytics-build.properties 141 | fabric.properties 142 | *.zip 143 | -------------------------------------------------------------------------------- /code/ch10/5_pipeline_example.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import pandas as pd\n", 12 | "import numpy as np" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 2, 18 | "metadata": { 19 | "collapsed": true 20 | }, 21 | "outputs": [], 22 | "source": [ 23 | "columns = [\"id\", \"diagnosis\"] + [str(\"r\"+str(i)) for i in range(30)]\n", 24 | "df = pd.read_csv(\"https://archive.ics.uci.edu/ml/machine-learning-databases/breast-cancer-wisconsin/wdbc.data\", names=columns)\n", 25 | "X = df.loc[:,columns[2:]].values\n", 26 | "Y = df.loc[:,columns[1]].values\n", 27 | "\n", 28 | "from sklearn.model_selection import train_test_split\n", 29 | "x_train, x_test, y_train, y_test = 
train_test_split(X, Y, test_size=0.2, random_state=22)" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": 4, 35 | "metadata": { 36 | "collapsed": false 37 | }, 38 | "outputs": [ 39 | { 40 | "data": { 41 | "text/plain": [ 42 | "((455, 30), (455,), (114, 30), (114,))" 43 | ] 44 | }, 45 | "execution_count": 4, 46 | "metadata": {}, 47 | "output_type": "execute_result" 48 | } 49 | ], 50 | "source": [ 51 | "x_train.shape, y_train.shape, x_test.shape, y_test.shape" 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": 16, 57 | "metadata": { 58 | "collapsed": false 59 | }, 60 | "outputs": [ 61 | { 62 | "data": { 63 | "text/plain": [ 64 | "0.97368421052631582" 65 | ] 66 | }, 67 | "execution_count": 16, 68 | "metadata": {}, 69 | "output_type": "execute_result" 70 | } 71 | ], 72 | "source": [ 73 | "from sklearn.preprocessing import StandardScaler\n", 74 | "from sklearn.linear_model import LogisticRegression\n", 75 | "from sklearn.pipeline import Pipeline\n", 76 | "\n", 77 | "pipe_lr = Pipeline(steps=[('scl', StandardScaler()), ('clf', LogisticRegression())])\n", 78 | "\n", 79 | "pipe_lr.fit(X=x_train, y=y_train)\n", 80 | "pipe_lr.score(x_test, y_test)" 81 | ] 82 | } 83 | ], 84 | "metadata": { 85 | "anaconda-cloud": {}, 86 | "kernelspec": { 87 | "display_name": "Python [default]", 88 | "language": "python", 89 | "name": "python3" 90 | }, 91 | "language_info": { 92 | "codemirror_mode": { 93 | "name": "ipython", 94 | "version": 3 95 | }, 96 | "file_extension": ".py", 97 | "mimetype": "text/x-python", 98 | "name": "python", 99 | "nbconvert_exporter": "python", 100 | "pygments_lexer": "ipython3", 101 | "version": "3.5.2" 102 | } 103 | }, 104 | "nbformat": 4, 105 | "nbformat_minor": 2 106 | } 107 | -------------------------------------------------------------------------------- /code/ch10/logicmodel.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeamLab/machine_learning_from_scratch_with_python/d52439f9536d98f28164e8c1ecc6e9070843b971/code/ch10/logicmodel.pkl -------------------------------------------------------------------------------- /code/ch10/min_max.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeamLab/machine_learning_from_scratch_with_python/d52439f9536d98f28164e8c1ecc6e9070843b971/code/ch10/min_max.npy -------------------------------------------------------------------------------- /code/ch10/my.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeamLab/machine_learning_from_scratch_with_python/d52439f9536d98f28164e8c1ecc6e9070843b971/code/ch10/my.png -------------------------------------------------------------------------------- /code/ch10/theta_bin.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeamLab/machine_learning_from_scratch_with_python/d52439f9536d98f28164e8c1ecc6e9070843b971/code/ch10/theta_bin.npy -------------------------------------------------------------------------------- /code/ch100/-log_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeamLab/machine_learning_from_scratch_with_python/d52439f9536d98f28164e8c1ecc6e9070843b971/code/ch100/-log_2.png -------------------------------------------------------------------------------- /code/ch100/log_2.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeamLab/machine_learning_from_scratch_with_python/d52439f9536d98f28164e8c1ecc6e9070843b971/code/ch100/log_2.png -------------------------------------------------------------------------------- /code/ch100/rental_price.csv: -------------------------------------------------------------------------------- 1 | ID SEASON WORK_DAY RENTALS 2 | 1 winter FALSE 800 3 | 2 winter FALSE 826 4 | 3 winter TRUE 900 5 | 4 spring FALSE 2100 6 | 5 spring TRUE 4740 7 | 6 spring TRUE 4900 8 | 8 summer TRUE 3000 9 | 9 summer TRUE 5800 10 | 10 autumn FALSE 6200 11 | 11 autumn FALSE 2910 12 | 12 autumn TRUE 2880 13 | 7 summer FALSE 2820 -------------------------------------------------------------------------------- /code/ch100/titanic.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeamLab/machine_learning_from_scratch_with_python/d52439f9536d98f28164e8c1ecc6e9070843b971/code/ch100/titanic.png -------------------------------------------------------------------------------- /code/ch100/vegeterianl_dataset.csv: -------------------------------------------------------------------------------- 1 | ID STREAM SLOPE ELEVATION VEGETATION 2 | 1 false steep 3900 chapparal 3 | 2 true moderate 300 riparian 4 | 3 true steep 1500 riparian 5 | 4 false steep 1200 chapparal 6 | 5 false flat 4450 conifer 7 | 6 true steep 5000 conifer 8 | 7 true steep 3000 chapparal -------------------------------------------------------------------------------- /code/ch11/1_simple_naive_bayes.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "from pandas import Series, DataFrame\n", 12 | "import pandas as pd\n", 13 | "import numpy as np" 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": 7, 19 | "metadata": {}, 20 | "outputs": [ 21 | { 22 | "data": { 23 | "text/html": [ 24 | "
"[HTML rendering of df.head() — table markup lost in extraction; same table as the text/plain output below]\n
" 75 | ], 76 | "text/plain": [ 77 | " viagra spam\n", 78 | "0 1 1\n", 79 | "1 0 0\n", 80 | "2 0 0\n", 81 | "3 0 0\n", 82 | "4 0 0" 83 | ] 84 | }, 85 | "execution_count": 7, 86 | "metadata": {}, 87 | "output_type": "execute_result" 88 | } 89 | ], 90 | "source": [ 91 | "# Example from - https://chrisalbon.com/python/pandas_map_values_to_values.html\n", 92 | "viagra_spam = {'viagra': [1,0,0,0,0,0,0,0,1,1,1,0,0,1,0,0,0,0,0,1],\n", 93 | " 'spam': [\n", 94 | " 1,0,0,0,0,0,1,0,1,0, 0,0,0,0,0,0,0,1,1,1\n", 95 | " ]}\n", 96 | "df = pd.DataFrame(viagra_spam, columns = ['viagra', 'spam'])\n", 97 | "df.head()" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": 8, 103 | "metadata": {}, 104 | "outputs": [ 105 | { 106 | "data": { 107 | "text/plain": [ 108 | "array([[1, 1],\n", 109 | " [0, 0],\n", 110 | " [0, 0],\n", 111 | " [0, 0],\n", 112 | " [0, 0],\n", 113 | " [0, 0],\n", 114 | " [0, 1],\n", 115 | " [0, 0],\n", 116 | " [1, 1],\n", 117 | " [1, 0],\n", 118 | " [1, 0],\n", 119 | " [0, 0],\n", 120 | " [0, 0],\n", 121 | " [1, 0],\n", 122 | " [0, 0],\n", 123 | " [0, 0],\n", 124 | " [0, 0],\n", 125 | " [0, 1],\n", 126 | " [0, 1],\n", 127 | " [1, 1]])" 128 | ] 129 | }, 130 | "execution_count": 8, 131 | "metadata": {}, 132 | "output_type": "execute_result" 133 | } 134 | ], 135 | "source": [ 136 | "np_data = df.as_matrix()\n", 137 | "np_data" 138 | ] 139 | }, 140 | { 141 | "cell_type": "code", 142 | "execution_count": 35, 143 | "metadata": { 144 | "collapsed": true 145 | }, 146 | "outputs": [], 147 | "source": [ 148 | "# P(Viagra)\n", 149 | "p_viagra = sum(np_data[:, 0] == 1) / len(np_data)\n", 150 | "p_spam = sum(np_data[:, 1] == 1) / len(np_data)\n", 151 | "p_v_cap_s = sum((np_data[:, 0] == 1) & (np_data[:, 1] == 1)) / len(np_data)\n", 152 | "p_n_v_cap_s = sum((np_data[:, 0] == 0) & (np_data[:, 1] == 1)) / len(np_data)" 153 | ] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "execution_count": 33, 158 | "metadata": {}, 159 | "outputs": [ 160 | { 161 | "data": { 162 | "text/plain": [ 163 | "0.5" 164 | ] 165 | }, 166 | "execution_count": 33, 167 | "metadata": {}, 168 | "output_type": "execute_result" 169 | } 170 | ], 171 | "source": [ 172 | "# P(spam | viagra)\n", 173 | "p_spam * (p_v_cap_s / p_spam ) / p_viagra" 174 | ] 175 | }, 176 | { 177 | "cell_type": "code", 178 | "execution_count": 37, 179 | "metadata": {}, 180 | "outputs": [ 181 | { 182 | "data": { 183 | "text/plain": [ 184 | "0.2142857142857143" 185 | ] 186 | }, 187 | "execution_count": 37, 188 | "metadata": {}, 189 | "output_type": "execute_result" 190 | } 191 | ], 192 | "source": [ 193 | "# P(spam | ~viagra)\n", 194 | "p_spam * (p_n_v_cap_s / p_spam ) / (1-p_viagra)" 195 | ] 196 | } 197 | ], 198 | "metadata": { 199 | "kernelspec": { 200 | "display_name": "Python 3", 201 | "language": "python", 202 | "name": "python3" 203 | }, 204 | "language_info": { 205 | "codemirror_mode": { 206 | "name": "ipython", 207 | "version": 3 208 | }, 209 | "file_extension": ".py", 210 | "mimetype": "text/x-python", 211 | "name": "python", 212 | "nbconvert_exporter": "python", 213 | "pygments_lexer": "ipython3", 214 | "version": "3.6.1" 215 | } 216 | }, 217 | "nbformat": 4, 218 | "nbformat_minor": 2 219 | } 220 | -------------------------------------------------------------------------------- /code/ch11/data/20021010_easy_ham.tar.bz2: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/TeamLab/machine_learning_from_scratch_with_python/d52439f9536d98f28164e8c1ecc6e9070843b971/code/ch11/data/20021010_easy_ham.tar.bz2 -------------------------------------------------------------------------------- /code/ch11/data/20021010_hard_ham.tar.bz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeamLab/machine_learning_from_scratch_with_python/d52439f9536d98f28164e8c1ecc6e9070843b971/code/ch11/data/20021010_hard_ham.tar.bz2 -------------------------------------------------------------------------------- /code/ch11/data/20021010_spam.tar.bz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeamLab/machine_learning_from_scratch_with_python/d52439f9536d98f28164e8c1ecc6e9070843b971/code/ch11/data/20021010_spam.tar.bz2 -------------------------------------------------------------------------------- /code/ch11/fraud.csv: -------------------------------------------------------------------------------- 1 | ID,History,CoApplicant,Accommodation,Fraud 2 | 1,current,none,own,true 3 | 2,paid,none,own,false 4 | 3,paid,none,own,false 5 | 4,paid,guarantor,rent,true 6 | 5,arrears,none,own,false 7 | 6,arrears,none,own,true 8 | 7,current,none,own,false 9 | 8,arrears,none,own,false 10 | 9,current,none,rent,false 11 | 10,none,none,own,true 12 | 11,current,coapplicant,own,false 13 | 12,current,none,own,true 14 | 13,current,none,rent,true 15 | 14,paid,none,own,false 16 | 15,arrears,none,own,false 17 | 16,current,none,own,false 18 | 17,arrears,coapplicant,rent,false 19 | 18,arrears,none,free,false 20 | 19,arrears,none,own,false 21 | 20,paid,none,own,false 22 | -------------------------------------------------------------------------------- /code/ch12/6_regress_tree_with_sklearn.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 29, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "from sklearn.datasets import load_boston\n", 12 | "from sklearn.cross_validation import cross_val_score\n", 13 | "from sklearn.cross_validation import KFold\n", 14 | "import numpy as np" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 80, 20 | "metadata": { 21 | "collapsed": false 22 | }, 23 | "outputs": [], 24 | "source": [ 25 | "boston = load_boston()\n", 26 | "X, y = boston.data, boston.target\n", 27 | "features = boston.feature_names\n", 28 | "\n", 29 | "crossvalidation = KFold(n=X.shape[0], n_folds=10,\n", 30 | " shuffle=True, random_state=1)" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": 81, 36 | "metadata": { 37 | "collapsed": false 38 | }, 39 | "outputs": [ 40 | { 41 | "name": "stdout", 42 | "output_type": "stream", 43 | "text": [ 44 | "Mean squared error: 18.540\n" 45 | ] 46 | } 47 | ], 48 | "source": [ 49 | "from sklearn.tree import DecisionTreeRegressor\n", 50 | "regression_tree = DecisionTreeRegressor(\n", 51 | " min_samples_split=3, min_samples_leaf=3, random_state=0, max_leaf_nodes=20)\n", 52 | "regression_tree.fit(X,y)\n", 53 | "score = np.mean(\n", 54 | " cross_val_score(regression_tree, X, y, scoring='neg_mean_squared_error', cv=crossvalidation, n_jobs=1))\n", 55 | "print ('Mean squared error: %.3f' % abs(score))" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": 82, 61 | "metadata": { 62 | "collapsed": false 63 | }, 64 |
"outputs": [ 65 | { 66 | "name": "stdout", 67 | "output_type": "stream", 68 | "text": [ 69 | "Mean squared error: 23.764\n" 70 | ] 71 | } 72 | ], 73 | "source": [ 74 | "from sklearn import linear_model\n", 75 | "regr = linear_model.LinearRegression(normalize=True)\n", 76 | "score = np.mean(\n", 77 | " cross_val_score(regr, X, y, scoring='neg_mean_squared_error', cv=crossvalidation, n_jobs=1))\n", 78 | "print ('Mean squared error: %.3f' % abs(score))" 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": 89, 84 | "metadata": { 85 | "collapsed": false 86 | }, 87 | "outputs": [], 88 | "source": [ 89 | "from sklearn.datasets import load_iris\n", 90 | "iris = load_iris()\n", 91 | "X, y = iris.data, iris.target\n", 92 | "features = iris.feature_names\n", 93 | "\n", 94 | "crossvalidation = KFold(n=X.shape[0], n_folds=5,\n", 95 | " shuffle=True, random_state=1)" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": 100, 101 | "metadata": { 102 | "collapsed": false 103 | }, 104 | "outputs": [ 105 | { 106 | "name": "stdout", 107 | "output_type": "stream", 108 | "text": [ 109 | "Depth: 1 Accuracy: 0.580\n", 110 | "Depth: 2 Accuracy: 0.913\n", 111 | "Depth: 3 Accuracy: 0.920\n", 112 | "Depth: 4 Accuracy: 0.940\n", 113 | "Depth: 5 Accuracy: 0.920\n" 114 | ] 115 | } 116 | ], 117 | "source": [ 118 | "from sklearn import tree\n", 119 | "for depth in range(1,10):\n", 120 | " tree_classifier = tree.DecisionTreeClassifier(\n", 121 | " max_depth=depth, random_state=0)\n", 122 | " if tree_classifier.fit(X,y).tree_.max_depth < depth:\n", 123 | " break\n", 124 | " score = np.mean(cross_val_score(tree_classifier, X, y,\n", 125 | " scoring='accuracy', cv=crossvalidation, n_jobs=1))\n", 126 | " print ('Depth: %i Accuracy: %.3f' % (depth,score))" 127 | ] 128 | } 129 | ], 130 | "metadata": { 131 | "kernelspec": { 132 | "display_name": "Python [conda env:ml_scratch]", 133 | "language": "python", 134 | "name": "conda-env-ml_scratch-py" 135 | }, 136 | "language_info": { 137 | "codemirror_mode": { 138 | "name": "ipython", 139 | "version": 3 140 | }, 141 | "file_extension": ".py", 142 | "mimetype": "text/x-python", 143 | "name": "python", 144 | "nbconvert_exporter": "python", 145 | "pygments_lexer": "ipython3", 146 | "version": "3.6.2" 147 | } 148 | }, 149 | "nbformat": 4, 150 | "nbformat_minor": 2 151 | } 152 | -------------------------------------------------------------------------------- /code/ch12/rental_price.csv: -------------------------------------------------------------------------------- 1 | ID SEASON WORK_DAY RENTALS 2 | 1 winter FALSE 800 3 | 2 winter FALSE 826 4 | 3 winter TRUE 900 5 | 4 spring FALSE 2100 6 | 5 spring TRUE 4740 7 | 6 spring TRUE 4900 8 | 8 summer TRUE 3000 9 | 9 summer TRUE 5800 10 | 10 autumn FALSE 6200 11 | 11 autumn FALSE 2910 12 | 12 autumn TRUE 2880 13 | 7 summer FALSE 2820 -------------------------------------------------------------------------------- /code/ch12/titanic.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeamLab/machine_learning_from_scratch_with_python/d52439f9536d98f28164e8c1ecc6e9070843b971/code/ch12/titanic.png -------------------------------------------------------------------------------- /code/ch12/vegeterianl_dataset.csv: -------------------------------------------------------------------------------- 1 | ID STREAM SLOPE ELEVATION VEGETATION 2 | 1 false steep 3900 chapparal 3 | 2 true moderate 300 riparian 4 | 3 true steep 1500 riparian 5 | 4 false steep 
1200 chapparal 6 | 5 false flat 4450 conifer 7 | 6 true steep 5000 conifer 8 | 7 true steep 3000 chapparal -------------------------------------------------------------------------------- /code/ch13/1_ensemble.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 6, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import numpy as np\n", 10 | "from sklearn.tree import DecisionTreeClassifier\n", 11 | "from sklearn.linear_model import LogisticRegression\n", 12 | "from sklearn.naive_bayes import GaussianNB\n", 13 | "from sklearn.ensemble import VotingClassifier" 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": 9, 19 | "metadata": {}, 20 | "outputs": [], 21 | "source": [ 22 | "X = np.load(\"./tatanic_X_train.npy\")\n", 23 | "y = np.load(\"./tatanic_y_train.npy\")" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": 13, 29 | "metadata": {}, 30 | "outputs": [ 31 | { 32 | "data": { 33 | "text/plain": [ 34 | "array([0.27345609, 0.01415106, 0. , 1. , 0. ,\n", 35 | " 0.125 , 0. , 0. , 0. , 1. ,\n", 36 | " 0. , 0. , 0. , 0. , 0. ,\n", 37 | " 1. , 0. , 0. , 1. , 0. ,\n", 38 | " 0. , 0. , 0. , 0. , 0. ,\n", 39 | " 0. , 0. ])" 40 | ] 41 | }, 42 | "execution_count": 13, 43 | "metadata": {}, 44 | "output_type": "execute_result" 45 | } 46 | ], 47 | "source": [ 48 | "X[0]" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": 16, 54 | "metadata": {}, 55 | "outputs": [ 56 | { 57 | "data": { 58 | "text/plain": [ 59 | "array([0., 1., 1., 1., 0., 0., 0., 0., 1., 1.])" 60 | ] 61 | }, 62 | "execution_count": 16, 63 | "metadata": {}, 64 | "output_type": "execute_result" 65 | } 66 | ], 67 | "source": [ 68 | "y[:10]" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": 47, 74 | "metadata": {}, 75 | "outputs": [], 76 | "source": [ 77 | "clf1 = LogisticRegression(random_state=1)\n", 78 | "clf2 = DecisionTreeClassifier(random_state=1)\n", 79 | "clf3 = GaussianNB()\n", 80 | "eclf = VotingClassifier(estimators=[('lr', clf1), ('rf', clf2), ('gnb', clf3)], voting='hard')" 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": 48, 86 | "metadata": {}, 87 | "outputs": [ 88 | { 89 | "data": { 90 | "text/plain": [ 91 | "0.8020504030978227" 92 | ] 93 | }, 94 | "execution_count": 48, 95 | "metadata": {}, 96 | "output_type": "execute_result" 97 | } 98 | ], 99 | "source": [ 100 | "from sklearn.model_selection import cross_val_score\n", 101 | "cross_val_score(eclf, X, y, cv=5).mean()" 102 | ] 103 | }, 104 | { 105 | "cell_type": "code", 106 | "execution_count": 49, 107 | "metadata": {}, 108 | "outputs": [ 109 | { 110 | "data": { 111 | "text/plain": [ 112 | "0.8290420872214816" 113 | ] 114 | }, 115 | "execution_count": 49, 116 | "metadata": {}, 117 | "output_type": "execute_result" 118 | } 119 | ], 120 | "source": [ 121 | "cross_val_score(clf1, X, y, cv=5).mean()" 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": 50, 127 | "metadata": {}, 128 | "outputs": [ 129 | { 130 | "data": { 131 | "text/plain": [ 132 | "0.7840411350219006" 133 | ] 134 | }, 135 | "execution_count": 50, 136 | "metadata": {}, 137 | "output_type": "execute_result" 138 | } 139 | ], 140 | "source": [ 141 | "cross_val_score(clf2, X, y, cv=5).mean()" 142 | ] 143 | }, 144 | { 145 | "cell_type": "code", 146 | "execution_count": 51, 147 | "metadata": {}, 148 | "outputs": [ 149 | { 150 | "data": { 151 | "text/plain": [ 152 | 
"0.4600139655938551" 153 | ] 154 | }, 155 | "execution_count": 51, 156 | "metadata": {}, 157 | "output_type": "execute_result" 158 | } 159 | ], 160 | "source": [ 161 | "cross_val_score(clf3, X, y, cv=5).mean()" 162 | ] 163 | }, 164 | { 165 | "cell_type": "code", 166 | "execution_count": 62, 167 | "metadata": {}, 168 | "outputs": [], 169 | "source": [ 170 | "clf1 = LogisticRegression(random_state=1)\n", 171 | "clf2 = DecisionTreeClassifier(random_state=1)\n", 172 | "eclf = VotingClassifier(estimators=[('lr', clf1), ('dt', clf2)], voting='hard')" 173 | ] 174 | }, 175 | { 176 | "cell_type": "code", 177 | "execution_count": 63, 178 | "metadata": {}, 179 | "outputs": [ 180 | { 181 | "data": { 182 | "text/plain": [ 183 | "0.8222687742017394" 184 | ] 185 | }, 186 | "execution_count": 63, 187 | "metadata": {}, 188 | "output_type": "execute_result" 189 | } 190 | ], 191 | "source": [ 192 | "cross_val_score(eclf, X, y, cv=5).mean()" 193 | ] 194 | }, 195 | { 196 | "cell_type": "code", 197 | "execution_count": 81, 198 | "metadata": {}, 199 | "outputs": [], 200 | "source": [ 201 | "c_params = [0.1, 5.0, 7.0, 10.0, 15.0, 20.0, 100.0]\n", 202 | "\n", 203 | "\n", 204 | "params ={\n", 205 | " \"lr__solver\" : ['liblinear'], \"lr__penalty\" : [\"l2\"], \"lr__C\" : c_params,\"dt__criterion\" : [\"gini\", \"entropy\"],\n", 206 | " \"dt__max_depth\" : [10,8,7,6,5,4,3,2],\n", 207 | " \"dt__min_samples_leaf\": [1,2,3,4,5,6,7,8,9]\n", 208 | " }" 209 | ] 210 | }, 211 | { 212 | "cell_type": "code", 213 | "execution_count": null, 214 | "metadata": {}, 215 | "outputs": [], 216 | "source": [ 217 | "from sklearn.model_selection import GridSearchCV\n", 218 | "grid = GridSearchCV(estimator=eclf, param_grid=params, cv=5)\n", 219 | "grid = grid.fit(X, y)" 220 | ] 221 | }, 222 | { 223 | "cell_type": "code", 224 | "execution_count": null, 225 | "metadata": {}, 226 | "outputs": [], 227 | "source": [ 228 | "grid.best_score_" 229 | ] 230 | }, 231 | { 232 | "cell_type": "code", 233 | "execution_count": null, 234 | "metadata": {}, 235 | "outputs": [], 236 | "source": [ 237 | "grid.best_params_" 238 | ] 239 | } 240 | ], 241 | "metadata": { 242 | "kernelspec": { 243 | "display_name": "Python 3", 244 | "language": "python", 245 | "name": "python3" 246 | }, 247 | "language_info": { 248 | "codemirror_mode": { 249 | "name": "ipython", 250 | "version": 3 251 | }, 252 | "file_extension": ".py", 253 | "mimetype": "text/x-python", 254 | "name": "python", 255 | "nbconvert_exporter": "python", 256 | "pygments_lexer": "ipython3", 257 | "version": "3.6.5" 258 | } 259 | }, 260 | "nbformat": 4, 261 | "nbformat_minor": 2 262 | } 263 | -------------------------------------------------------------------------------- /code/ch13/4_adaboost.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 85, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import numpy as np" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 87, 15 | "metadata": {}, 16 | "outputs": [ 17 | { 18 | "name": "stdout", 19 | "output_type": "stream", 20 | "text": [ 21 | "['three' 'one' 'one' 'two' 'three' 'two' 'three' 'three' 'one' 'one']\n" 22 | ] 23 | } 24 | ], 25 | "source": [ 26 | "elements = ['one', 'two', 'three'] \n", 27 | "weights = [0.2, 0.3, 0.5]\n", 28 | "\n", 29 | "from numpy.random import choice\n", 30 | "print(choice(elements, size=10, replace=True, p=weights))" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": 
88, 36 | "metadata": {}, 37 | "outputs": [], 38 | "source": [ 39 | "X = np.load(\"./tatanic_X_train.npy\")\n", 40 | "y = np.load(\"./tatanic_y_train.npy\")" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": 89, 46 | "metadata": {}, 47 | "outputs": [ 48 | { 49 | "data": { 50 | "text/plain": [ 51 | "array([0.27345609, 0.01415106, 0. , 1. , 0. ,\n", 52 | " 0.125 , 0. , 0. , 0. , 1. ,\n", 53 | " 0. , 0. , 0. , 0. , 0. ,\n", 54 | " 1. , 0. , 0. , 1. , 0. ,\n", 55 | " 0. , 0. , 0. , 0. , 0. ,\n", 56 | " 0. , 0. ])" 57 | ] 58 | }, 59 | "execution_count": 89, 60 | "metadata": {}, 61 | "output_type": "execute_result" 62 | } 63 | ], 64 | "source": [ 65 | "X[0]" 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": 90, 71 | "metadata": {}, 72 | "outputs": [ 73 | { 74 | "data": { 75 | "text/plain": [ 76 | "array([0., 1., 1., 1., 0., 0., 0., 0., 1., 1.])" 77 | ] 78 | }, 79 | "execution_count": 90, 80 | "metadata": {}, 81 | "output_type": "execute_result" 82 | } 83 | ], 84 | "source": [ 85 | "y[:10]" 86 | ] 87 | }, 88 | { 89 | "cell_type": "code", 90 | "execution_count": 91, 91 | "metadata": {}, 92 | "outputs": [], 93 | "source": [ 94 | "from sklearn.ensemble import AdaBoostClassifier\n", 95 | "from sklearn.tree import DecisionTreeClassifier\n" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": 98, 101 | "metadata": {}, 102 | "outputs": [], 103 | "source": [ 104 | "eclf = AdaBoostClassifier(base_estimator=DecisionTreeClassifier(max_depth=2), n_estimators=500, \n", 105 | " learning_rate=0.1)" 106 | ] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "execution_count": 99, 111 | "metadata": {}, 112 | "outputs": [ 113 | { 114 | "data": { 115 | "text/plain": [ 116 | "0.8155272011680316" 117 | ] 118 | }, 119 | "execution_count": 99, 120 | "metadata": {}, 121 | "output_type": "execute_result" 122 | } 123 | ], 124 | "source": [ 125 | "from sklearn.model_selection import cross_val_score\n", 126 | "cross_val_score(eclf, X, y, cv=5).mean()" 127 | ] 128 | }, 129 | { 130 | "cell_type": "code", 131 | "execution_count": 100, 132 | "metadata": {}, 133 | "outputs": [ 134 | { 135 | "data": { 136 | "text/plain": [ 137 | "DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,\n", 138 | " max_features=None, max_leaf_nodes=None,\n", 139 | " min_impurity_decrease=0.0, min_impurity_split=None,\n", 140 | " min_samples_leaf=1, min_samples_split=2,\n", 141 | " min_weight_fraction_leaf=0.0, presort=False, random_state=None,\n", 142 | " splitter='best')" 143 | ] 144 | }, 145 | "execution_count": 100, 146 | "metadata": {}, 147 | "output_type": "execute_result" 148 | } 149 | ], 150 | "source": [ 151 | "from sklearn.tree import DecisionTreeClassifier\n", 152 | "DecisionTreeClassifier()" 153 | ] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "execution_count": null, 158 | "metadata": {}, 159 | "outputs": [], 160 | "source": [ 161 | "AdaBoostClassifier()" 162 | ] 163 | }, 164 | { 165 | "cell_type": "code", 166 | "execution_count": 131, 167 | "metadata": {}, 168 | "outputs": [], 169 | "source": [ 170 | "params = {\"base_estimator__criterion\" : [\"gini\", \"entropy\"],\n", 171 | " \"base_estimator__max_features\" : [7,8,],\n", 172 | " \"base_estimator__max_depth\" : [1,2],\n", 173 | " \"n_estimators\": [23,24, 25, 26, 27],\n", 174 | " \"learning_rate\": [0.4, 0.45, 0.5, 0.55, 0.6]\n", 175 | " }" 176 | ] 177 | }, 178 | { 179 | "cell_type": "code", 180 | "execution_count": null, 181 | "metadata": {}, 182 | "outputs": [], 183 | "source": [ 184 | "from 
sklearn.model_selection import GridSearchCV\n", 185 | "grid = GridSearchCV(estimator=eclf, param_grid=params, cv=5, n_jobs=7)\n", 186 | "grid = grid.fit(X, y)" 187 | ] 188 | }, 189 | { 190 | "cell_type": "code", 191 | "execution_count": 128, 192 | "metadata": {}, 193 | "outputs": [ 194 | { 195 | "data": { 196 | "text/plain": [ 197 | "0.8312710911136107" 198 | ] 199 | }, 200 | "execution_count": 128, 201 | "metadata": {}, 202 | "output_type": "execute_result" 203 | } 204 | ], 205 | "source": [ 206 | "grid.best_score_" 207 | ] 208 | }, 209 | { 210 | "cell_type": "code", 211 | "execution_count": 129, 212 | "metadata": {}, 213 | "outputs": [ 214 | { 215 | "data": { 216 | "text/plain": [ 217 | "{'base_estimator__criterion': 'gini',\n", 218 | " 'base_estimator__max_depth': 2,\n", 219 | " 'base_estimator__max_features': 8,\n", 220 | " 'learning_rate': 0.6,\n", 221 | " 'n_estimators': 26}" 222 | ] 223 | }, 224 | "execution_count": 129, 225 | "metadata": {}, 226 | "output_type": "execute_result" 227 | } 228 | ], 229 | "source": [ 230 | "grid.best_params_" 231 | ] 232 | }, 233 | { 234 | "cell_type": "code", 235 | "execution_count": 130, 236 | "metadata": {}, 237 | "outputs": [ 238 | { 239 | "data": { 240 | "text/plain": [ 241 | "array([0.19406886, 0.23061462, 0.04090978, 0.05602668, 0.06539204,\n", 242 | " 0.06219571, 0.04414943, 0.03733474, 0.02491265, 0.00782351,\n", 243 | " 0. , 0. , 0.01205415, 0.05311241, 0.01024874,\n", 244 | " 0.04730915, 0. , 0.01836485, 0.00899386, 0. ,\n", 245 | " 0.00761332, 0.03372184, 0.02510816, 0.02004552, 0. ,\n", 246 | " 0. , 0. ])" 247 | ] 248 | }, 249 | "execution_count": 130, 250 | "metadata": {}, 251 | "output_type": "execute_result" 252 | } 253 | ], 254 | "source": [ 255 | "grid.best_estimator_.feature_importances_" 256 | ] 257 | }, 258 | { 259 | "cell_type": "code", 260 | "execution_count": null, 261 | "metadata": {}, 262 | "outputs": [], 263 | "source": [] 264 | } 265 | ], 266 | "metadata": { 267 | "kernelspec": { 268 | "display_name": "Python 3", 269 | "language": "python", 270 | "name": "python3" 271 | }, 272 | "language_info": { 273 | "codemirror_mode": { 274 | "name": "ipython", 275 | "version": 3 276 | }, 277 | "file_extension": ".py", 278 | "mimetype": "text/x-python", 279 | "name": "python", 280 | "nbconvert_exporter": "python", 281 | "pygments_lexer": "ipython3", 282 | "version": "3.6.5" 283 | } 284 | }, 285 | "nbformat": 4, 286 | "nbformat_minor": 2 287 | } 288 | -------------------------------------------------------------------------------- /code/ch13/6_xgboost.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import numpy as np\n", 12 | "import xgboost as xgb" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 2, 18 | "metadata": { 19 | "collapsed": true 20 | }, 21 | "outputs": [], 22 | "source": [ 23 | "X = np.load(\"./tatanic_X_train.npy\")\n", 24 | "y = np.load(\"./tatanic_y_train.npy\")" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": 5, 30 | "metadata": {}, 31 | "outputs": [], 32 | "source": [ 33 | "from sklearn.cross_validation import train_test_split" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": 11, 39 | "metadata": {}, 40 | "outputs": [], 41 | "source": [ 42 | "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size =0.3)" 43 | ] 44 | }, 45 | { 46 | 
"cell_type": "code", 47 | "execution_count": 12, 48 | "metadata": {}, 49 | "outputs": [], 50 | "source": [ 51 | "dtrain = xgb.DMatrix(X_train, label=y_train)\n", 52 | "dtest = xgb.DMatrix(X_test, label=y_test)" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": 13, 58 | "metadata": { 59 | "collapsed": true 60 | }, 61 | "outputs": [], 62 | "source": [ 63 | "param = {'max_depth': 2, 'eta': 0.5, 'silent': 1, 'objective': 'binary:logistic'}\n", 64 | "param['nthread'] = 7\n", 65 | "param['eval_metric'] = 'auc'\n", 66 | "evallist = [(dtest, 'eval'), (dtrain, 'train')]\n", 67 | "plst = param.items()" 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": 14, 73 | "metadata": {}, 74 | "outputs": [ 75 | { 76 | "name": "stdout", 77 | "output_type": "stream", 78 | "text": [ 79 | "[0]\teval-auc:0.844072\ttrain-auc:0.828469\n", 80 | "[1]\teval-auc:0.877096\ttrain-auc:0.85691\n", 81 | "[2]\teval-auc:0.88491\ttrain-auc:0.854543\n", 82 | "[3]\teval-auc:0.893383\ttrain-auc:0.86737\n", 83 | "[4]\teval-auc:0.891856\ttrain-auc:0.873424\n", 84 | "[5]\teval-auc:0.89488\ttrain-auc:0.879952\n", 85 | "[6]\teval-auc:0.898204\ttrain-auc:0.886382\n", 86 | "[7]\teval-auc:0.897515\ttrain-auc:0.88684\n", 87 | "[8]\teval-auc:0.893114\ttrain-auc:0.890581\n", 88 | "[9]\teval-auc:0.894431\ttrain-auc:0.895124\n", 89 | "[10]\teval-auc:0.897036\ttrain-auc:0.89621\n", 90 | "[11]\teval-auc:0.897036\ttrain-auc:0.898418\n", 91 | "[12]\teval-auc:0.898772\ttrain-auc:0.897682\n", 92 | "[13]\teval-auc:0.898952\ttrain-auc:0.898108\n", 93 | "[14]\teval-auc:0.900509\ttrain-auc:0.89922\n", 94 | "[15]\teval-auc:0.891617\ttrain-auc:0.904314\n", 95 | "[16]\teval-auc:0.892036\ttrain-auc:0.904407\n", 96 | "[17]\teval-auc:0.886228\ttrain-auc:0.906359\n", 97 | "[18]\teval-auc:0.888024\ttrain-auc:0.91106\n", 98 | "[19]\teval-auc:0.886916\ttrain-auc:0.912811\n", 99 | "[20]\teval-auc:0.888114\ttrain-auc:0.912549\n", 100 | "[21]\teval-auc:0.887874\ttrain-auc:0.913454\n", 101 | "[22]\teval-auc:0.888473\ttrain-auc:0.913858\n", 102 | "[23]\teval-auc:0.888593\ttrain-auc:0.914807\n", 103 | "[24]\teval-auc:0.890958\ttrain-auc:0.91875\n", 104 | "[25]\teval-auc:0.892305\ttrain-auc:0.921106\n", 105 | "[26]\teval-auc:0.893323\ttrain-auc:0.921101\n", 106 | "[27]\teval-auc:0.891078\ttrain-auc:0.922229\n", 107 | "[28]\teval-auc:0.890539\ttrain-auc:0.922982\n", 108 | "[29]\teval-auc:0.89012\ttrain-auc:0.923898\n", 109 | "[30]\teval-auc:0.891078\ttrain-auc:0.923604\n", 110 | "[31]\teval-auc:0.891228\ttrain-auc:0.924171\n", 111 | "[32]\teval-auc:0.891377\ttrain-auc:0.924455\n", 112 | "[33]\teval-auc:0.89003\ttrain-auc:0.926942\n", 113 | "[34]\teval-auc:0.889072\ttrain-auc:0.927061\n", 114 | "[35]\teval-auc:0.89012\ttrain-auc:0.928136\n", 115 | "[36]\teval-auc:0.891856\ttrain-auc:0.928114\n", 116 | "[37]\teval-auc:0.888413\ttrain-auc:0.929985\n", 117 | "[38]\teval-auc:0.89003\ttrain-auc:0.929259\n", 118 | "[39]\teval-auc:0.890509\ttrain-auc:0.931899\n", 119 | "[40]\teval-auc:0.89021\ttrain-auc:0.93269\n", 120 | "[41]\teval-auc:0.888802\ttrain-auc:0.934086\n", 121 | "[42]\teval-auc:0.889641\ttrain-auc:0.933922\n", 122 | "[43]\teval-auc:0.889251\ttrain-auc:0.934953\n", 123 | "[44]\teval-auc:0.889731\ttrain-auc:0.935924\n", 124 | "[45]\teval-auc:0.88979\ttrain-auc:0.936611\n", 125 | "[46]\teval-auc:0.890269\ttrain-auc:0.937489\n", 126 | "[47]\teval-auc:0.890569\ttrain-auc:0.937784\n", 127 | "[48]\teval-auc:0.891467\ttrain-auc:0.93858\n", 128 | "[49]\teval-auc:0.888413\ttrain-auc:0.939332\n" 129 | ] 130 | } 131 | ], 132 | "source": 
[ 133 | "num_round = 50\n", 134 | "bst = xgb.train(plst, dtrain, num_round, evallist)" 135 | ] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "execution_count": null, 140 | "metadata": { 141 | "collapsed": true 142 | }, 143 | "outputs": [], 144 | "source": [ 145 | "ypred = bst.predict(dtest, ntree_limit=bst.best_ntree_limit)" 146 | ] 147 | }, 148 | { 149 | "cell_type": "code", 150 | "execution_count": 17, 151 | "metadata": {}, 152 | "outputs": [ 153 | { 154 | "data": { 155 | "text/plain": [ 156 | "0.83895131086142327" 157 | ] 158 | }, 159 | "execution_count": 17, 160 | "metadata": {}, 161 | "output_type": "execute_result" 162 | } 163 | ], 164 | "source": [ 165 | "(sum((ypred>0.5) == y_test)) / 267.0" 166 | ] 167 | } 168 | ], 169 | "metadata": { 170 | "kernelspec": { 171 | "display_name": "Python 2", 172 | "language": "python", 173 | "name": "python2" 174 | }, 175 | "language_info": { 176 | "codemirror_mode": { 177 | "name": "ipython", 178 | "version": 2 179 | }, 180 | "file_extension": ".py", 181 | "mimetype": "text/x-python", 182 | "name": "python", 183 | "nbconvert_exporter": "python", 184 | "pygments_lexer": "ipython2", 185 | "version": "2.7.13" 186 | } 187 | }, 188 | "nbformat": 4, 189 | "nbformat_minor": 2 190 | } 191 | -------------------------------------------------------------------------------- /code/ch13/6_xgboost.py: -------------------------------------------------------------------------------- 1 | import xgboost as xgb 2 | -------------------------------------------------------------------------------- /code/ch13/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeamLab/machine_learning_from_scratch_with_python/d52439f9536d98f28164e8c1ecc6e9070843b971/code/ch13/README.md -------------------------------------------------------------------------------- /code/ch13/hourse_price_preprocessor.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | from sklearn import preprocessing 4 | 5 | def get_train_test_split_dataset(train_dataset_filename=None, 6 | test_dataset_filename = None): 7 | df_train = pd.read_csv(train_dataset_filename) 8 | df_test = pd.read_csv(test_dataset_filename) 9 | 10 | 11 | # Train_Test_concat & Target_value Extract 12 | train_index = df_train['Id'].values-1 13 | test_index = df_test['Id'].values-1 14 | target_value = df_train.iloc[:,-1] 15 | 16 | df_concat = pd.concat([df_train.iloc[:,:-1],df_test],axis=0,ignore_index=True) 17 | df_concat_numeric = df_concat.loc[:,df_concat.dtypes!='object'] 18 | ############################################################### 19 | # Column selection 20 | df_concat_numeric.drop(['MoSold'],axis=1,inplace=True) 21 | 22 | ############################################################### 23 | # Missing_value_Solve 24 | null_colums = df_concat_numeric.isnull().sum().sort_values(ascending=False)[df_concat_numeric.isnull().sum().sort_values(ascending=False)>0].index.tolist() 25 | ### Fix the GarageYrBlt outlier 26 | df_concat_numeric.GarageYrBlt.loc[2592] = df_concat_numeric.GarageYrBlt.loc[2592]-200 27 | 28 | ### Null_solve_function################### 29 | def null_solve(data_frame, null_list): 30 | for column in null_list: 31 | data_frame[column].fillna(data_frame[column].mean(),inplace=True) 32 | ########################################## 33 | null_solve(df_concat_numeric,null_colums) 34 | 35 | ### Flag whether the house was remodeled 36 | df_concat_numeric['Remodleling'] =
df_concat_numeric['YearBuilt']!=df_concat_numeric['YearRemodAdd'] 37 | 38 | df_quality_type = df_concat_numeric[['MSSubClass','OverallQual','OverallCond']] 39 | df_quantity_type = df_concat_numeric.drop(['MSSubClass','OverallQual','OverallCond'],axis=1) 40 | 41 | ############################################################### 42 | #Scaling_value 43 | ###Min_Max Scaling 44 | #from sklearn import preprocessing 45 | minmax_scale = preprocessing.MinMaxScaler().fit(df_quantity_type.iloc[train_index,1:].values) 46 | x_quantitiy_scaled = minmax_scale.transform(df_quantity_type.iloc[train_index,1:].values) 47 | 48 | ###One_hot Scaling 49 | one_hot = preprocessing.OneHotEncoder() 50 | one_hot.fit(df_quality_type.iloc[train_index].values) 51 | x_quality_scaled = one_hot.transform(df_quality_type.iloc[train_index].values).toarray() 52 | 53 | #Train 54 | x_scaled_data = np.hstack((x_quality_scaled,x_quantitiy_scaled)) 55 | Y_scaled_data = target_value.reshape(-1,) 56 | 57 | 58 | 59 | ############################################################### 60 | #Predict 61 | x_quan_predict_scaled = minmax_scale.transform(df_quantity_type.iloc[test_index,1:].values) 62 | x_qual_predict_scaled = one_hot.transform(df_quality_type.iloc[test_index].values).toarray() 63 | X_scaled_predict = np.hstack((x_qual_predict_scaled,x_quan_predict_scaled)) 64 | 65 | 66 | X_train = x_scaled_data 67 | y_train = Y_scaled_data 68 | X_test = X_scaled_predict 69 | test_id_idx = test_index + 1 70 | 71 | return X_train, X_test, y_train, test_id_idx 72 | -------------------------------------------------------------------------------- /code/ch13/stacking.py: -------------------------------------------------------------------------------- 1 | from sklearn.base import RegressorMixin, ClassifierMixin 2 | from sklearn.base import BaseEstimator 3 | 4 | from sklearn.model_selection import train_test_split 5 | 6 | import numpy as np 7 | 8 | 9 | class MyStackingRegressor(BaseEstimator, RegressorMixin): 10 | def __init__(self, meta_estimator, base_estimators, test_ratio=0.2, feature_weights=None): 11 | """ 12 | Called when initializing the classifier 13 | """ 14 | self.meta_estimator = meta_estimator 15 | self.base_estimators = base_estimators 16 | self.feature_weights = feature_weights 17 | self.test_ratio = test_ratio 18 | 19 | def fit(self, X, y=None): 20 | 21 | X_train, X_test, y_train, y_test = train_test_split( 22 | X, y, test_size=self.test_ratio) 23 | 24 | for estimator in self.base_estimators: 25 | estimator.fit(X_train, y_train) 26 | 27 | meta_train_set = np.array([estimator.predict(X_test) 28 | for estimator in self.base_estimators]).T 29 | 30 | 31 | self.meta_estimator.fit(meta_train_set, y_test) 32 | 33 | return self 34 | 35 | def predict(self, X, y=None): 36 | meta_X = [] 37 | for estimator in self.base_estimators: 38 | meta_X.append(estimator.predict(X)) 39 | meta_X = np.array(meta_X).T 40 | 41 | return self.meta_estimator.predict(meta_X) 42 | 43 | 44 | class MyStackingClassifier(BaseEstimator, ClassifierMixin): 45 | def __init__(self, meta_estimator, base_estimators, test_ratio=0.2, feature_weights=None): 46 | """ 47 | Called when initializing the classifier 48 | """ 49 | self.meta_estimator = meta_estimator 50 | self.base_estimators = base_estimators 51 | self.feature_weights = feature_weights 52 | self.test_ratio = test_ratio 53 | 54 | def fit(self, X, y=None): 55 | 56 | X_train, X_test, y_train, y_test = train_test_split( 57 | X, y, test_size=self.test_ratio) 58 | 59 | for estimator in self.base_estimators: 60 | 
estimator.fit(X_train, y_train) 61 | 62 | meta_train_set = np.array([estimator.predict(X_test) 63 | for estimator in self.base_estimators]).T 64 | 65 | 66 | self.meta_estimator.fit(meta_train_set, y_test) 67 | return self 68 | 69 | def predict(self, X, y=None): 70 | meta_X = [] 71 | for estimator in self.base_estimators: 72 | meta_X.append(estimator.predict(X)) 73 | meta_X = np.array(meta_X).T 74 | return self.meta_estimator.predict(meta_X) 75 | -------------------------------------------------------------------------------- /code/ch13/tatanic_X_train.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeamLab/machine_learning_from_scratch_with_python/d52439f9536d98f28164e8c1ecc6e9070843b971/code/ch13/tatanic_X_train.npy -------------------------------------------------------------------------------- /code/ch13/tatanic_test.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeamLab/machine_learning_from_scratch_with_python/d52439f9536d98f28164e8c1ecc6e9070843b971/code/ch13/tatanic_test.npy -------------------------------------------------------------------------------- /code/ch13/tatanic_y_train.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeamLab/machine_learning_from_scratch_with_python/d52439f9536d98f28164e8c1ecc6e9070843b971/code/ch13/tatanic_y_train.npy -------------------------------------------------------------------------------- /code/ch13/titanic_data_preprocessor.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | from sklearn.preprocessing import MinMaxScaler 4 | 5 | 6 | def transform_status(x): 7 | if "Mrs" in x or "Ms" in x: 8 | return "Mrs" 9 | elif "Mr" in x: 10 | return "Mr" 11 | elif "Miss" in x: 12 | return "Miss" 13 | elif "Master" in x: 14 | return "Master" 15 | elif "Dr" in x: 16 | return "Dr" 17 | elif "Rev" in x: 18 | return "Rev" 19 | elif "Col" in x: 20 | return "Col" 21 | else: 22 | return "0" 23 | 24 | train_df = pd.read_csv("titanic/train.csv") 25 | test_df = pd.read_csv("titanic/test.csv") 26 | 27 | train_id = train_df["PassengerId"].values 28 | test_id = test_df["PassengerId"].values 29 | 30 | all_df = train_df.append(test_df).set_index('PassengerId') 31 | all_df["Sex"] = all_df["Sex"].replace({"male":0,"female":1}) 32 | all_df["Age"].fillna( 33 | all_df.groupby("Pclass")["Age"].transform("mean"), inplace=True) 34 | all_df["cabin_count"] = all_df["Cabin"].map(lambda x : len(x.split()) if type(x) == str else 0) 35 | all_df["social_status"] = all_df["Name"].map(lambda x : transform_status(x)) 36 | all_df = all_df.drop([62,830]) 37 | train_id = np.delete(train_id, [62-1,830-1]) 38 | all_df.loc[all_df["Fare"].isnull(), "Fare"] = 12.415462 39 | all_df["cabin_type"] = all_df["Cabin"].map(lambda x : x[0] if type(x) == str else "99") 40 | 41 | del all_df["Cabin"] 42 | del all_df["Name"] 43 | del all_df["Ticket"] 44 | 45 | y = all_df.loc[train_id, "Survived"].values 46 | del all_df["Survived"] 47 | 48 | X_df = pd.get_dummies(all_df) 49 | X = X_df.values 50 | 51 | minmax_scaler = MinMaxScaler() 52 | minmax_scaler.fit(X) 53 | X = minmax_scaler.transform(X) 54 | 55 | X_train = X[:len(train_id)] 56 | X_test = X[len(train_id):] 57 | 58 | np.save("tatanic_X_train.npy", X_train) 59 | np.save("tatanic_y_train.npy", y) 60 | np.save("tatanic_test.npy", X_test) 61 | 
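titanic_data_preprocessor.py ends by saving the three arrays that the ch13 notebooks (1_ensemble.ipynb, 4_adaboost.ipynb, 6_xgboost.ipynb) read back with np.load. A minimal sketch of that round trip — it assumes the .npy files sit in the working directory, and the LogisticRegression baseline is illustrative, not part of the repo:

```python
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score

# Arrays written by titanic_data_preprocessor.py (note the repo's "tatanic" spelling)
X_train = np.load("tatanic_X_train.npy")  # min-max scaled features, 27 columns
y_train = np.load("tatanic_y_train.npy")  # survival labels, 0.0 / 1.0
X_test = np.load("tatanic_test.npy")      # Kaggle test split, same feature columns

# Illustrative baseline, mirroring the cross_val_score calls in 1_ensemble.ipynb
clf = LogisticRegression(random_state=1)
print(cross_val_score(clf, X_train, y_train, cv=5).mean())
```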
-------------------------------------------------------------------------------- /code/ch13/xgboost_installation_guide.md: -------------------------------------------------------------------------------- 1 | # Gradient Boosting Package Installation Guide for Windows 2 | 3 | This document explains how to install XGBoost and LightGBM, the two most widely used gradient boosting packages, on Windows. Each package can be installed in one of two ways: with conda, or by compiling the source and installing via pip. 4 | 5 | ## Prerequisites 6 | Installing the packages requires the following tools: 7 | 8 | - git(https://git-scm.com/) 9 | - cmake(https://cmake.org/download/) 10 | - .Net Core SDK(https://www.microsoft.com/net/download/windows) 11 | - .NET Framework Develop Pack 12 | (https://www.microsoft.com/net/download/windows) 13 | 14 | 15 | ## XGBoost installation guide 16 | ### conda 17 | With conda, installation takes only the simple command below. Depending on your machine's configuration, however, the install may fail. 18 | ```bash 19 | activate ml # activate the virtual environment 20 | conda install -c mndrake xgboost 21 | ``` 22 | ### Install from source code 23 | To install from the source code, enter the following commands in a `cmd` window. 24 | 25 | #### git clone 26 | ```bash 27 | git clone --recursive https://github.com/dmlc/xgboost 28 | 29 | cd xgboost 30 | git submodule init 31 | git submodule update 32 | ``` 33 | 34 | #### build 35 | ```bash 36 | mkdir build 37 | cd build 38 | cmake .. -G"Visual Studio 15 2017 Win64" 39 | cmake --build . --target xgboost --config Release 40 | cd.. 41 | ``` 42 | 43 | #### Python install 44 | Be sure to activate the virtual environment before installing the Python package. 45 | ```bash 46 | activate ml # activate the virtual environment 47 | cd python-package 48 | python setup.py install 49 | ``` 50 | 51 | 52 | ## LightGBM installation guide 53 | ### conda 54 | With conda, installation takes only the simple command below. Depending on your machine's configuration, however, the install may fail. 55 | 56 | ```bash 57 | activate ml # activate the virtual environment 58 | conda install -c conda-forge lightgbm 59 | ``` 60 | 61 | ### Install from source code 62 | To install from the source code, enter the following commands in a `cmd` window. 63 | 64 | #### git clone 65 | ```bash 66 | git clone --recursive https://github.com/Microsoft/LightGBM 67 | ``` 68 | 69 | #### build 70 | ```bash 71 | cd LightGBM 72 | mkdir build 73 | cd build 74 | cmake -DCMAKE_GENERATOR_PLATFORM=x64 .. 75 | cmake --build . --target ALL_BUILD --config Release 76 | ``` 77 | 78 | #### Python install 79 | Be sure to activate the virtual environment before installing the Python package. 80 | ```bash 81 | cd .. 82 | activate ml # activate the virtual environment 83 | cd python-package 84 | python setup.py install 85 | ``` 86 | -------------------------------------------------------------------------------- /code/ch14/7_distributed_training.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "name": "stderr", 10 | "output_type": "stream", 11 | "text": [ 12 | "/Users/sungchulchoi/miniconda3/envs/ml/lib/python3.6/site-packages/sklearn/cross_validation.py:41: DeprecationWarning: This module was deprecated in version 0.18 in favor of the model_selection module into which all the refactored classes and functions are moved. Also note that the interface of the new CV iterators are different from that of this module. This module will be removed in 0.20.\n", 13 | " \"This module will be removed in 0.20.\", DeprecationWarning)\n", 14 | "/Users/sungchulchoi/miniconda3/envs/ml/lib/python3.6/site-packages/sklearn/grid_search.py:42: DeprecationWarning: This module was deprecated in version 0.18 in favor of the model_selection module into which all the refactored classes and functions are moved.
This module will be removed in 0.20.\n", 15 | " DeprecationWarning)\n" 16 | ] 17 | }, 18 | { 19 | "name": "stdout", 20 | "output_type": "stream", 21 | "text": [ 22 | "--- 183.58999824523926 seconds ---\n" 23 | ] 24 | } 25 | ], 26 | "source": [ 27 | "from sklearn import grid_search, datasets\n", 28 | "from sklearn.ensemble import RandomForestClassifier\n", 29 | "from sklearn.grid_search import GridSearchCV\n", 30 | "digits = datasets.load_digits()\n", 31 | "X, y = digits.data, digits.target\n", 32 | "param_grid = {\"max_depth\": [3, None],\n", 33 | " \"max_features\": [1, 3, 10],\n", 34 | " \"min_samples_split\": [2, 3, 10],\n", 35 | " \"min_samples_leaf\": [1, 3, 10],\n", 36 | " \"bootstrap\": [True, False],\n", 37 | " \"criterion\": [\"gini\", \"entropy\"],\n", 38 | " \"n_estimators\": [10, 20, 40, 80]}\n", 39 | "import time\n", 40 | "start_time = time.time()\n", 41 | "gs = grid_search.GridSearchCV(RandomForestClassifier(), param_grid=param_grid)\n", 42 | "gs.fit(X, y)\n", 43 | "print(\"--- %s seconds ---\" % (time.time() - start_time))" 44 | ] 45 | }, 46 | { 47 | "cell_type": "markdown", 48 | "metadata": {}, 49 | "source": [ 50 | "## Scikit-learn on Spark" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": null, 56 | "metadata": {}, 57 | "outputs": [], 58 | "source": [ 59 | "from sklearn import grid_search, datasets\n", 60 | "from sklearn.ensemble import RandomForestClassifier\n", 61 | "# Use spark_sklearn’s grid search instead:\n", 62 | "from spark_sklearn import GridSearchCV\n", 63 | "digits = datasets.load_digits()\n", 64 | "X, y = digits.data, digits.target\n", 65 | "param_grid = {\"max_depth\": [3, None],\n", 66 | " \"max_features\": [1, 3, 10],\n", 67 | " \"min_samples_split\": [2, 3, 10],\n", 68 | " \"min_samples_leaf\": [1, 3, 10],\n", 69 | " \"bootstrap\": [True, False],\n", 70 | " \"criterion\": [\"gini\", \"entropy\"],\n", 71 | " \"n_estimators\": [10, 20, 40, 80]}\n", 72 | "gs = grid_search.GridSearchCV(RandomForestClassifier(), param_grid=param_grid)\n", 73 | "gs.fit(X, y)" 74 | ] 75 | } 76 | ], 77 | "metadata": { 78 | "kernelspec": { 79 | "display_name": "Python 3", 80 | "language": "python", 81 | "name": "python3" 82 | }, 83 | "language_info": { 84 | "codemirror_mode": { 85 | "name": "ipython", 86 | "version": 3 87 | }, 88 | "file_extension": ".py", 89 | "mimetype": "text/x-python", 90 | "name": "python", 91 | "nbconvert_exporter": "python", 92 | "pygments_lexer": "ipython3", 93 | "version": "3.6.5" 94 | } 95 | }, 96 | "nbformat": 4, 97 | "nbformat_minor": 2 98 | } 99 | -------------------------------------------------------------------------------- /code/ch14/9_automl_example.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pandas as pd 3 | import numpy as np 4 | from sklearn.ensemble import RandomForestClassifier 5 | 6 | 7 | y_label_df = pd.read_csv("./data/train_label.csv") 8 | y = pd.Series([1,1,0,1], index=["2month", "month", "retained","week"]) 9 | y_label_df["class"] = y_label_df["label"].map(y) 10 | 11 | 12 | X_raw_df = pd.read_csv("./data/train_activity.csv") 13 | X_df = X_raw_df.groupby(["acc_id"]).sum().reset_index() 14 | 15 | from sklearn.model_selection import train_test_split 16 | 17 | X_payment_df = pd.read_csv("./data/train_payment.csv") 18 | X_df = X_df.merge(X_payment_df.groupby("acc_id").sum().reset_index(), how="left", on="acc_id") 19 | 20 | 21 | X_df = X_df.fillna(0) 22 | 23 | X_train, X_test, y_train, y_test = train_test_split( 24 | X_df.values, 
y_label_df["class"].values, test_size=0.2, stratify=y_label_df["class"]) 25 | 26 | rfc = RandomForestClassifier() 27 | rfc.fit(X_train[:, 1:], y_train) 28 | from sklearn.metrics import accuracy_score 29 | y_pred = rfc.predict(X_test[:,1:]) 30 | print(accuracy_score(y_test, y_pred)) 31 | 32 | import autosklearn.classification 33 | automl = autosklearn.classification.AutoSklearnClassifier( 34 | ) 35 | 36 | automl.fit(X_train[:, 1:], y_train) 37 | 38 | y_pred = automl.predict(X_test[:,1:]) 39 | print("Accuracy score", accuracy_score(y_test, y_pred)) 40 | -------------------------------------------------------------------------------- /code/ch14/hourse_price_preprocessor.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | from sklearn import preprocessing 4 | 5 | def get_train_test_split_dataset(train_dataset_filename=None, 6 | test_dataset_filename = None): 7 | df_train = pd.read_csv(train_dataset_filename) 8 | df_test = pd.read_csv(test_dataset_filename) 9 | 10 | 11 | # Train_Test_concat & Trarget_value Extract 12 | train_index = df_train['Id'].values-1 13 | test_index = df_test['Id'].values-1 14 | target_value = df_train.iloc[:,-1] 15 | 16 | df_concat = pd.concat([df_train.iloc[:,:-1],df_test],axis=0,ignore_index=True) 17 | df_concat_numeric = df_concat.loc[:,df_concat.dtypes!='object'] 18 | ############################################################### 19 | # 칼럼 Searching 20 | df_concat_numeric.drop(['MoSold'],axis=1,inplace=True) 21 | 22 | ############################################################### 23 | # Missing_value_Solve 24 | null_colums = df_concat_numeric.isnull().sum().sort_values(ascending=False)[df_concat_numeric.isnull().sum().sort_values(ascending=False)>0].index.tolist() 25 | ### GarageYrBlt 특이치 해결 26 | df_concat_numeric.GarageYrBlt.loc[2592] = df_concat_numeric.GarageYrBlt.loc[2592]-200 27 | 28 | ### Null_solve_function################### 29 | def null_solve(data_frame, null_list): 30 | for column in null_list: 31 | data_frame[column].fillna(data_frame[column].mean(),inplace=True) 32 | ########################################## 33 | null_solve(df_concat_numeric,null_colums) 34 | 35 | ### 리모델링 여부 반영 36 | df_concat_numeric['Remodleling'] = df_concat_numeric['YearBuilt']!=df_concat_numeric['YearRemodAdd'] 37 | 38 | df_quality_type = df_concat_numeric[['MSSubClass','OverallQual','OverallCond']] 39 | df_quantity_type = df_concat_numeric.drop(['MSSubClass','OverallQual','OverallCond'],axis=1) 40 | 41 | ############################################################### 42 | #Scaling_value 43 | ###Min_Max Scaling 44 | #from sklearn import preprocessing 45 | minmax_scale = preprocessing.MinMaxScaler().fit(df_quantity_type.iloc[train_index,1:].values) 46 | x_quantitiy_scaled = minmax_scale.transform(df_quantity_type.iloc[train_index,1:].values) 47 | 48 | ###One_hot Scaling 49 | one_hot = preprocessing.OneHotEncoder() 50 | one_hot.fit(df_quality_type.iloc[train_index].values) 51 | x_quality_scaled = one_hot.transform(df_quality_type.iloc[train_index].values).toarray() 52 | 53 | #Train 54 | x_scaled_data = np.hstack((x_quality_scaled,x_quantitiy_scaled)) 55 | Y_scaled_data = target_value.reshape(-1,) 56 | 57 | 58 | 59 | ############################################################### 60 | #Predict 61 | x_quan_predict_scaled = minmax_scale.transform(df_quantity_type.iloc[test_index,1:].values) 62 | x_qual_predict_scaled = one_hot.transform(df_quality_type.iloc[test_index].values).toarray() 63 | 
X_scaled_predict = np.hstack((x_qual_predict_scaled,x_quan_predict_scaled)) 64 | 65 | 66 | X_train = x_scaled_data 67 | y_train = Y_scaled_data 68 | X_test = X_scaled_predict 69 | test_id_idx = test_index + 1 70 | 71 | return X_train, X_test, y_train, test_id_idx 72 | -------------------------------------------------------------------------------- /code/ch2/housing.names: -------------------------------------------------------------------------------- 1 | 1. Title: Boston Housing Data 2 | 3 | 2. Sources: 4 | (a) Origin: This dataset was taken from the StatLib library which is 5 | maintained at Carnegie Mellon University. 6 | (b) Creator: Harrison, D. and Rubinfeld, D.L. 'Hedonic prices and the 7 | demand for clean air', J. Environ. Economics & Management, 8 | vol.5, 81-102, 1978. 9 | (c) Date: July 7, 1993 10 | 11 | 3. Past Usage: 12 | - Used in Belsley, Kuh & Welsch, 'Regression diagnostics ...', Wiley, 13 | 1980. N.B. Various transformations are used in the table on 14 | pages 244-261. 15 | - Quinlan,R. (1993). Combining Instance-Based and Model-Based Learning. 16 | In Proceedings on the Tenth International Conference of Machine 17 | Learning, 236-243, University of Massachusetts, Amherst. Morgan 18 | Kaufmann. 19 | 20 | 4. Relevant Information: 21 | 22 | Concerns housing values in suburbs of Boston. 23 | 24 | 5. Number of Instances: 506 25 | 26 | 6. Number of Attributes: 13 continuous attributes (including "class" 27 | attribute "MEDV"), 1 binary-valued attribute. 28 | 29 | 7. Attribute Information: 30 | 31 | 1. CRIM per capita crime rate by town 32 | 2. ZN proportion of residential land zoned for lots over 33 | 25,000 sq.ft. 34 | 3. INDUS proportion of non-retail business acres per town 35 | 4. CHAS Charles River dummy variable (= 1 if tract bounds 36 | river; 0 otherwise) 37 | 5. NOX nitric oxides concentration (parts per 10 million) 38 | 6. RM average number of rooms per dwelling 39 | 7. AGE proportion of owner-occupied units built prior to 1940 40 | 8. DIS weighted distances to five Boston employment centres 41 | 9. RAD index of accessibility to radial highways 42 | 10. TAX full-value property-tax rate per $10,000 43 | 11. PTRATIO pupil-teacher ratio by town 44 | 12. B 1000(Bk - 0.63)^2 where Bk is the proportion of blacks 45 | by town 46 | 13. LSTAT % lower status of the population 47 | 14. MEDV Median value of owner-occupied homes in $1000's 48 | 49 | 8. Missing Attribute Values: None. 
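Everything in ch2 that reads `housing.data` relies on the attribute order documented above. A minimal loading sketch follows; the column names come from section 7 of housing.names, while the use of `pandas` here (rather than, say, `np.loadtxt`) is just one convenient option:

```python
import pandas as pd

# Column order follows section 7 of housing.names.
columns = ["CRIM", "ZN", "INDUS", "CHAS", "NOX", "RM", "AGE",
           "DIS", "RAD", "TAX", "PTRATIO", "B", "LSTAT", "MEDV"]

# housing.data is whitespace-delimited plain text with no header row.
df = pd.read_csv("housing.data", delim_whitespace=True,
                 header=None, names=columns)

X = df.drop(columns="MEDV").values  # the 13 predictors
y = df["MEDV"].values               # target: median home value in $1000's
```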
50 | 51 | 52 | 53 | -------------------------------------------------------------------------------- /code/ch3/2_numpy_reshape.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 2, 6 | "metadata": { 7 | "ExecuteTime": { 8 | "end_time": "2017-09-26T06:33:47.101403Z", 9 | "start_time": "2017-09-26T06:33:46.925548Z" 10 | }, 11 | "collapsed": true 12 | }, 13 | "outputs": [], 14 | "source": [ 15 | "import numpy as np" 16 | ] 17 | }, 18 | { 19 | "cell_type": "markdown", 20 | "metadata": {}, 21 | "source": [ 22 | "#### reshape" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": 6, 28 | "metadata": { 29 | "ExecuteTime": { 30 | "end_time": "2017-09-26T06:34:35.652950Z", 31 | "start_time": "2017-09-26T06:34:35.648564Z" 32 | } 33 | }, 34 | "outputs": [ 35 | { 36 | "data": { 37 | "text/plain": [ 38 | "(2, 4)" 39 | ] 40 | }, 41 | "execution_count": 6, 42 | "metadata": {}, 43 | "output_type": "execute_result" 44 | } 45 | ], 46 | "source": [ 47 | "test_matrix = [[1,2,3,4], [1,2,5,8]]\n", 48 | "np.array(test_matrix).shape" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": 8, 54 | "metadata": { 55 | "ExecuteTime": { 56 | "end_time": "2017-09-26T06:34:40.873051Z", 57 | "start_time": "2017-09-26T06:34:40.868440Z" 58 | } 59 | }, 60 | "outputs": [ 61 | { 62 | "data": { 63 | "text/plain": [ 64 | "array([[[1, 2],\n", 65 | " [3, 4]],\n", 66 | "\n", 67 | " [[1, 2],\n", 68 | " [5, 8]]])" 69 | ] 70 | }, 71 | "execution_count": 8, 72 | "metadata": {}, 73 | "output_type": "execute_result" 74 | } 75 | ], 76 | "source": [ 77 | "np.array(test_matrix).reshape(2,2,2)" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": 11, 83 | "metadata": {}, 84 | "outputs": [ 85 | { 86 | "data": { 87 | "text/plain": [ 88 | "(8,)" 89 | ] 90 | }, 91 | "execution_count": 11, 92 | "metadata": {}, 93 | "output_type": "execute_result" 94 | } 95 | ], 96 | "source": [ 97 | "np.array(test_matrix).reshape(8,).shape" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": 12, 103 | "metadata": {}, 104 | "outputs": [ 105 | { 106 | "data": { 107 | "text/plain": [ 108 | "(2, 4)" 109 | ] 110 | }, 111 | "execution_count": 12, 112 | "metadata": {}, 113 | "output_type": "execute_result" 114 | } 115 | ], 116 | "source": [ 117 | "np.array(test_matrix).reshape(2,4).shape" 118 | ] 119 | }, 120 | { 121 | "cell_type": "code", 122 | "execution_count": 9, 123 | "metadata": { 124 | "ExecuteTime": { 125 | "end_time": "2017-09-26T06:35:37.334641Z", 126 | "start_time": "2017-09-26T06:35:37.330454Z" 127 | } 128 | }, 129 | "outputs": [ 130 | { 131 | "data": { 132 | "text/plain": [ 133 | "(2, 4)" 134 | ] 135 | }, 136 | "execution_count": 9, 137 | "metadata": {}, 138 | "output_type": "execute_result" 139 | } 140 | ], 141 | "source": [ 142 | "np.array(test_matrix).reshape(2,-1).shape" 143 | ] 144 | }, 145 | { 146 | "cell_type": "code", 147 | "execution_count": 23, 148 | "metadata": {}, 149 | "outputs": [ 150 | { 151 | "data": { 152 | "text/plain": [ 153 | "array([[[1, 2],\n", 154 | " [3, 4]],\n", 155 | "\n", 156 | " [[1, 2],\n", 157 | " [5, 8]]])" 158 | ] 159 | }, 160 | "execution_count": 23, 161 | "metadata": {}, 162 | "output_type": "execute_result" 163 | } 164 | ], 165 | "source": [ 166 | "np.array(test_matrix).reshape(2,2,2)" 167 | ] 168 | }, 169 | { 170 | "cell_type": "code", 171 | "execution_count": 25, 172 | "metadata": {}, 173 | "outputs": [ 174 | { 175 | "data": { 176 | 
"text/plain": [ 177 | "(2, 2, 2)" 178 | ] 179 | }, 180 | "execution_count": 25, 181 | "metadata": {}, 182 | "output_type": "execute_result" 183 | } 184 | ], 185 | "source": [ 186 | "np.array(test_matrix).reshape(2,2,2).shape" 187 | ] 188 | }, 189 | { 190 | "cell_type": "markdown", 191 | "metadata": {}, 192 | "source": [ 193 | "#### flat or flatten()" 194 | ] 195 | }, 196 | { 197 | "cell_type": "code", 198 | "execution_count": 40, 199 | "metadata": {}, 200 | "outputs": [ 201 | { 202 | "data": { 203 | "text/plain": [ 204 | "array([1, 2, 3, 4, 1, 2, 5, 8, 1, 2, 3, 4, 1, 2, 5, 8])" 205 | ] 206 | }, 207 | "execution_count": 40, 208 | "metadata": {}, 209 | "output_type": "execute_result" 210 | } 211 | ], 212 | "source": [ 213 | "test_matrix = [[[1,2,3,4], [1,2,5,8]], [[1,2,3,4], [1,2,5,8]]]\n", 214 | "np.array(test_matrix).flatten()" 215 | ] 216 | } 217 | ], 218 | "metadata": { 219 | "anaconda-cloud": {}, 220 | "kernelspec": { 221 | "display_name": "Python 3", 222 | "language": "python", 223 | "name": "python3" 224 | }, 225 | "language_info": { 226 | "codemirror_mode": { 227 | "name": "ipython", 228 | "version": 3 229 | }, 230 | "file_extension": ".py", 231 | "mimetype": "text/x-python", 232 | "name": "python", 233 | "nbconvert_exporter": "python", 234 | "pygments_lexer": "ipython3", 235 | "version": "3.6.2" 236 | }, 237 | "nav_menu": {}, 238 | "toc": { 239 | "navigate_menu": true, 240 | "number_sections": true, 241 | "sideBar": true, 242 | "threshold": 6, 243 | "toc_cell": false, 244 | "toc_section_display": "block", 245 | "toc_window_display": false 246 | } 247 | }, 248 | "nbformat": 4, 249 | "nbformat_minor": 2 250 | } 251 | -------------------------------------------------------------------------------- /code/ch3/3_indexing_slicing.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "ExecuteTime": { 8 | "end_time": "2017-09-26T06:37:50.179800Z", 9 | "start_time": "2017-09-26T06:37:49.983626Z" 10 | }, 11 | "collapsed": true 12 | }, 13 | "outputs": [], 14 | "source": [ 15 | "import numpy as np" 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": 2, 21 | "metadata": { 22 | "ExecuteTime": { 23 | "end_time": "2017-09-26T06:37:51.740944Z", 24 | "start_time": "2017-09-26T06:37:51.731945Z" 25 | } 26 | }, 27 | "outputs": [ 28 | { 29 | "data": { 30 | "text/plain": [ 31 | "array([[1, 2, 3],\n", 32 | " [4, 5, 6]])" 33 | ] 34 | }, 35 | "execution_count": 2, 36 | "metadata": {}, 37 | "output_type": "execute_result" 38 | } 39 | ], 40 | "source": [ 41 | "test_exmaple = np.array([[1, 2, 3], [4.5, 5, 6]], int)\n", 42 | "test_exmaple" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": 3, 48 | "metadata": { 49 | "ExecuteTime": { 50 | "end_time": "2017-09-26T06:37:52.891230Z", 51 | "start_time": "2017-09-26T06:37:52.886872Z" 52 | } 53 | }, 54 | "outputs": [ 55 | { 56 | "data": { 57 | "text/plain": [ 58 | "1" 59 | ] 60 | }, 61 | "execution_count": 3, 62 | "metadata": {}, 63 | "output_type": "execute_result" 64 | } 65 | ], 66 | "source": [ 67 | "test_exmaple[0][0]" 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": 4, 73 | "metadata": { 74 | "ExecuteTime": { 75 | "end_time": "2017-09-26T06:37:53.651076Z", 76 | "start_time": "2017-09-26T06:37:53.646830Z" 77 | } 78 | }, 79 | "outputs": [ 80 | { 81 | "data": { 82 | "text/plain": [ 83 | "1" 84 | ] 85 | }, 86 | "execution_count": 4, 87 | "metadata": {}, 88 | "output_type": 
"execute_result" 89 | } 90 | ], 91 | "source": [ 92 | "test_exmaple[0,0]" 93 | ] 94 | }, 95 | { 96 | "cell_type": "code", 97 | "execution_count": 8, 98 | "metadata": { 99 | "ExecuteTime": { 100 | "end_time": "2017-09-26T06:38:09.770954Z", 101 | "start_time": "2017-09-26T06:38:09.766561Z" 102 | } 103 | }, 104 | "outputs": [ 105 | { 106 | "data": { 107 | "text/plain": [ 108 | "array([[10, 2, 3],\n", 109 | " [ 4, 5, 6]])" 110 | ] 111 | }, 112 | "execution_count": 8, 113 | "metadata": {}, 114 | "output_type": "execute_result" 115 | } 116 | ], 117 | "source": [ 118 | "test_exmaple[0,0] = 10 # Matrix 0,0 에 12 할당\n", 119 | "test_exmaple" 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": 9, 125 | "metadata": { 126 | "ExecuteTime": { 127 | "end_time": "2017-09-26T06:38:12.442886Z", 128 | "start_time": "2017-09-26T06:38:12.438469Z" 129 | } 130 | }, 131 | "outputs": [ 132 | { 133 | "data": { 134 | "text/plain": [ 135 | "5" 136 | ] 137 | }, 138 | "execution_count": 9, 139 | "metadata": {}, 140 | "output_type": "execute_result" 141 | } 142 | ], 143 | "source": [ 144 | "test_exmaple[0][0] = 5 # Matrix 0,0 에 12 할당\n", 145 | "test_exmaple[0,0]" 146 | ] 147 | }, 148 | { 149 | "cell_type": "markdown", 150 | "metadata": {}, 151 | "source": [ 152 | "#### slicing" 153 | ] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "execution_count": 22, 158 | "metadata": {}, 159 | "outputs": [ 160 | { 161 | "data": { 162 | "text/plain": [ 163 | "array([[1, 2, 5, 8],\n", 164 | " [1, 2, 5, 8]])" 165 | ] 166 | }, 167 | "execution_count": 22, 168 | "metadata": {}, 169 | "output_type": "execute_result" 170 | } 171 | ], 172 | "source": [ 173 | "test_exmaple = np.array([\n", 174 | " [1, 2, 5,8], [1, 2, 5,8],[1, 2, 5,8],[1, 2, 5,8]], int)\n", 175 | "test_exmaple[:2,:] " 176 | ] 177 | }, 178 | { 179 | "cell_type": "code", 180 | "execution_count": 21, 181 | "metadata": {}, 182 | "outputs": [ 183 | { 184 | "data": { 185 | "text/plain": [ 186 | "array([[1, 2, 5, 8],\n", 187 | " [1, 2, 5, 8]])" 188 | ] 189 | }, 190 | "execution_count": 21, 191 | "metadata": {}, 192 | "output_type": "execute_result" 193 | } 194 | ], 195 | "source": [ 196 | "test_exmaple[:,1:3] \n", 197 | "test_exmaple[1,:2] " 198 | ] 199 | }, 200 | { 201 | "cell_type": "code", 202 | "execution_count": 13, 203 | "metadata": {}, 204 | "outputs": [ 205 | { 206 | "data": { 207 | "text/plain": [ 208 | "array([[ 3, 4, 5],\n", 209 | " [ 8, 9, 10]])" 210 | ] 211 | }, 212 | "execution_count": 13, 213 | "metadata": {}, 214 | "output_type": "execute_result" 215 | } 216 | ], 217 | "source": [ 218 | "test_exmaple = np.array([[1, 2, 3, 4, 5], [6, 7, 8, 9, 10]], int)\n", 219 | "test_exmaple[:,2:] # 전체 Row의 2열 이상" 220 | ] 221 | }, 222 | { 223 | "cell_type": "code", 224 | "execution_count": 14, 225 | "metadata": {}, 226 | "outputs": [ 227 | { 228 | "data": { 229 | "text/plain": [ 230 | "array([7, 8])" 231 | ] 232 | }, 233 | "execution_count": 14, 234 | "metadata": {}, 235 | "output_type": "execute_result" 236 | } 237 | ], 238 | "source": [ 239 | "test_exmaple[1,1:3] # 1 Row의 1열 ~ 2열" 240 | ] 241 | }, 242 | { 243 | "cell_type": "code", 244 | "execution_count": 10, 245 | "metadata": { 246 | "ExecuteTime": { 247 | "end_time": "2017-09-26T06:43:31.638458Z", 248 | "start_time": "2017-09-26T06:43:31.634892Z" 249 | } 250 | }, 251 | "outputs": [ 252 | { 253 | "data": { 254 | "text/plain": [ 255 | "array([[4, 5, 6]])" 256 | ] 257 | }, 258 | "execution_count": 10, 259 | "metadata": {}, 260 | "output_type": "execute_result" 261 | } 262 | ], 263 | "source": [ 264 | 
"test_exmaple[1:3] # 1 Row ~ 2Row의 전체" 265 | ] 266 | }, 267 | { 268 | "cell_type": "code", 269 | "execution_count": 16, 270 | "metadata": { 271 | "ExecuteTime": { 272 | "end_time": "2017-09-26T06:45:47.055820Z", 273 | "start_time": "2017-09-26T06:45:47.050822Z" 274 | } 275 | }, 276 | "outputs": [ 277 | { 278 | "data": { 279 | "text/plain": [ 280 | "array([[ 9],\n", 281 | " [19],\n", 282 | " [29],\n", 283 | " [39],\n", 284 | " [49],\n", 285 | " [59],\n", 286 | " [69],\n", 287 | " [79],\n", 288 | " [89],\n", 289 | " [99]])" 290 | ] 291 | }, 292 | "execution_count": 16, 293 | "metadata": {}, 294 | "output_type": "execute_result" 295 | } 296 | ], 297 | "source": [ 298 | "a = np.arange(100).reshape(10,10)\n", 299 | "a[:, -1].reshape(-1,1)" 300 | ] 301 | } 302 | ], 303 | "metadata": { 304 | "kernelspec": { 305 | "display_name": "Python 3", 306 | "language": "python", 307 | "name": "python3" 308 | }, 309 | "language_info": { 310 | "codemirror_mode": { 311 | "name": "ipython", 312 | "version": 3 313 | }, 314 | "file_extension": ".py", 315 | "mimetype": "text/x-python", 316 | "name": "python", 317 | "nbconvert_exporter": "python", 318 | "pygments_lexer": "ipython3", 319 | "version": "3.6.2" 320 | }, 321 | "nav_menu": {}, 322 | "toc": { 323 | "navigate_menu": true, 324 | "number_sections": true, 325 | "sideBar": true, 326 | "threshold": 6, 327 | "toc_cell": false, 328 | "toc_section_display": "block", 329 | "toc_window_display": false 330 | } 331 | }, 332 | "nbformat": 4, 333 | "nbformat_minor": 2 334 | } 335 | -------------------------------------------------------------------------------- /code/ch3/9_numpy_data_io.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 20, 6 | "metadata": { 7 | "ExecuteTime": { 8 | "end_time": "2017-09-26T02:28:09.787514Z", 9 | "start_time": "2017-09-26T02:28:09.785012Z" 10 | }, 11 | "collapsed": true 12 | }, 13 | "outputs": [], 14 | "source": [ 15 | "import numpy as np" 16 | ] 17 | }, 18 | { 19 | "cell_type": "markdown", 20 | "metadata": {}, 21 | "source": [ 22 | "#### load txt" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": 15, 28 | "metadata": { 29 | "ExecuteTime": { 30 | "end_time": "2017-09-26T02:11:26.296847Z", 31 | "start_time": "2017-09-26T02:11:26.291077Z" 32 | } 33 | }, 34 | "outputs": [ 35 | { 36 | "data": { 37 | "text/plain": [ 38 | "array([[ 1900., 30000., 4000., 48300.],\n", 39 | " [ 1901., 47200., 6100., 48200.],\n", 40 | " [ 1902., 70200., 9800., 41500.],\n", 41 | " [ 1903., 77400., 35200., 38200.],\n", 42 | " [ 1904., 36300., 59400., 40600.],\n", 43 | " [ 1905., 20600., 41700., 39800.],\n", 44 | " [ 1906., 18100., 19000., 38600.],\n", 45 | " [ 1907., 21400., 13000., 42300.],\n", 46 | " [ 1908., 22000., 8300., 44500.],\n", 47 | " [ 1909., 25400., 9100., 42100.]])" 48 | ] 49 | }, 50 | "execution_count": 15, 51 | "metadata": {}, 52 | "output_type": "execute_result" 53 | } 54 | ], 55 | "source": [ 56 | "a = np.loadtxt(\"./populations.txt\")\n", 57 | "a[:10]" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": 18, 63 | "metadata": { 64 | "ExecuteTime": { 65 | "end_time": "2017-09-26T02:11:40.991394Z", 66 | "start_time": "2017-09-26T02:11:40.986710Z" 67 | } 68 | }, 69 | "outputs": [ 70 | { 71 | "data": { 72 | "text/plain": [ 73 | "array([[ 1900, 30000, 4000, 48300],\n", 74 | " [ 1901, 47200, 6100, 48200],\n", 75 | " [ 1902, 70200, 9800, 41500]])" 76 | ] 77 | }, 78 | "execution_count": 18, 79 | 
"metadata": {}, 80 | "output_type": "execute_result" 81 | } 82 | ], 83 | "source": [ 84 | "a_int = a.astype(int)\n", 85 | "a_int[:3]" 86 | ] 87 | }, 88 | { 89 | "cell_type": "code", 90 | "execution_count": 21, 91 | "metadata": { 92 | "ExecuteTime": { 93 | "end_time": "2017-09-26T02:28:11.931523Z", 94 | "start_time": "2017-09-26T02:28:11.928654Z" 95 | }, 96 | "collapsed": true 97 | }, 98 | "outputs": [], 99 | "source": [ 100 | "np.savetxt('int_data.csv',a_int, delimiter=\",\")" 101 | ] 102 | }, 103 | { 104 | "cell_type": "markdown", 105 | "metadata": {}, 106 | "source": [ 107 | "#### numpy object - npy" 108 | ] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": 31, 113 | "metadata": { 114 | "ExecuteTime": { 115 | "end_time": "2017-09-26T02:36:56.843546Z", 116 | "start_time": "2017-09-26T02:36:56.839840Z" 117 | }, 118 | "collapsed": true 119 | }, 120 | "outputs": [], 121 | "source": [ 122 | "np.save(\"npy_test\", arr=a_int)" 123 | ] 124 | }, 125 | { 126 | "cell_type": "code", 127 | "execution_count": 35, 128 | "metadata": { 129 | "ExecuteTime": { 130 | "end_time": "2017-09-26T02:37:07.300334Z", 131 | "start_time": "2017-09-26T02:37:07.295316Z" 132 | } 133 | }, 134 | "outputs": [ 135 | { 136 | "data": { 137 | "text/plain": [ 138 | "array([[ 1900, 30000, 4000, 48300],\n", 139 | " [ 1901, 47200, 6100, 48200],\n", 140 | " [ 1902, 70200, 9800, 41500]])" 141 | ] 142 | }, 143 | "execution_count": 35, 144 | "metadata": {}, 145 | "output_type": "execute_result" 146 | } 147 | ], 148 | "source": [ 149 | "npy_array = np.load(file=\"npy_test.npy\")\n", 150 | "npy_array[:3]" 151 | ] 152 | }, 153 | { 154 | "cell_type": "code", 155 | "execution_count": null, 156 | "metadata": { 157 | "collapsed": true 158 | }, 159 | "outputs": [], 160 | "source": [] 161 | } 162 | ], 163 | "metadata": { 164 | "kernelspec": { 165 | "display_name": "Python 3", 166 | "language": "python", 167 | "name": "python3" 168 | }, 169 | "language_info": { 170 | "codemirror_mode": { 171 | "name": "ipython", 172 | "version": 3 173 | }, 174 | "file_extension": ".py", 175 | "mimetype": "text/x-python", 176 | "name": "python", 177 | "nbconvert_exporter": "python", 178 | "pygments_lexer": "ipython3", 179 | "version": "3.6.1" 180 | }, 181 | "nav_menu": {}, 182 | "toc": { 183 | "navigate_menu": true, 184 | "number_sections": true, 185 | "sideBar": true, 186 | "threshold": 6, 187 | "toc_cell": false, 188 | "toc_section_display": "block", 189 | "toc_window_display": false 190 | } 191 | }, 192 | "nbformat": 4, 193 | "nbformat_minor": 2 194 | } 195 | -------------------------------------------------------------------------------- /code/ch3/int_data.csv: -------------------------------------------------------------------------------- 1 | 1.900000000000000000e+03,3.000000000000000000e+04,4.000000000000000000e+03,4.830000000000000000e+04 2 | 1.901000000000000000e+03,4.720000000000000000e+04,6.100000000000000000e+03,4.820000000000000000e+04 3 | 1.902000000000000000e+03,7.020000000000000000e+04,9.800000000000000000e+03,4.150000000000000000e+04 4 | 1.903000000000000000e+03,7.740000000000000000e+04,3.520000000000000000e+04,3.820000000000000000e+04 5 | 1.904000000000000000e+03,3.630000000000000000e+04,5.940000000000000000e+04,4.060000000000000000e+04 6 | 1.905000000000000000e+03,2.060000000000000000e+04,4.170000000000000000e+04,3.980000000000000000e+04 7 | 1.906000000000000000e+03,1.810000000000000000e+04,1.900000000000000000e+04,3.860000000000000000e+04 8 | 
1.907000000000000000e+03,2.140000000000000000e+04,1.300000000000000000e+04,4.230000000000000000e+04 9 | 1.908000000000000000e+03,2.200000000000000000e+04,8.300000000000000000e+03,4.450000000000000000e+04 10 | 1.909000000000000000e+03,2.540000000000000000e+04,9.100000000000000000e+03,4.210000000000000000e+04 11 | 1.910000000000000000e+03,2.710000000000000000e+04,7.400000000000000000e+03,4.600000000000000000e+04 12 | 1.911000000000000000e+03,4.030000000000000000e+04,8.000000000000000000e+03,4.680000000000000000e+04 13 | 1.912000000000000000e+03,5.700000000000000000e+04,1.230000000000000000e+04,4.380000000000000000e+04 14 | 1.913000000000000000e+03,7.660000000000000000e+04,1.950000000000000000e+04,4.090000000000000000e+04 15 | 1.914000000000000000e+03,5.230000000000000000e+04,4.570000000000000000e+04,3.940000000000000000e+04 16 | 1.915000000000000000e+03,1.950000000000000000e+04,5.110000000000000000e+04,3.900000000000000000e+04 17 | 1.916000000000000000e+03,1.120000000000000000e+04,2.970000000000000000e+04,3.670000000000000000e+04 18 | 1.917000000000000000e+03,7.600000000000000000e+03,1.580000000000000000e+04,4.180000000000000000e+04 19 | 1.918000000000000000e+03,1.460000000000000000e+04,9.700000000000000000e+03,4.330000000000000000e+04 20 | 1.919000000000000000e+03,1.620000000000000000e+04,1.010000000000000000e+04,4.130000000000000000e+04 21 | 1.920000000000000000e+03,2.470000000000000000e+04,8.600000000000000000e+03,4.730000000000000000e+04 22 | 
-------------------------------------------------------------------------------- /code/ch3/npy_test.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeamLab/machine_learning_from_scratch_with_python/d52439f9536d98f28164e8c1ecc6e9070843b971/code/ch3/npy_test.npy 
-------------------------------------------------------------------------------- /code/ch3/populations.txt: -------------------------------------------------------------------------------- 1 | # year hare lynx carrot 2 | 1900 30e3 4e3 48300 3 | 1901 47.2e3 6.1e3 48200 4 | 1902 70.2e3 9.8e3 41500 5 | 1903 77.4e3 35.2e3 38200 6 | 1904 36.3e3 59.4e3 40600 7 | 1905 20.6e3 41.7e3 39800 8 | 1906 18.1e3 19e3 38600 9 | 1907 21.4e3 13e3 42300 10 | 1908 22e3 8.3e3 44500 11 | 1909 25.4e3 9.1e3 42100 12 | 1910 27.1e3 7.4e3 46000 13 | 1911 40.3e3 8e3 46800 14 | 1912 57e3 12.3e3 43800 15 | 1913 76.6e3 19.5e3 40900 16 | 1914 52.3e3 45.7e3 39400 17 | 1915 19.5e3 51.1e3 39000 18 | 1916 11.2e3 29.7e3 36700 19 | 1917 7.6e3 15.8e3 41800 20 | 1918 14.6e3 9.7e3 43300 21 | 1919 16.2e3 10.1e3 41300 22 | 1920 24.7e3 8.6e3 47300 23 | 
-------------------------------------------------------------------------------- /code/ch4/6_dataframe_basic_operation.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import pandas as pd\n", 10 | "from pandas import Series\n", 11 | "from pandas import DataFrame\n", 12 | "\n", 13 | "import numpy as np" 14 | ] 15 | }, 16 | { 17 | "cell_type": "markdown", 18 | "metadata": {}, 19 | "source": [ 20 | "### Series addition operation" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": null, 26 | "metadata": {}, 27 | "outputs": [], 28 | "source": [ 29 | "s1 = Series(\n", 30 | " range(1,6), index=list(\"abced\"))\n", 31 | "s1" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": null, 37 | "metadata": {}, 38 | "outputs": [], 39 | "source": [ 40 | "s2 = Series(\n", 41 | " range(5,11), index=list(\"bcedef\"))\n", 42 | "s2" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": null, 48 | "metadata": {}, 49 | "outputs": [], 50 | "source": [ 51 | "s1 + s2" 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": null, 57 | "metadata": {}, 58 | "outputs": [], 59 | "source": [ 60 | "s1.add(s2)" 61 | ] 62 | }, 63 | { 64 | "cell_type": "markdown", 65 | "metadata": {}, 66 | "source": [ 67 | "### dataframe operation" 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": null, 73 | "metadata": {}, 74 | "outputs": [], 75 | "source": [ 76 | "df1 = DataFrame(\n", 77 | " np.arange(9).reshape(3,3), \n", 78 | " columns=list(\"abc\"))\n", 79 | "df1" 80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": null, 85 | "metadata": {}, 86 | "outputs": [], 87 | "source": [ 88 | "df2 = DataFrame(\n", 89 | " np.arange(16).reshape(4,4), \n", 90 | " columns=list(\"abcd\"))\n", 91 | "df2" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": null, 97 | "metadata": {}, 98 | "outputs": [], 99 | "source": [ 100 | "df1 + df2" 101 | ] 102 | }, 103 | { 104 | "cell_type": "code", 105 | "execution_count": null, 106 | "metadata": {}, 107 | "outputs": [], 108 | "source": [ 109 | "df1.add(df2,fill_value=0)" 110 | ] 111 | }, 112 | { 113 | "cell_type": "markdown", 114 | "metadata": {}, 115 | "source": [ 116 | "### operations for dataframe with series" 117 | ] 118 | }, 119 | { 120 | "cell_type": "code", 121 | "execution_count": null, 122 | "metadata": {}, 123 | "outputs": [], 124 | "source": [ 125 | "df = DataFrame(\n", 126 | " np.arange(16).reshape(4,4), \n", 127 | " columns=list(\"abcd\"))\n", 128 | "df" 129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "execution_count": null, 134 | "metadata": {}, 135 | "outputs": [], 136 | "source": [ 137 | "s = Series(\n", 138 | " np.arange(10,14), \n", 139 | " index=list(\"abcd\"))\n", 140 | "s" 141 | ] 142 | }, 143 | { 144 | "cell_type": "code", 145 | "execution_count": null, 146 | "metadata": {}, 147 | "outputs": [], 148 | "source": [ 149 | "df + s" 150 | ] 151 | }, 152 | { 153 | "cell_type": "code", 154 | "execution_count": null, 155 | "metadata": {}, 156 | "outputs": [], 157 | "source": [ 158 | "s2 = Series(np.arange(10,14))\n", 159 | "s2" 160 | ] 161 | }, 162 | { 163 | "cell_type": "code", 164 | "execution_count": null, 165 | "metadata": {}, 166 | "outputs": [], 167 | "source": [ 168 | "df" 169 | ] 170 | }, 171 | { 172 | "cell_type": "code", 173 | "execution_count": null, 174 | "metadata": {}, 175 | "outputs": [], 176 | "source": [ 177 | "df + s2" 178 | ] 179 | }, 180 | { 181 | "cell_type": "code", 182 | "execution_count": null, 183 | "metadata": {}, 184 | "outputs": [], 185 | "source": [ 186 | "df.add(s2, axis=0)" 187 | ] 188 | } 189 | ], 190 | "metadata": { 191 | "anaconda-cloud": {}, 192 | "kernelspec": { 193 | "display_name": "Python 3", 194 | "language": "python", 195 | "name": "python3" 196 | }, 197 | "language_info": { 198 | "codemirror_mode": { 199 | "name": "ipython", 200 | "version": 3 201 | }, 202 | "file_extension": ".py", 203 | "mimetype": "text/x-python", 204 | "name": "python", 205 | "nbconvert_exporter": "python", 206 | "pygments_lexer": "ipython3", 207 | "version": "3.6.3" 208 | } 209 | }, 210 | "nbformat": 4, 211 | "nbformat_minor": 2 212 | } 213 | -------------------------------------------------------------------------------- /code/ch4/8_built_in_functions.ipynb: 
-------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import pandas as pd\n", 10 | "from pandas import Series\n", 11 | "from pandas import DataFrame\n", 12 | "\n", 13 | "import numpy as np" 14 | ] 15 | }, 16 | { 17 | "cell_type": "markdown", 18 | "metadata": {}, 19 | "source": [ 20 | "### Built-in functions" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": null, 26 | "metadata": {}, 27 | "outputs": [], 28 | "source": [ 29 | "df = pd.read_csv(\"data/wages.csv\")\n", 30 | "df.head()" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": null, 36 | "metadata": {}, 37 | "outputs": [], 38 | "source": [ 39 | "df.describe()" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": null, 45 | "metadata": {}, 46 | "outputs": [], 47 | "source": [ 48 | "df.race.unique()" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": null, 54 | "metadata": {}, 55 | "outputs": [], 56 | "source": [ 57 | "dict(enumerate(sorted(df[\"race\"].unique())))" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": null, 63 | "metadata": {}, 64 | "outputs": [], 65 | "source": [ 66 | "value = list(map(int, np.array(list(enumerate(df[\"race\"].unique())))[:, 0].tolist()))\n", 67 | "key = np.array(list(enumerate(df[\"race\"].unique())), dtype=str)[:, 1].tolist()\n", 68 | "\n", 69 | "value, key" 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": null, 75 | "metadata": {}, 76 | "outputs": [], 77 | "source": [ 78 | "df[\"race\"].replace(to_replace=key, value=value, inplace=True)" 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": null, 84 | "metadata": {}, 85 | "outputs": [], 86 | "source": [ 87 | "df[\"race\"]" 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": null, 93 | "metadata": {}, 94 | "outputs": [], 95 | "source": [ 96 | "value = list(map(int, np.array(list(enumerate(df[\"sex\"].unique())))[:, 0].tolist()))\n", 97 | "key = np.array(list(enumerate(df[\"sex\"].unique())), dtype=str)[:, 1].tolist()\n", 98 | "\n", 99 | "value, key" 100 | ] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "execution_count": null, 105 | "metadata": {}, 106 | "outputs": [], 107 | "source": [ 108 | "df[\"sex\"].replace(to_replace=key, value=value, inplace=True)\n", 109 | "df.head(5)" 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "execution_count": null, 115 | "metadata": {}, 116 | "outputs": [], 117 | "source": [ 118 | "df.sum(axis=0)" 119 | ] 120 | }, 121 | { 122 | "cell_type": "code", 123 | "execution_count": null, 124 | "metadata": {}, 125 | "outputs": [], 126 | "source": [ 127 | "df.sum(axis=1)" 128 | ] 129 | }, 130 | { 131 | "cell_type": "code", 132 | "execution_count": null, 133 | "metadata": {}, 134 | "outputs": [], 135 | "source": [ 136 | "df.isnull()" 137 | ] 138 | }, 139 | { 140 | "cell_type": "code", 141 | "execution_count": null, 142 | "metadata": {}, 143 | "outputs": [], 144 | "source": [ 145 | "df.isnull().sum(0)" 146 | ] 147 | }, 148 | { 149 | "cell_type": "code", 150 | "execution_count": null, 151 | "metadata": {}, 152 | "outputs": [], 153 | "source": [ 154 | "df.sort_values([\"age\", \"earn\"], ascending=False).head(10)" 155 | ] 156 | }, 157 | { 158 | "cell_type": "code", 159 | "execution_count": null, 160 | "metadata": {}, 161 | "outputs": [], 162 | "source": [ 163 | "df.cumsum().head(5)" 164 | ] 
165 | }, 166 | { 167 | "cell_type": "code", 168 | "execution_count": null, 169 | "metadata": {}, 170 | "outputs": [], 171 | "source": [ 172 | "df.cummax().head(10)" 173 | ] 174 | }, 175 | { 176 | "cell_type": "code", 177 | "execution_count": null, 178 | "metadata": {}, 179 | "outputs": [], 180 | "source": [ 181 | "df.sort_values(\"age\", ascending=False).head(10)" 182 | ] 183 | }, 184 | { 185 | "cell_type": "code", 186 | "execution_count": null, 187 | "metadata": {}, 188 | "outputs": [], 189 | "source": [ 190 | "df.age.corr(df.earn)" 191 | ] 192 | }, 193 | { 194 | "cell_type": "code", 195 | "execution_count": null, 196 | "metadata": {}, 197 | "outputs": [], 198 | "source": [ 199 | "df.age[(df.age<45) & (df.age>15)].corr(df.earn)" 200 | ] 201 | }, 202 | { 203 | "cell_type": "code", 204 | "execution_count": null, 205 | "metadata": {}, 206 | "outputs": [], 207 | "source": [ 208 | "df.age.cov(df.earn)" 209 | ] 210 | }, 211 | { 212 | "cell_type": "code", 213 | "execution_count": null, 214 | "metadata": {}, 215 | "outputs": [], 216 | "source": [ 217 | "df.corr()" 218 | ] 219 | }, 220 | { 221 | "cell_type": "code", 222 | "execution_count": null, 223 | "metadata": {}, 224 | "outputs": [], 225 | "source": [ 226 | "df.corrwith(df.earn)" 227 | ] 228 | }, 229 | { 230 | "cell_type": "code", 231 | "execution_count": null, 232 | "metadata": {}, 233 | "outputs": [], 234 | "source": [ 235 | "df.sex.value_counts(sort=True)" 236 | ] 237 | } 238 | ], 239 | "metadata": { 240 | "anaconda-cloud": {}, 241 | "kernelspec": { 242 | "display_name": "Python 3", 243 | "language": "python", 244 | "name": "python3" 245 | }, 246 | "language_info": { 247 | "codemirror_mode": { 248 | "name": "ipython", 249 | "version": 3 250 | }, 251 | "file_extension": ".py", 252 | "mimetype": "text/x-python", 253 | "name": "python", 254 | "nbconvert_exporter": "python", 255 | "pygments_lexer": "ipython3", 256 | "version": "3.6.3" 257 | } 258 | }, 259 | "nbformat": 4, 260 | "nbformat_minor": 2 261 | } 262 | -------------------------------------------------------------------------------- /code/ch4/data/excel-comp-data.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeamLab/machine_learning_from_scratch_with_python/d52439f9536d98f28164e8c1ecc6e9070843b971/code/ch4/data/excel-comp-data.xlsx -------------------------------------------------------------------------------- /code/ch5/data/AirPassengers.csv: -------------------------------------------------------------------------------- 1 | Month,#Passengers 2 | 1949-01,112 3 | 1949-02,118 4 | 1949-03,132 5 | 1949-04,129 6 | 1949-05,121 7 | 1949-06,135 8 | 1949-07,148 9 | 1949-08,148 10 | 1949-09,136 11 | 1949-10,119 12 | 1949-11,104 13 | 1949-12,118 14 | 1950-01,115 15 | 1950-02,126 16 | 1950-03,141 17 | 1950-04,135 18 | 1950-05,125 19 | 1950-06,149 20 | 1950-07,170 21 | 1950-08,170 22 | 1950-09,158 23 | 1950-10,133 24 | 1950-11,114 25 | 1950-12,140 26 | 1951-01,145 27 | 1951-02,150 28 | 1951-03,178 29 | 1951-04,163 30 | 1951-05,172 31 | 1951-06,178 32 | 1951-07,199 33 | 1951-08,199 34 | 1951-09,184 35 | 1951-10,162 36 | 1951-11,146 37 | 1951-12,166 38 | 1952-01,171 39 | 1952-02,180 40 | 1952-03,193 41 | 1952-04,181 42 | 1952-05,183 43 | 1952-06,218 44 | 1952-07,230 45 | 1952-08,242 46 | 1952-09,209 47 | 1952-10,191 48 | 1952-11,172 49 | 1952-12,194 50 | 1953-01,196 51 | 1953-02,196 52 | 1953-03,236 53 | 1953-04,235 54 | 1953-05,229 55 | 1953-06,243 56 | 1953-07,264 57 | 1953-08,272 58 | 1953-09,237 59 | 1953-10,211 60 | 
1953-11,180 61 | 1953-12,201 62 | 1954-01,204 63 | 1954-02,188 64 | 1954-03,235 65 | 1954-04,227 66 | 1954-05,234 67 | 1954-06,264 68 | 1954-07,302 69 | 1954-08,293 70 | 1954-09,259 71 | 1954-10,229 72 | 1954-11,203 73 | 1954-12,229 74 | 1955-01,242 75 | 1955-02,233 76 | 1955-03,267 77 | 1955-04,269 78 | 1955-05,270 79 | 1955-06,315 80 | 1955-07,364 81 | 1955-08,347 82 | 1955-09,312 83 | 1955-10,274 84 | 1955-11,237 85 | 1955-12,278 86 | 1956-01,284 87 | 1956-02,277 88 | 1956-03,317 89 | 1956-04,313 90 | 1956-05,318 91 | 1956-06,374 92 | 1956-07,413 93 | 1956-08,405 94 | 1956-09,355 95 | 1956-10,306 96 | 1956-11,271 97 | 1956-12,306 98 | 1957-01,315 99 | 1957-02,301 100 | 1957-03,356 101 | 1957-04,348 102 | 1957-05,355 103 | 1957-06,422 104 | 1957-07,465 105 | 1957-08,467 106 | 1957-09,404 107 | 1957-10,347 108 | 1957-11,305 109 | 1957-12,336 110 | 1958-01,340 111 | 1958-02,318 112 | 1958-03,362 113 | 1958-04,348 114 | 1958-05,363 115 | 1958-06,435 116 | 1958-07,491 117 | 1958-08,505 118 | 1958-09,404 119 | 1958-10,359 120 | 1958-11,310 121 | 1958-12,337 122 | 1959-01,360 123 | 1959-02,342 124 | 1959-03,406 125 | 1959-04,396 126 | 1959-05,420 127 | 1959-06,472 128 | 1959-07,548 129 | 1959-08,559 130 | 1959-09,463 131 | 1959-10,407 132 | 1959-11,362 133 | 1959-12,405 134 | 1960-01,417 135 | 1960-02,391 136 | 1960-03,419 137 | 1960-04,461 138 | 1960-05,472 139 | 1960-06,535 140 | 1960-07,622 141 | 1960-08,606 142 | 1960-09,508 143 | 1960-10,461 144 | 1960-11,390 145 | 1960-12,432 146 | -------------------------------------------------------------------------------- /code/ch5/data/customer-status.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeamLab/machine_learning_from_scratch_with_python/d52439f9536d98f28164e8c1ecc6e9070843b971/code/ch5/data/customer-status.xlsx -------------------------------------------------------------------------------- /code/ch5/data/excel-comp-data.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeamLab/machine_learning_from_scratch_with_python/d52439f9536d98f28164e8c1ecc6e9070843b971/code/ch5/data/excel-comp-data.xlsx -------------------------------------------------------------------------------- /code/ch5/data/flights.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeamLab/machine_learning_from_scratch_with_python/d52439f9536d98f28164e8c1ecc6e9070843b971/code/ch5/data/flights.db -------------------------------------------------------------------------------- /code/ch5/data/movie_rating.csv: -------------------------------------------------------------------------------- 1 | critic,title,rating 2 | Jack Matthews,Lady in the Water,3.0 3 | Jack Matthews,Snakes on a Plane,4.0 4 | Jack Matthews,You Me and Dupree,3.5 5 | Jack Matthews,Superman Returns,5.0 6 | Jack Matthews,The Night Listener,3.0 7 | Mick LaSalle,Lady in the Water,3.0 8 | Mick LaSalle,Snakes on a Plane,4.0 9 | Mick LaSalle,Just My Luck,2.0 10 | Mick LaSalle,Superman Returns,3.0 11 | Mick LaSalle,You Me and Dupree,2.0 12 | Mick LaSalle,The Night Listener,3.0 13 | Claudia Puig,Snakes on a Plane,3.5 14 | Claudia Puig,Just My Luck,3.0 15 | Claudia Puig,You Me and Dupree,2.5 16 | Claudia Puig,Superman Returns,4.0 17 | Claudia Puig,The Night Listener,4.5 18 | Lisa Rose,Lady in the Water,2.5 19 | Lisa Rose,Snakes on a Plane,3.5 20 | Lisa Rose,Just My Luck,3.0 21 | Lisa Rose,Superman Returns,3.5 
22 | Lisa Rose,The Night Listener,3.0 23 | Lisa Rose,You Me and Dupree,2.5 24 | Toby,Snakes on a Plane,4.5 25 | Toby,Superman Returns,4.0 26 | Toby,You Me and Dupree,1.0 27 | Gene Seymour,Lady in the Water,3.0 28 | Gene Seymour,Snakes on a Plane,3.5 29 | Gene Seymour,Just My Luck,1.5 30 | Gene Seymour,Superman Returns,5.0 31 | Gene Seymour,You Me and Dupree,3.5 32 | Gene Seymour,The Night Listener,3.0 33 | -------------------------------------------------------------------------------- /code/ch5/data/sales-feb-2014.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeamLab/machine_learning_from_scratch_with_python/d52439f9536d98f28164e8c1ecc6e9070843b971/code/ch5/data/sales-feb-2014.xlsx -------------------------------------------------------------------------------- /code/ch5/data/sales-jan-2014.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeamLab/machine_learning_from_scratch_with_python/d52439f9536d98f28164e8c1ecc6e9070843b971/code/ch5/data/sales-jan-2014.xlsx -------------------------------------------------------------------------------- /code/ch5/data/sales-mar-2014.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeamLab/machine_learning_from_scratch_with_python/d52439f9536d98f28164e8c1ecc6e9070843b971/code/ch5/data/sales-mar-2014.xlsx -------------------------------------------------------------------------------- /code/ch6/9_pipeline_example.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "toc": "true" 7 | }, 8 | "source": [ 9 | "# Table of Contents\n", 10 | "
<div id=\"toc\"></div>
" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 2, 16 | "metadata": { 17 | "ExecuteTime": { 18 | "end_time": "2017-11-14T07:30:27.836809Z", 19 | "start_time": "2017-11-14T07:30:27.648759Z" 20 | }, 21 | "collapsed": true 22 | }, 23 | "outputs": [], 24 | "source": [ 25 | "from sklearn.datasets import load_boston\n", 26 | "import matplotlib.pyplot as plt\n", 27 | "import numpy as np" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": 3, 33 | "metadata": { 34 | "ExecuteTime": { 35 | "end_time": "2017-11-14T07:30:27.846084Z", 36 | "start_time": "2017-11-14T07:30:27.837993Z" 37 | }, 38 | "collapsed": true 39 | }, 40 | "outputs": [], 41 | "source": [ 42 | "boston = load_boston()\n", 43 | "\n", 44 | "x_data = boston.data\n", 45 | "y_data = boston.target.reshape(boston.target.size,1)" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": 4, 51 | "metadata": { 52 | "ExecuteTime": { 53 | "end_time": "2017-11-14T07:30:27.952644Z", 54 | "start_time": "2017-11-14T07:30:27.946769Z" 55 | }, 56 | "collapsed": true 57 | }, 58 | "outputs": [], 59 | "source": [ 60 | "from sklearn.model_selection import train_test_split\n", 61 | "x_train, x_test, y_train, y_test = train_test_split(x_data, y_data, test_size=0.2, random_state=22)" 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": 5, 67 | "metadata": { 68 | "ExecuteTime": { 69 | "end_time": "2017-11-14T07:30:28.402194Z", 70 | "start_time": "2017-11-14T07:30:28.396785Z" 71 | } 72 | }, 73 | "outputs": [ 74 | { 75 | "data": { 76 | "text/plain": [ 77 | "((404, 13), (404, 1), (102, 13), (102, 1))" 78 | ] 79 | }, 80 | "execution_count": 5, 81 | "metadata": {}, 82 | "output_type": "execute_result" 83 | } 84 | ], 85 | "source": [ 86 | "x_train.shape, y_train.shape, x_test.shape, y_test.shape" 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": 7, 92 | "metadata": { 93 | "ExecuteTime": { 94 | "end_time": "2017-11-14T07:31:50.044854Z", 95 | "start_time": "2017-11-14T07:31:50.002449Z" 96 | } 97 | }, 98 | "outputs": [ 99 | { 100 | "data": { 101 | "text/plain": [ 102 | "0.76580205144610103" 103 | ] 104 | }, 105 | "execution_count": 7, 106 | "metadata": {}, 107 | "output_type": "execute_result" 108 | } 109 | ], 110 | "source": [ 111 | "from sklearn.preprocessing import StandardScaler\n", 112 | "from sklearn.linear_model import LinearRegression\n", 113 | "from sklearn.neural_network import MLPRegressor\n", 114 | "\n", 115 | "from sklearn.pipeline import Pipeline\n", 116 | "\n", 117 | "pipe_lr = Pipeline(steps=[\n", 118 | " ('scl', StandardScaler()), ('regr', LinearRegression())\n", 119 | "])\n", 120 | "pipe_mlp = Pipeline(steps=[\n", 121 | " ('scl', StandardScaler()), ('regr', MLPRegressor())\n", 122 | "])\n", 123 | "\n", 124 | "\n", 125 | "pipe_lr.fit(X=x_train, y=y_train)\n", 126 | "pipe_lr.score(x_test, y_test)" 127 | ] 128 | }, 129 | { 130 | "cell_type": "code", 131 | "execution_count": 8, 132 | "metadata": { 133 | "ExecuteTime": { 134 | "end_time": "2017-11-14T07:31:54.034083Z", 135 | "start_time": "2017-11-14T07:31:53.762815Z" 136 | } 137 | }, 138 | "outputs": [ 139 | { 140 | "name": "stderr", 141 | "output_type": "stream", 142 | "text": [ 143 | "/Users/sungchulchoi/miniconda3/envs/ml_python/lib/python3.6/site-packages/sklearn/neural_network/multilayer_perceptron.py:1266: DataConversionWarning: A column-vector y was passed when a 1d array was expected. 
Please change the shape of y to (n_samples, ), for example using ravel().\n", 144 | " y = column_or_1d(y, warn=True)\n", 145 | "/Users/sungchulchoi/miniconda3/envs/ml_python/lib/python3.6/site-packages/sklearn/neural_network/multilayer_perceptron.py:563: ConvergenceWarning: Stochastic Optimizer: Maximum iterations reached and the optimization hasn't converged yet.\n", 146 | " % (), ConvergenceWarning)\n" 147 | ] 148 | }, 149 | { 150 | "data": { 151 | "text/plain": [ 152 | "0.74963877810227975" 153 | ] 154 | }, 155 | "execution_count": 8, 156 | "metadata": {}, 157 | "output_type": "execute_result" 158 | } 159 | ], 160 | "source": [ 161 | "pipe_mlp.fit(X=x_train, y=y_train)\n", 162 | "pipe_mlp.score(x_test, y_test)" 163 | ] 164 | }, 165 | { 166 | "cell_type": "code", 167 | "execution_count": null, 168 | "metadata": { 169 | "collapsed": true 170 | }, 171 | "outputs": [], 172 | "source": [] 173 | } 174 | ], 175 | "metadata": { 176 | "_draft": { 177 | "nbviewer_url": "https://gist.github.com/152eb43422672d8a45ed69dd2e98e256" 178 | }, 179 | "anaconda-cloud": {}, 180 | "gist": { 181 | "data": { 182 | "description": "codes/4_ml/1_linear_regression/9_pipeline_example.ipynb", 183 | "public": true 184 | }, 185 | "id": "152eb43422672d8a45ed69dd2e98e256" 186 | }, 187 | "kernelspec": { 188 | "display_name": "Python 3", 189 | "language": "python", 190 | "name": "python3" 191 | }, 192 | "language_info": { 193 | "codemirror_mode": { 194 | "name": "ipython", 195 | "version": 3 196 | }, 197 | "file_extension": ".py", 198 | "mimetype": "text/x-python", 199 | "name": "python", 200 | "nbconvert_exporter": "python", 201 | "pygments_lexer": "ipython3", 202 | "version": "3.6.2" 203 | }, 204 | "nav_menu": {}, 205 | "toc": { 206 | "navigate_menu": true, 207 | "number_sections": true, 208 | "sideBar": true, 209 | "threshold": 6, 210 | "toc_cell": true, 211 | "toc_section_display": "block", 212 | "toc_window_display": false 213 | }, 214 | "toc_position": { 215 | "height": "830px", 216 | "left": "0px", 217 | "right": "auto", 218 | "top": "106px", 219 | "width": "212px" 220 | } 221 | }, 222 | "nbformat": 4, 223 | "nbformat_minor": 2 224 | } 225 | -------------------------------------------------------------------------------- /code/ch6/test.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeamLab/machine_learning_from_scratch_with_python/d52439f9536d98f28164e8c1ecc6e9070843b971/code/ch6/test.png -------------------------------------------------------------------------------- /code/ch6/titanic/gender_submission.csv: -------------------------------------------------------------------------------- 1 | PassengerId,Survived 2 | 892,0 3 | 893,1 4 | 894,0 5 | 895,0 6 | 896,1 7 | 897,0 8 | 898,1 9 | 899,0 10 | 900,1 11 | 901,0 12 | 902,0 13 | 903,0 14 | 904,1 15 | 905,0 16 | 906,1 17 | 907,1 18 | 908,0 19 | 909,0 20 | 910,1 21 | 911,1 22 | 912,0 23 | 913,0 24 | 914,1 25 | 915,0 26 | 916,1 27 | 917,0 28 | 918,1 29 | 919,0 30 | 920,0 31 | 921,0 32 | 922,0 33 | 923,0 34 | 924,1 35 | 925,1 36 | 926,0 37 | 927,0 38 | 928,1 39 | 929,1 40 | 930,0 41 | 931,0 42 | 932,0 43 | 933,0 44 | 934,0 45 | 935,1 46 | 936,1 47 | 937,0 48 | 938,0 49 | 939,0 50 | 940,1 51 | 941,1 52 | 942,0 53 | 943,0 54 | 944,1 55 | 945,1 56 | 946,0 57 | 947,0 58 | 948,0 59 | 949,0 60 | 950,0 61 | 951,1 62 | 952,0 63 | 953,0 64 | 954,0 65 | 955,1 66 | 956,0 67 | 957,1 68 | 958,1 69 | 959,0 70 | 960,0 71 | 961,1 72 | 962,1 73 | 963,0 74 | 964,1 75 | 965,0 76 | 966,1 77 | 967,0 78 | 968,0 79 | 
969,1 80 | 970,0 81 | 971,1 82 | 972,0 83 | 973,0 84 | 974,0 85 | 975,0 86 | 976,0 87 | 977,0 88 | 978,1 89 | 979,1 90 | 980,1 91 | 981,0 92 | 982,1 93 | 983,0 94 | 984,1 95 | 985,0 96 | 986,0 97 | 987,0 98 | 988,1 99 | 989,0 100 | 990,1 101 | 991,0 102 | 992,1 103 | 993,0 104 | 994,0 105 | 995,0 106 | 996,1 107 | 997,0 108 | 998,0 109 | 999,0 110 | 1000,0 111 | 1001,0 112 | 1002,0 113 | 1003,1 114 | 1004,1 115 | 1005,1 116 | 1006,1 117 | 1007,0 118 | 1008,0 119 | 1009,1 120 | 1010,0 121 | 1011,1 122 | 1012,1 123 | 1013,0 124 | 1014,1 125 | 1015,0 126 | 1016,0 127 | 1017,1 128 | 1018,0 129 | 1019,1 130 | 1020,0 131 | 1021,0 132 | 1022,0 133 | 1023,0 134 | 1024,1 135 | 1025,0 136 | 1026,0 137 | 1027,0 138 | 1028,0 139 | 1029,0 140 | 1030,1 141 | 1031,0 142 | 1032,1 143 | 1033,1 144 | 1034,0 145 | 1035,0 146 | 1036,0 147 | 1037,0 148 | 1038,0 149 | 1039,0 150 | 1040,0 151 | 1041,0 152 | 1042,1 153 | 1043,0 154 | 1044,0 155 | 1045,1 156 | 1046,0 157 | 1047,0 158 | 1048,1 159 | 1049,1 160 | 1050,0 161 | 1051,1 162 | 1052,1 163 | 1053,0 164 | 1054,1 165 | 1055,0 166 | 1056,0 167 | 1057,1 168 | 1058,0 169 | 1059,0 170 | 1060,1 171 | 1061,1 172 | 1062,0 173 | 1063,0 174 | 1064,0 175 | 1065,0 176 | 1066,0 177 | 1067,1 178 | 1068,1 179 | 1069,0 180 | 1070,1 181 | 1071,1 182 | 1072,0 183 | 1073,0 184 | 1074,1 185 | 1075,0 186 | 1076,1 187 | 1077,0 188 | 1078,1 189 | 1079,0 190 | 1080,1 191 | 1081,0 192 | 1082,0 193 | 1083,0 194 | 1084,0 195 | 1085,0 196 | 1086,0 197 | 1087,0 198 | 1088,0 199 | 1089,1 200 | 1090,0 201 | 1091,1 202 | 1092,1 203 | 1093,0 204 | 1094,0 205 | 1095,1 206 | 1096,0 207 | 1097,0 208 | 1098,1 209 | 1099,0 210 | 1100,1 211 | 1101,0 212 | 1102,0 213 | 1103,0 214 | 1104,0 215 | 1105,1 216 | 1106,1 217 | 1107,0 218 | 1108,1 219 | 1109,0 220 | 1110,1 221 | 1111,0 222 | 1112,1 223 | 1113,0 224 | 1114,1 225 | 1115,0 226 | 1116,1 227 | 1117,1 228 | 1118,0 229 | 1119,1 230 | 1120,0 231 | 1121,0 232 | 1122,0 233 | 1123,1 234 | 1124,0 235 | 1125,0 236 | 1126,0 237 | 1127,0 238 | 1128,0 239 | 1129,0 240 | 1130,1 241 | 1131,1 242 | 1132,1 243 | 1133,1 244 | 1134,0 245 | 1135,0 246 | 1136,0 247 | 1137,0 248 | 1138,1 249 | 1139,0 250 | 1140,1 251 | 1141,1 252 | 1142,1 253 | 1143,0 254 | 1144,0 255 | 1145,0 256 | 1146,0 257 | 1147,0 258 | 1148,0 259 | 1149,0 260 | 1150,1 261 | 1151,0 262 | 1152,0 263 | 1153,0 264 | 1154,1 265 | 1155,1 266 | 1156,0 267 | 1157,0 268 | 1158,0 269 | 1159,0 270 | 1160,1 271 | 1161,0 272 | 1162,0 273 | 1163,0 274 | 1164,1 275 | 1165,1 276 | 1166,0 277 | 1167,1 278 | 1168,0 279 | 1169,0 280 | 1170,0 281 | 1171,0 282 | 1172,1 283 | 1173,0 284 | 1174,1 285 | 1175,1 286 | 1176,1 287 | 1177,0 288 | 1178,0 289 | 1179,0 290 | 1180,0 291 | 1181,0 292 | 1182,0 293 | 1183,1 294 | 1184,0 295 | 1185,0 296 | 1186,0 297 | 1187,0 298 | 1188,1 299 | 1189,0 300 | 1190,0 301 | 1191,0 302 | 1192,0 303 | 1193,0 304 | 1194,0 305 | 1195,0 306 | 1196,1 307 | 1197,1 308 | 1198,0 309 | 1199,0 310 | 1200,0 311 | 1201,1 312 | 1202,0 313 | 1203,0 314 | 1204,0 315 | 1205,1 316 | 1206,1 317 | 1207,1 318 | 1208,0 319 | 1209,0 320 | 1210,0 321 | 1211,0 322 | 1212,0 323 | 1213,0 324 | 1214,0 325 | 1215,0 326 | 1216,1 327 | 1217,0 328 | 1218,1 329 | 1219,0 330 | 1220,0 331 | 1221,0 332 | 1222,1 333 | 1223,0 334 | 1224,0 335 | 1225,1 336 | 1226,0 337 | 1227,0 338 | 1228,0 339 | 1229,0 340 | 1230,0 341 | 1231,0 342 | 1232,0 343 | 1233,0 344 | 1234,0 345 | 1235,1 346 | 1236,0 347 | 1237,1 348 | 1238,0 349 | 1239,1 350 | 1240,0 351 | 1241,1 352 | 1242,1 353 | 1243,0 354 | 1244,0 355 | 1245,0 356 | 
1246,1 357 | 1247,0 358 | 1248,1 359 | 1249,0 360 | 1250,0 361 | 1251,1 362 | 1252,0 363 | 1253,1 364 | 1254,1 365 | 1255,0 366 | 1256,1 367 | 1257,1 368 | 1258,0 369 | 1259,1 370 | 1260,1 371 | 1261,0 372 | 1262,0 373 | 1263,1 374 | 1264,0 375 | 1265,0 376 | 1266,1 377 | 1267,1 378 | 1268,1 379 | 1269,0 380 | 1270,0 381 | 1271,0 382 | 1272,0 383 | 1273,0 384 | 1274,1 385 | 1275,1 386 | 1276,0 387 | 1277,1 388 | 1278,0 389 | 1279,0 390 | 1280,0 391 | 1281,0 392 | 1282,0 393 | 1283,1 394 | 1284,0 395 | 1285,0 396 | 1286,0 397 | 1287,1 398 | 1288,0 399 | 1289,1 400 | 1290,0 401 | 1291,0 402 | 1292,1 403 | 1293,0 404 | 1294,1 405 | 1295,0 406 | 1296,0 407 | 1297,0 408 | 1298,0 409 | 1299,0 410 | 1300,1 411 | 1301,1 412 | 1302,1 413 | 1303,1 414 | 1304,1 415 | 1305,0 416 | 1306,1 417 | 1307,0 418 | 1308,0 419 | 1309,0 420 | -------------------------------------------------------------------------------- /code/ch6/titanic/submission_result.csv: -------------------------------------------------------------------------------- 1 | PassengerId,Survived 2 | 892,0 3 | 893,0 4 | 894,0 5 | 895,0 6 | 896,1 7 | 897,0 8 | 898,1 9 | 899,0 10 | 900,1 11 | 901,0 12 | 902,0 13 | 903,0 14 | 904,1 15 | 905,0 16 | 906,1 17 | 907,1 18 | 908,0 19 | 909,0 20 | 910,1 21 | 911,1 22 | 912,0 23 | 913,0 24 | 914,1 25 | 915,1 26 | 916,1 27 | 917,0 28 | 918,1 29 | 919,0 30 | 920,0 31 | 921,0 32 | 922,0 33 | 923,0 34 | 924,0 35 | 925,1 36 | 926,1 37 | 927,0 38 | 928,1 39 | 929,1 40 | 930,0 41 | 931,0 42 | 932,0 43 | 933,0 44 | 934,0 45 | 935,1 46 | 936,1 47 | 937,0 48 | 938,0 49 | 939,0 50 | 940,1 51 | 941,0 52 | 942,0 53 | 943,0 54 | 944,1 55 | 945,1 56 | 946,0 57 | 947,0 58 | 948,0 59 | 949,0 60 | 950,0 61 | 951,1 62 | 952,0 63 | 953,0 64 | 954,0 65 | 955,1 66 | 956,1 67 | 957,1 68 | 958,1 69 | 959,0 70 | 960,1 71 | 961,1 72 | 962,1 73 | 963,0 74 | 964,1 75 | 965,1 76 | 966,1 77 | 967,1 78 | 968,0 79 | 969,1 80 | 970,0 81 | 971,1 82 | 972,0 83 | 973,0 84 | 974,0 85 | 975,0 86 | 976,0 87 | 977,0 88 | 978,1 89 | 979,1 90 | 980,1 91 | 981,0 92 | 982,1 93 | 983,0 94 | 984,1 95 | 985,0 96 | 986,1 97 | 987,0 98 | 988,1 99 | 989,0 100 | 990,1 101 | 991,0 102 | 992,1 103 | 993,0 104 | 994,0 105 | 995,0 106 | 996,1 107 | 997,0 108 | 998,0 109 | 999,0 110 | 1000,0 111 | 1001,0 112 | 1002,0 113 | 1003,1 114 | 1004,1 115 | 1005,1 116 | 1006,1 117 | 1007,0 118 | 1008,0 119 | 1009,1 120 | 1010,1 121 | 1011,1 122 | 1012,1 123 | 1013,0 124 | 1014,1 125 | 1015,0 126 | 1016,0 127 | 1017,1 128 | 1018,0 129 | 1019,1 130 | 1020,0 131 | 1021,0 132 | 1022,0 133 | 1023,0 134 | 1024,1 135 | 1025,0 136 | 1026,0 137 | 1027,0 138 | 1028,0 139 | 1029,0 140 | 1030,1 141 | 1031,0 142 | 1032,0 143 | 1033,1 144 | 1034,0 145 | 1035,0 146 | 1036,0 147 | 1037,0 148 | 1038,0 149 | 1039,0 150 | 1040,0 151 | 1041,0 152 | 1042,1 153 | 1043,0 154 | 1044,0 155 | 1045,0 156 | 1046,0 157 | 1047,0 158 | 1048,1 159 | 1049,1 160 | 1050,0 161 | 1051,1 162 | 1052,1 163 | 1053,0 164 | 1054,1 165 | 1055,0 166 | 1056,0 167 | 1057,1 168 | 1058,0 169 | 1059,0 170 | 1060,1 171 | 1061,1 172 | 1062,0 173 | 1063,0 174 | 1064,0 175 | 1065,0 176 | 1066,0 177 | 1067,1 178 | 1068,1 179 | 1069,0 180 | 1070,1 181 | 1071,1 182 | 1072,0 183 | 1073,0 184 | 1074,1 185 | 1075,0 186 | 1076,1 187 | 1077,0 188 | 1078,1 189 | 1079,0 190 | 1080,0 191 | 1081,0 192 | 1082,0 193 | 1083,0 194 | 1084,0 195 | 1085,0 196 | 1086,0 197 | 1087,0 198 | 1088,1 199 | 1089,1 200 | 1090,0 201 | 1091,1 202 | 1092,1 203 | 1093,0 204 | 1094,0 205 | 1095,1 206 | 1096,0 207 | 1097,0 208 | 1098,1 209 | 1099,0 210 
| 1100,1 211 | 1101,0 212 | 1102,0 213 | 1103,0 214 | 1104,0 215 | 1105,0 216 | 1106,0 217 | 1107,0 218 | 1108,1 219 | 1109,0 220 | 1110,1 221 | 1111,0 222 | 1112,1 223 | 1113,0 224 | 1114,1 225 | 1115,0 226 | 1116,1 227 | 1117,1 228 | 1118,0 229 | 1119,1 230 | 1120,0 231 | 1121,0 232 | 1122,0 233 | 1123,1 234 | 1124,0 235 | 1125,0 236 | 1126,0 237 | 1127,0 238 | 1128,0 239 | 1129,0 240 | 1130,1 241 | 1131,1 242 | 1132,1 243 | 1133,1 244 | 1134,0 245 | 1135,0 246 | 1136,0 247 | 1137,0 248 | 1138,1 249 | 1139,0 250 | 1140,1 251 | 1141,1 252 | 1142,1 253 | 1143,0 254 | 1144,1 255 | 1145,0 256 | 1146,0 257 | 1147,0 258 | 1148,0 259 | 1149,0 260 | 1150,1 261 | 1151,0 262 | 1152,0 263 | 1153,0 264 | 1154,1 265 | 1155,1 266 | 1156,0 267 | 1157,0 268 | 1158,0 269 | 1159,0 270 | 1160,1 271 | 1161,0 272 | 1162,0 273 | 1163,0 274 | 1164,1 275 | 1165,1 276 | 1166,0 277 | 1167,1 278 | 1168,0 279 | 1169,0 280 | 1170,0 281 | 1171,0 282 | 1172,1 283 | 1173,0 284 | 1174,1 285 | 1175,1 286 | 1176,1 287 | 1177,0 288 | 1178,0 289 | 1179,0 290 | 1180,0 291 | 1181,0 292 | 1182,0 293 | 1183,1 294 | 1184,0 295 | 1185,0 296 | 1186,0 297 | 1187,0 298 | 1188,1 299 | 1189,0 300 | 1190,0 301 | 1191,0 302 | 1192,0 303 | 1193,0 304 | 1194,0 305 | 1195,0 306 | 1196,1 307 | 1197,1 308 | 1198,0 309 | 1199,0 310 | 1200,0 311 | 1201,0 312 | 1202,0 313 | 1203,0 314 | 1204,0 315 | 1205,1 316 | 1206,1 317 | 1207,1 318 | 1208,0 319 | 1209,0 320 | 1210,0 321 | 1211,0 322 | 1212,0 323 | 1213,0 324 | 1214,0 325 | 1215,0 326 | 1216,1 327 | 1217,0 328 | 1218,1 329 | 1219,0 330 | 1220,0 331 | 1221,0 332 | 1222,1 333 | 1223,0 334 | 1224,0 335 | 1225,1 336 | 1226,0 337 | 1227,0 338 | 1228,0 339 | 1229,0 340 | 1230,0 341 | 1231,0 342 | 1232,0 343 | 1233,0 344 | 1234,0 345 | 1235,1 346 | 1236,0 347 | 1237,1 348 | 1238,0 349 | 1239,1 350 | 1240,0 351 | 1241,1 352 | 1242,1 353 | 1243,0 354 | 1244,0 355 | 1245,0 356 | 1246,1 357 | 1247,0 358 | 1248,1 359 | 1249,0 360 | 1250,0 361 | 1251,0 362 | 1252,0 363 | 1253,1 364 | 1254,1 365 | 1255,0 366 | 1256,1 367 | 1257,0 368 | 1258,0 369 | 1259,1 370 | 1260,1 371 | 1261,0 372 | 1262,0 373 | 1263,1 374 | 1264,0 375 | 1265,0 376 | 1266,1 377 | 1267,1 378 | 1268,0 379 | 1269,0 380 | 1270,0 381 | 1271,0 382 | 1272,0 383 | 1273,0 384 | 1274,1 385 | 1275,1 386 | 1276,0 387 | 1277,1 388 | 1278,0 389 | 1279,0 390 | 1280,0 391 | 1281,0 392 | 1282,1 393 | 1283,1 394 | 1284,0 395 | 1285,0 396 | 1286,0 397 | 1287,1 398 | 1288,0 399 | 1289,1 400 | 1290,0 401 | 1291,0 402 | 1292,1 403 | 1293,0 404 | 1294,1 405 | 1295,1 406 | 1296,0 407 | 1297,0 408 | 1298,0 409 | 1299,0 410 | 1300,1 411 | 1301,1 412 | 1302,1 413 | 1303,1 414 | 1304,1 415 | 1305,0 416 | 1306,1 417 | 1307,0 418 | 1308,0 419 | 1309,0 420 | -------------------------------------------------------------------------------- /code/ch7/data/slr06.csv: -------------------------------------------------------------------------------- 1 | X,Y 2 | 108,392.5 3 | 19,46.2 4 | 13,15.7 5 | 124,422.2 6 | 40,119.4 7 | 57,170.9 8 | 23,56.9 9 | 14,77.5 10 | 45,214 11 | 10,65.3 12 | 5,20.9 13 | 48,248.1 14 | 11,23.5 15 | 23,39.6 16 | 7,48.8 17 | 2,6.6 18 | 24,134.9 19 | 6,50.9 20 | 3,4.4 21 | 23,113 22 | 6,14.8 23 | 9,48.7 24 | 9,52.1 25 | 3,13.2 26 | 29,103.9 27 | 7,77.5 28 | 4,11.8 29 | 20,98.1 30 | 7,27.9 31 | 4,38.1 32 | 0,0 33 | 25,69.2 34 | 6,14.6 35 | 5,40.3 36 | 22,161.5 37 | 11,57.2 38 | 61,217.6 39 | 12,58.1 40 | 4,12.6 41 | 16,59.6 42 | 13,89.9 43 | 60,202.4 44 | 41,181.3 45 | 37,152.8 46 | 55,162.8 47 | 41,73.4 48 | 11,21.3 49 | 27,92.6 50 | 8,76.1 51 | 
3,39.9 52 | 17,142.1 53 | 13,93 54 | 13,31.9 55 | 15,32.1 56 | 8,55.6 57 | 29,133.3 58 | 30,194.5 59 | 24,137.9 60 | 9,87.4 61 | 31,209.8 62 | 14,95.5 63 | 53,244.6 64 | 26,187.5 -------------------------------------------------------------------------------- /code/ch8/yield.csv: -------------------------------------------------------------------------------- 1 | i Temp Yield 2 | 1 50 3.3 3 | 2 50 2.8 4 | 3 50 2.9 5 | 4 70 2.3 6 | 5 70 2.6 7 | 6 70 2.1 8 | 7 80 2.5 9 | 8 80 2.9 10 | 9 80 2.4 11 | 10 90 3 12 | 11 90 3.1 13 | 12 90 2.8 14 | 13 100 3.3 15 | 14 100 3.5 16 | 15 100 3 17 | -------------------------------------------------------------------------------- /code/ch9/data/generator.csv: -------------------------------------------------------------------------------- 1 | ID,RPM,VIBRATION,STATUS 2 | 1,568,585,good 3 | 2,586,565,good 4 | 3,609,536,good 5 | 4,616,492,good 6 | 5,632,465,good 7 | 6,652,528,good 8 | 7,655,496,good 9 | 8,660,471,good 10 | 9,688,408,good 11 | 10,696,399,good 12 | 11,708,387,good 13 | 12,701,434,good 14 | 13,715,506,good 15 | 14,732,485,good 16 | 15,731,395,good 17 | 16,749,398,good 18 | 17,759,512,good 19 | 18,773,431,good 20 | 19,782,456,good 21 | 20,797,476,good 22 | 21,794,421,good 23 | 22,824,452,good 24 | 23,835,441,good 25 | 24,862,372,good 26 | 25,879,340,good 27 | 26,892,370,good 28 | 27,913,373,good 29 | 28,933,330,good 30 | 29,562,309,faulty 31 | 30,578,346,faulty 32 | 31,593,357,faulty 33 | 32,626,341,faulty 34 | 33,635,252,faulty 35 | 34,658,235,faulty 36 | 35,663,299,faulty 37 | 36,677,223,faulty 38 | 37,685,303,faulty 39 | 38,698,197,faulty 40 | 39,699,311,faulty 41 | 40,712,257,faulty 42 | 41,722,193,faulty 43 | 42,735,259,faulty 44 | 43,738,314,faulty 45 | 44,753,113,faulty 46 | 45,767,286,faulty 47 | 46,771,264,faulty 48 | 47,780,137,faulty 49 | 48,784,131,faulty 50 | 49,798,132,faulty 51 | 50,820,152,faulty 52 | 51,834,157,faulty 53 | 52,858,163,faulty 54 | 53,888,91,faulty 55 | 54,891,156,faulty 56 | 55,911,79,faulty 57 | 56,939,99,faulty -------------------------------------------------------------------------------- /code/ch9/data/generators.csv: -------------------------------------------------------------------------------- 1 | ID RPM VIBRATION STATUS 2 | 1 568 585 good 3 | 2 586 565 good 4 | 3 609 536 good 5 | 4 616 492 good 6 | 5 632 465 good 7 | 6 652 528 good 8 | 7 655 496 good 9 | 8 660 471 good 10 | 9 688 408 good 11 | 10 696 399 good 12 | 11 708 387 good 13 | 12 701 434 good 14 | 13 715 506 good 15 | 14 732 485 good 16 | 15 731 395 good 17 | 16 749 398 good 18 | 17 759 512 good 19 | 18 773 431 good 20 | 19 782 456 good 21 | 20 797 476 good 22 | 21 794 421 good 23 | 22 824 452 good 24 | 23 835 441 good 25 | 24 862 372 good 26 | 25 879 340 good 27 | 26 892 370 good 28 | 27 913 373 good 29 | 28 933 330 good 30 | 29 562 309 faulty 31 | 30 578 346 faulty 32 | 31 593 357 faulty 33 | 32 626 341 faulty 34 | 33 635 252 faulty 35 | 34 658 235 faulty 36 | 35 663 299 faulty 37 | 36 677 223 faulty 38 | 37 685 303 faulty 39 | 38 698 197 faulty 40 | 39 699 311 faulty 41 | 40 712 257 faulty 42 | 41 722 193 faulty 43 | 42 735 259 faulty 44 | 43 738 314 faulty 45 | 44 753 113 faulty 46 | 45 767 286 faulty 47 | 46 771 264 faulty 48 | 47 780 137 faulty 49 | 48 784 131 faulty 50 | 49 798 132 faulty 51 | 50 820 152 faulty 52 | 51 834 157 faulty 53 | 52 858 163 faulty 54 | 53 888 91 faulty 55 | 54 891 156 faulty 56 | 55 911 79 faulty 57 | 56 939 99 faulty 58 | -------------------------------------------------------------------------------- /code/ch99/teamlab_classifier.py: -------------------------------------------------------------------------------- 1 | class SoftmaxRegressionClassifier(object): 2 | 3 | def softmax(self): 4 | pass 5 | 6 | def loss(self): 7 | pass 8 | 9 | def fit(self): 10 | pass 11 | 12 | 
def predict(self): 13 | pass 14 | -------------------------------------------------------------------------------- /code/test.md: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /documents/How_to_use_spark-sklearn_using_Google_Dataproc(kor).ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "nbpresent": { 7 | "id": "1a8b9591-680e-4d1d-ba06-08bd8d2e77cf" 8 | } 9 | }, 10 | "source": [ 11 | "## Tutorial: How to use spark-sklearn using Google Dataproc\n", 12 | "-------------------------------\n", 13 | "\n", 14 | "### Overview\n", 15 | "- This document is a tutorial on using Google Dataproc, Google's managed Hadoop/Spark cluster cloud service, to run grid search on Spark for scikit-learn hyperparameter optimization.\n", 16 | "- This tutorial covers the following topics:\n", 17 | " - Setting up a Google Dataproc cluster with gcloud\n", 18 | " - Setting up and connecting a Jupyter Notebook coding environment on the cluster\n", 19 | " - Installing spark-sklearn\n", 20 | " - An example of grid search with spark-sklearn" 21 | ] 22 | }, 23 | { 24 | "cell_type": "markdown", 25 | "metadata": {}, 26 | "source": [ 27 | "### 1. Preparation - Signing up for Google Cloud" 28 | ] 29 | }, 30 | { 31 | "cell_type": "markdown", 32 | "metadata": {}, 33 | "source": [ 34 | "### 2. Installing the gcloud utility" 35 | ] 36 | }, 37 | { 38 | "cell_type": "markdown", 39 | "metadata": {}, 40 | "source": [ 41 | "### 3. Setting up a Google Dataproc cluster" 42 | ] 43 | }, 44 | { 45 | "cell_type": "markdown", 46 | "metadata": {}, 47 | "source": [ 48 | "### 4. Connecting to the Dataproc cluster with Jupyter Notebook" 49 | ] 50 | }, 51 | { 52 | "cell_type": "markdown", 53 | "metadata": {}, 54 | "source": [ 55 | "### 5. Installing spark-sklearn and running a grid search" 56 | ] 57 | }, 58 | { 59 | "cell_type": "markdown", 60 | "metadata": {}, 61 | "source": [ 62 | "### References\n", 63 | "- https://databricks.com/blog/2016/02/08/auto-scaling-scikit-learn-with-apache-spark.html\n", 64 | "- https://github.com/databricks/spark-sklearn\n", 65 | "- https://blog.sourced.tech/post/dataproc_jupyter/\n", 66 | "- https://github.com/GoogleCloudPlatform/dataproc-initialization-actions/tree/master/jupyter" 67 | ] 68 | } 69 | ], 70 | "metadata": { 71 | "anaconda-cloud": {}, 72 | "kernelspec": { 73 | "display_name": "Python [default]", 74 | "language": "python", 75 | "name": "python3" 76 | }, 77 | "language_info": { 78 | "codemirror_mode": { 79 | "name": "ipython", 80 | "version": 3.0 81 | }, 82 | "file_extension": ".py", 83 | "mimetype": "text/x-python", 84 | "name": "python", 85 | "nbconvert_exporter": "python", 86 | "pygments_lexer": "ipython3", 87 | "version": "3.5.3" 88 | }, 89 | "nbpresent": { 90 | "slides": { 91 | "042a2c47-7c36-4f55-be80-69d72de25b27": { 92 | "id": "042a2c47-7c36-4f55-be80-69d72de25b27", 93 | "prev": "57a506bf-e894-4f47-bc2a-0550e1371f8e", 94 | "regions": {} 95 | }, 96 | "57a506bf-e894-4f47-bc2a-0550e1371f8e": { 97 | "id": "57a506bf-e894-4f47-bc2a-0550e1371f8e", 98 | "prev": null, 99 | "regions": {} 100 | } 101 | }, 102 | "themes": {} 103 | } 104 | }, 105 | "nbformat": 4, 106 | "nbformat_minor": 0 107 | } -------------------------------------------------------------------------------- /documents/test.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeamLab/machine_learning_from_scratch_with_python/d52439f9536d98f28164e8c1ecc6e9070843b971/documents/test.md 
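Section 5 of the tutorial above is only a heading with no body. As a reference, here is a minimal sketch of the grid-search cell that section promises, adapted from the Databricks blog post and the spark-sklearn repository cited in the tutorial's References. It assumes spark-sklearn is already installed on the cluster (for example with `pip install spark-sklearn`) and that `sc` is the SparkContext provided by the Dataproc/Jupyter setup.

```python
from sklearn import datasets, svm
from spark_sklearn import GridSearchCV  # drop-in replacement for sklearn's GridSearchCV

iris = datasets.load_iris()
param_grid = {"kernel": ("linear", "rbf"), "C": [1, 10]}

# Same API as scikit-learn except that the SparkContext is passed first;
# each parameter combination is then fitted as a separate Spark task.
clf = GridSearchCV(sc, svm.SVC(), param_grid)
clf.fit(iris.data, iris.target)
print(clf.best_params_)
```

Because only the search itself is distributed, this pattern fits the many-small-models case (hyperparameter search over data that fits in memory on each worker) rather than training one large model.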
-------------------------------------------------------------------------------- /lab_asssigment/1_lab_numpy/linux_mac/install.sh: -------------------------------------------------------------------------------- 1 | pip install -U backend.ai-client 2 | -------------------------------------------------------------------------------- /lab_asssigment/1_lab_numpy/linux_mac/numpy_lab.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def n_size_ndarray_creation(n, dtype=np.int): 5 | pass 6 | 7 | 8 | def zero_or_one_or_empty_ndarray(shape, type=0, dtype=np.int): 9 | pass 10 | 11 | 12 | def change_shape_of_ndarray(X, n_row): 13 | pass 14 | 15 | 16 | def concat_ndarray(X_1, X_2, axis): 17 | pass 18 | 19 | 20 | def normalize_ndarray(X, axis=99, dtype=np.float32): 21 | pass 22 | 23 | 24 | def save_ndarray(X, filename="test.npy"): 25 | pass 26 | 27 | 28 | def boolean_index(X, condition): 29 | pass 30 | 31 | 32 | def find_nearest_value(X, target_value): 33 | pass 34 | 35 | 36 | def get_n_largest_values(X, n): 37 | pass 38 | -------------------------------------------------------------------------------- /lab_asssigment/1_lab_numpy/linux_mac/submit.sh: -------------------------------------------------------------------------------- 1 | export BACKEND_ACCESS_KEY=AKIAQWZD6A6Y5ZVOHSJR 2 | export BACKEND_SECRET_KEY=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 3 | if [ $# -eq 0 ] 4 | then 5 | echo "Please give hash key as argument." 6 | else 7 | backend.ai run --exec "python test.py numpy_lab.py $1" python3 test.py numpy_lab.py 8 | fi 9 | -------------------------------------------------------------------------------- /lab_asssigment/1_lab_numpy/linux_mac/test.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import urllib.parse 3 | import urllib.request 4 | import json 5 | import argparse 6 | import os 7 | import types 8 | import sys 9 | 10 | 11 | parser = argparse.ArgumentParser(description="Autoscorer") 12 | parser.add_argument("filename", help="File to submit") 13 | parser.add_argument("hash", help="Hash key") 14 | 15 | args = parser.parse_args() 16 | if args.hash: 17 | hashkey = args.hash 18 | if args.filename: 19 | filename = args.filename 20 | 21 | 22 | class TestResult(unittest.TextTestResult): 23 | _previousTestClass = None 24 | _testRunEntered = False 25 | _moduleSetUpFailed = False 26 | 27 | def __init__(self, stream=None, descriptions=None, verbosity=1): 28 | super().__init__( 29 | stream=stream, descriptions=descriptions, verbosity=verbosity) 30 | self.tests_run = [] 31 | 32 | def getTestsReport(self): 33 | """Returns the run tests as a list of the form [test_id, result]""" 34 | return self.tests_run 35 | 36 | def addError(self, test, err): 37 | """Called when an error has occurred. 'err' is a tuple of values as 38 | returned by sys.exc_info(). 39 | """ 40 | super().addError(test, err) 41 | self.errors.append((test, self._exc_info_to_string(err, test))) 42 | self._mirrorOutput = True 43 | self.tests_run.append([test.id(), 0]) 44 | 45 | def addFailure(self, test, err): 46 | """Called when an error has occurred. 
'err' is a tuple of values as 47 | returned by sys.exc_info().""" 48 | super().addFailure(test, err) 49 | self.failures.append((test, self._exc_info_to_string(err, test))) 50 | self._mirrorOutput = True 51 | self.tests_run.append([test.id(), 0]) 52 | 53 | def addSuccess(self, test): 54 | "Called when a test has completed successfully" 55 | super().addSuccess(test) 56 | self.tests_run.append([test.id(), 1]) 57 | 58 | 59 | with urllib.request.urlopen('http://datasets.lablup.ai/private/python-tests/unit_test_numpy_lab.py') as response: 60 | test_code = response.read() 61 | 62 | test_module = types.ModuleType( 63 | 'test_code', 64 | doc='Test case') 65 | 66 | exec(test_code, test_module.__dict__) 67 | sys.modules['test_code'] = test_module 68 | 69 | import test_code as tc 70 | loader = unittest.loader.defaultTestLoader 71 | null_stream = open(os.devnull, "w") 72 | test_suite = loader.loadTestsFromModule(tc) 73 | result = unittest.TextTestRunner( 74 | stream=null_stream, verbosity=2, resultclass=TestResult).run(test_suite) 75 | 76 | print("Generating result sheet...") 77 | print("-------------------------------------------------------------------") 78 | print(" Test Case | Passed? | Feedback") 79 | print("-------------------------------------------------------------------") 80 | for c, r in result.tests_run: 81 | print("{0:s} | {1:s} | {2} ".format( 82 | c.rsplit('.', 1)[1].rjust(26), 83 | "PASSED" if r == 1 else "FAILED", 84 | "Good Job".rjust(10) if r == 1 else "Failed".rjust(10))) 85 | 86 | # print(json.dumps(result.tests_run)) 87 | print("Reading source file...") 88 | 89 | file = open(filename, "r") 90 | print("Transferring results to server...") 91 | payload = { 92 | 'hashkey': hashkey, 93 | 'result': result.tests_run, 94 | 'code': file.read() 95 | } 96 | try: 97 | data = urllib.parse.urlencode(payload) 98 | data = data.encode('ascii') 99 | req = urllib.request.Request('http://report.inflearn.com/submit', data) 100 | with urllib.request.urlopen(req) as response: 101 | resp = response.read() 102 | 103 | if json.loads(resp)['result'] == 0: 104 | print("Transfer failed: hash key is already used.") 105 | else: 106 | print("Transfer completed.") 107 | 108 | except Exception as e: 109 | print("Error occurred on transferring.", e) 110 | -------------------------------------------------------------------------------- /lab_asssigment/1_lab_numpy/numpy_lab.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeamLab/machine_learning_from_scratch_with_python/d52439f9536d98f28164e8c1ecc6e9070843b971/lab_asssigment/1_lab_numpy/numpy_lab.pdf -------------------------------------------------------------------------------- /lab_asssigment/1_lab_numpy/numpy_lab.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def n_size_ndarray_creation(n, dtype=np.int): 5 | pass 6 | 7 | 8 | def zero_or_one_or_empty_ndarray(shape, type=0, dtype=np.int): 9 | pass 10 | 11 | 12 | def change_shape_of_ndarray(X, n_row): 13 | pass 14 | 15 | 16 | def concat_ndarray(X_1, X_2, axis): 17 | pass 18 | 19 | 20 | def normalize_ndarray(X, axis=99, dtype=np.float32): 21 | pass 22 | 23 | 24 | def save_ndarray(X, filename="test.npy"): 25 | pass 26 | 27 | 28 | def boolean_index(X, condition): 29 | pass 30 | 31 | 32 | def find_nearest_value(X, target_value): 33 | pass 34 | 35 | 36 | def get_n_largest_values(X, n): 37 | pass 38 | 
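The `numpy_lab.py` stubs above are graded against unit tests that `test.py` downloads at submission time, so the repository itself never states the expected behavior. Purely as an illustration, here is one plausible reading of two of the simpler stubs. The exact semantics (1-D input, ordering, tie-breaking) are guesses from the function names, not the official spec, so do not treat this as a reference solution.

```python
import numpy as np

def find_nearest_value(X, target_value):
    # Value of the 1-D array X whose absolute difference
    # from target_value is smallest (first match wins on ties).
    return X[np.argmin(np.abs(X - target_value))]

def get_n_largest_values(X, n):
    # The n largest values of X in descending order,
    # regardless of X's original shape.
    return np.sort(X.flatten())[::-1][:n]
```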
-------------------------------------------------------------------------------- /lab_asssigment/1_lab_numpy/windows/install.bat: -------------------------------------------------------------------------------- 1 | pip install -U backend.ai-client 2 | -------------------------------------------------------------------------------- /lab_asssigment/1_lab_numpy/windows/numpy_lab.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def n_size_ndarray_creation(n, dtype=np.int): 5 | pass 6 | 7 | 8 | def zero_or_one_or_empty_ndarray(shape, type=0, dtype=np.int): 9 | pass 10 | 11 | 12 | def change_shape_of_ndarray(X, n_row): 13 | pass 14 | 15 | 16 | def concat_ndarray(X_1, X_2, axis): 17 | pass 18 | 19 | 20 | def normalize_ndarray(X, axis=99, dtype=np.float32): 21 | pass 22 | 23 | 24 | def save_ndarray(X, filename="test.npy"): 25 | pass 26 | 27 | 28 | def boolean_index(X, condition): 29 | pass 30 | 31 | 32 | def find_nearest_value(X, target_value): 33 | pass 34 | 35 | 36 | def get_n_largest_values(X, n): 37 | pass 38 | -------------------------------------------------------------------------------- /lab_asssigment/1_lab_numpy/windows/submit.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | set BACKEND_ACCESS_KEY=AKIAQWZD6A6Y5ZVOHSJR 4 | set BACKEND_SECRET_KEY=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 5 | 6 | set tmp="%1" 7 | if "%tmp:"=.%"==".." ( 8 | echo "Please give hash key as argument." 9 | ) else ( 10 | backend.ai run --exec "python test.py numpy_lab.py %tmp%" python3 test.py numpy_lab.py 11 | ) 12 | -------------------------------------------------------------------------------- /lab_asssigment/1_lab_numpy/windows/test.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import urllib.parse 3 | import urllib.request 4 | import json 5 | import argparse 6 | import os 7 | import types 8 | import sys 9 | 10 | 11 | parser = argparse.ArgumentParser(description="Autoscorer") 12 | parser.add_argument("filename", help="File to submit") 13 | parser.add_argument("hash", help="Hash key") 14 | 15 | args = parser.parse_args() 16 | if args.hash: 17 | hashkey = args.hash 18 | if args.filename: 19 | filename = args.filename 20 | 21 | 22 | class TestResult(unittest.TextTestResult): 23 | _previousTestClass = None 24 | _testRunEntered = False 25 | _moduleSetUpFailed = False 26 | 27 | def __init__(self, stream=None, descriptions=None, verbosity=1): 28 | super().__init__( 29 | stream=stream, descriptions=descriptions, verbosity=verbosity) 30 | self.tests_run = [] 31 | 32 | def getTestsReport(self): 33 | """Returns the run tests as a list of the form [test_id, result]""" 34 | return self.tests_run 35 | 36 | def addError(self, test, err): 37 | """Called when an error has occurred. 'err' is a tuple of values as 38 | returned by sys.exc_info(). 39 | """ 40 | super().addError(test, err) 41 | self.errors.append((test, self._exc_info_to_string(err, test))) 42 | self._mirrorOutput = True 43 | self.tests_run.append([test.id(), 0]) 44 | 45 | def addFailure(self, test, err): 46 | """Called when an error has occurred. 
'err' is a tuple of values as 47 | returned by sys.exc_info().""" 48 | super().addFailure(test, err) 49 | self.failures.append((test, self._exc_info_to_string(err, test))) 50 | self._mirrorOutput = True 51 | self.tests_run.append([test.id(), 0]) 52 | 53 | def addSuccess(self, test): 54 | "Called when a test has completed successfully" 55 | super().addSuccess(test) 56 | self.tests_run.append([test.id(), 1]) 57 | 58 | 59 | with urllib.request.urlopen('http://datasets.lablup.ai/private/python-tests/unit_test_numpy_lab.py') as response: 60 | test_code = response.read() 61 | 62 | test_module = types.ModuleType( 63 | 'test_code', 64 | doc='Test case') 65 | 66 | exec(test_code, test_module.__dict__) 67 | sys.modules['test_code'] = test_module 68 | 69 | import test_code as tc 70 | loader = unittest.loader.defaultTestLoader 71 | null_stream = open(os.devnull, "w") 72 | test_suite = loader.loadTestsFromModule(tc) 73 | result = unittest.TextTestRunner( 74 | stream=null_stream, verbosity=2, resultclass=TestResult).run(test_suite) 75 | 76 | print("Generating result sheet...") 77 | print("-------------------------------------------------------------------") 78 | print(" Test Case | Passed? | Feedback") 79 | print("-------------------------------------------------------------------") 80 | for c, r in result.tests_run: 81 | print("{0:s} | {1:s} | {2} ".format( 82 | c.rsplit('.', 1)[1].rjust(26), 83 | "PASSED" if r == 1 else "FAILED", 84 | "Good Job".rjust(10) if r == 1 else "Failed".rjust(10))) 85 | 86 | # print(json.dumps(result.tests_run)) 87 | print("Reading source file...") 88 | 89 | file = open(filename, "r") 90 | print("Transferring results to server...") 91 | payload = { 92 | 'hashkey': hashkey, 93 | 'result': result.tests_run, 94 | 'code': file.read() 95 | } 96 | try: 97 | data = urllib.parse.urlencode(payload) 98 | data = data.encode('ascii') 99 | req = urllib.request.Request('http://report.inflearn.com/submit', data) 100 | with urllib.request.urlopen(req) as response: 101 | resp = response.read() 102 | 103 | if json.loads(resp)['result'] == 0: 104 | print("Transfer failed: hash key is already used.") 105 | else: 106 | print("Transfer completed.") 107 | 108 | except Exception as e: 109 | print("Error occurred on transferring.", e) 110 | -------------------------------------------------------------------------------- /lab_asssigment/2_lab_build_matrix/README.md: -------------------------------------------------------------------------------- 1 | Lab - Build a matrix 2 | =============================== 3 | Copyright 2018 © document created by teamLab.gachon@gmail.com 4 | 5 | ## Introduction 6 | 7 | [Download the PDF](https://s3.ap-northeast-2.amazonaws.com/teamlab-gachon/mooc_pic/build_matrix.pdf) 8 | 9 | The second machine learning lab is to build a rating matrix or a frequent matrix using Pandas and Numpy. In machine learning work such as recommender system development, you often need to analyze who used and rated which products (or services), and how much, by reshaping that information into matrix form, and this calls for a preprocessing step. A common example is expressing the star ratings that users gave the movies they watched as a matrix. For storage efficiency, a database generally does not store data in matrix form; it stores the information of each event as a row, as the events occur. Converting the information accumulated in the DB this way into a matrix is the goal of this lab. The actual transformation of the data looks like the figure below. 10 | 11 | ![matrix](images/2018/01/matrix.png) 12 | 13 | ## Installing backend.ai 14 | Before submitting the homework, you need to install [Lablup](http://www.lablup.com/)'s backend.ai into your Python environment. Installation is very easy: just type the following in a terminal or cmd window. 15 | 16 | ```bash 17 | pip install backend.ai-client 18 | ``` 19 | 20 | ## Downloading the homework file (2_lab_build_matrix.zip) 21 | The first thing to do is download the homework file. 
Download it from the link below, or type the address below into the address bar of a web browser such as Chrome or Internet Explorer. 22 | 23 | - Link: [2_lab_build_matrix.zip](https://s3.ap-northeast-2.amazonaws.com/teamlab-gachon/mooc_pic/2_lab_build_matrix.zip) 24 | - https://s3.ap-northeast-2.amazonaws.com/teamlab-gachon/mooc_pic/2_lab_build_matrix.zip 25 | 26 | On Mac OS you can also use the command below. 27 | ```bash 28 | wget https://s3.ap-northeast-2.amazonaws.com/teamlab-gachon/mooc_pic/2_lab_bulid_matrix.zip 29 | ``` 30 | 31 | Move the downloaded `2_lab_build_matrix.zip` file into your working directory and unzip it before starting work. 32 | Once unzipped, it contains a `linux_mac` folder and a `windows` folder; move into the folder that matches your OS and edit the code there. 33 | 34 | 35 | ## build_matrix.py code structure 36 | This lab combines basic Pandas operations with Numpy to convert data accumulated in the usual row-per-event form into matrix form. Two kinds of matrix are produced, and this lab aims to support both. 37 | 38 | #### get_rating_matrix 39 | The first function builds a rating matrix. A rating matrix expresses users' ratings of products, such as movies or books, in matrix form. We use the file `movie_rating.csv` to build the rating matrix; `movie_rating.csv` is structured as follows. 40 | 41 | source |target | rating 42 | --|---|-- 43 | Mick LaSalle|Superman Returns|3.0 44 | Mick LaSalle|The Night Listener|3.0 45 | Claudia Puig|Snakes on a Plane|3.5 46 | Claudia Puig|Just My Luck|3.0 47 | Claudia Puig|The Night Listener|4.5 48 | Lisa Rose|Lady in the Water|2.5 49 | Lisa Rose|Snakes on a Plane|3.5 50 | 51 | Every csv file handled in this lab has source and target columns: source supplies the row index and target supplies the column index. The rating column is used only by `get_rating_matrix` and holds each user's rating of a movie. 52 | 53 | The goal of this lab is to convert information laid out as in the table above into matrix form. The conversion rules are as follows. 54 | - source keys the rows and target keys the columns. 55 | - Indices are assigned from the sorted source and target values; in the table above, `Claudia Puig` therefore becomes row index 0. 56 | - Each rating value is assigned to the corresponding element of the matrix. 57 | - The resulting matrix is an ndarray. 58 | - Python built-in modules such as dict and collections may be used, but for loops may not. 59 | 60 | The template for the function is shown below; the only input is the name of the csv file to process. 61 | ```python 62 | def get_rating_matrix(filename): 63 | pass 64 | ``` 65 | An actual implementation and its output look like this. 66 | ```python 67 | >>> import numpy as np 68 | >>> import build_matrix as test_code 69 | >>> test_code.get_rating_matrix("movie_rating.csv") 70 | array([[ 3. , 0. , 3.5, 0. , 4.5, 0. ], 71 | [ 0. , 3. , 3.5, 0. , 3. , 3.5], 72 | [ 0. , 3. , 4. , 5. , 3. , 3.5], 73 | [ 3. , 2.5, 3.5, 3.5, 3. , 2.5], 74 | [ 2. , 3. , 4. , 3. , 3. , 0. ], 75 | [ 0. , 0. , 4.5, 4. , 0. , 0. ]], dtype=float32) 76 | ``` 77 | 78 | #### get_frequent_matrix 79 | The second function builds a frequent matrix, which expresses how frequently each product was purchased. A frequent matrix records the number of times a user bought a given product. The csv file we provide, `1000i.csv`, is structured as follows. 80 | 81 | source |target 82 | --|--- 83 | 3|7 84 | 4|15 85 | 2|49 86 | 5|44 87 | 1|1 88 | 2|19 89 | 4|22 90 | 4|34 91 | 4|40 92 | 5|31 93 | 4|17 94 | 5|16 95 | 2|43 96 | 5|20 97 | 3|48 98 | 99 | 100 | Unlike the previous function, there is no rating column here. Instead, a given source-target combination can occur more than once, and those repetitions must be treated as frequencies. In other words, the rating is not explicit; the goal is to derive the frequency from the data. The rules for converting to matrix form are as follows. 101 | - source keys the rows and target keys the columns. 102 | - Indices are assigned from the sorted source and target values; in the table above, `1` therefore becomes row index 0. 103 | - The number of times each source-target pair occurs is assigned as the frequency to the corresponding element of the matrix. 104 | - The resulting matrix is an ndarray with dtype np.float32. 105 | - Python built-in modules such as dict and collections may be used, but for loops may not. 106 | 107 | 108 | The template for the function is shown below; the only input is the name of the csv file to process.
109 | ```python 110 | def get_frequent_matrix(filename): 111 | pass 112 | ``` 113 | An actual implementation and its output look like this. 114 | ```python 115 | >>> import numpy as np 116 | >>> import build_matrix as test_code 117 | >>> test_code.get_frequent_matrix("1000i.csv") 118 | array([[ 19., 17., 14., 11., 17., 25., 7., 22., 5., 18., 10., 119 | 13., 13., 8., 20., 10., 9., 10., 16., 15., 9., 11., 120 | 17., 15., 14., 8., 6., 12., 18., 12., 6., 18., 9., 121 | 24., 7., 19., 14., 6., 4., 12., 15., 14., 20., 9., 122 | 12., 16., 11., 9., 11., 12.], 123 | [ 20., 16., 10., 15., 17., 18., 10., 13., 5., 19., 8., 124 | 14., 14., 9., 15., 14., 13., 8., 12., 9., 5., 10., 125 | 28., 18., 7., 8., 6., 19., 14., 13., 11., 12., 18., 126 | 15., 7., 11., 17., 9., 5., 5., 13., 12., 15., 9., 127 | 13., 16., 16., 10., 16., 9.], 128 | [ 12., 16., 13., 19., 23., 19., 5., 14., 5., 18., 7., 129 | 6., 14., 8., 20., 17., 14., 11., 16., 12., 7., 9., 130 | 23., 12., 12., 8., 7., 23., 26., 10., 9., 20., 16., 131 | 11., 4., 19., 12., 12., 5., 10., 10., 14., 10., 17., 132 | 15., 16., 11., 17., 9., 11.], 133 | [ 14., 14., 19., 11., 11., 18., 7., 16., 7., 17., 6., 134 | 19., 18., 12., 13., 13., 14., 9., 21., 16., 6., 6., 135 | 19., 14., 19., 5., 12., 14., 18., 11., 11., 21., 15., 136 | 10., 11., 14., 17., 21., 6., 14., 9., 16., 18., 12., 137 | 16., 16., 26., 16., 12., 20.], 138 | [ 13., 7., 8., 15., 13., 16., 3., 19., 11., 12., 7., 139 | 10., 13., 14., 16., 14., 23., 9., 13., 10., 11., 3., 140 | 11., 14., 9., 6., 11., 16., 18., 11., 5., 14., 10., 141 | 16., 10., 5., 14., 11., 3., 9., 11., 10., 16., 8., 142 | 13., 20., 14., 18., 21., 3.]], dtype=float32) 143 | ``` 144 | 145 | ### Submitting the homework template file (Windows) 146 | 1. Press Windows+R, type cmd, and click OK. 147 | 2. Move to the folder where you did the work. 148 | 3. Enter the commands below in the cmd window. 149 | ```bash 150 | install.bat 151 | submit.bat [YOUR_HASH_KEY] 152 | ``` 153 | 154 | ### Submitting the homework template file (Mac or Linux) 155 | 1. Launch a terminal. 156 | 2. Move to the directory where you did the work. 157 | 3. Enter the commands below in a bash shell. 158 | ```bash 159 | bash install.sh 160 | bash submit.sh [YOUR_HASH_KEY] 161 | ``` 162 | > Because of updates to the backend.ai service, be sure to run `bash install.sh` or `install.bat` before submitting. 163 | 164 | ## Next Work 165 | Well done. This is a lab you can only complete by using Numpy and Pandas together. If you are not yet comfortable handling matrix and vector data, you probably found it quite difficult. Even so, we must keep moving forward. May the code be with you... 
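For readers who want to sanity-check their approach, here is one possible implementation of the `get_rating_matrix` / `get_frequent_matrix` spec above. This is a sketch only: it assumes that pandas `pivot_table` and `crosstab` are acceptable under the no-for-loop rule, it uses the stub signatures from `build_matrix.py`, and it has not been run against the course's hidden unit tests.

```python
import numpy as np
import pandas as pd

def get_rating_matrix(filename, dtype=np.float32):
    # source keys the rows, target keys the columns; pivot_table sorts
    # both axes, which matches the "sorted values become the index" rule.
    df = pd.read_csv(filename)
    table = df.pivot_table(values="rating", index="source",
                           columns="target", fill_value=0)
    return table.values.astype(dtype)

def get_frequent_matrix(filename, dtype=np.float32):
    # crosstab counts how often each (source, target) pair occurs,
    # which is exactly the frequency the spec asks for.
    df = pd.read_csv(filename)
    table = pd.crosstab(df["source"], df["target"])
    return table.values.astype(dtype)
```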
166 | 167 | > **Human knowledge belongs to the world** - from movie 'Password' - 168 | -------------------------------------------------------------------------------- /lab_asssigment/2_lab_build_matrix/build_matrix.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeamLab/machine_learning_from_scratch_with_python/d52439f9536d98f28164e8c1ecc6e9070843b971/lab_asssigment/2_lab_build_matrix/build_matrix.pdf -------------------------------------------------------------------------------- /lab_asssigment/2_lab_build_matrix/build_matrix.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | 4 | 5 | def get_rating_matrix(filename, dtype=np.float32): 6 | pass 7 | 8 | 9 | def get_frequent_matrix(filename, dtype=np.float32): 10 | pass 11 | -------------------------------------------------------------------------------- /lab_asssigment/2_lab_build_matrix/images/2018/01/matrix.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeamLab/machine_learning_from_scratch_with_python/d52439f9536d98f28164e8c1ecc6e9070843b971/lab_asssigment/2_lab_build_matrix/images/2018/01/matrix.png -------------------------------------------------------------------------------- /lab_asssigment/2_lab_build_matrix/linux_mac/build_matrix.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | 4 | 5 | def get_rating_matrix(filename, dtype=np.float32): 6 | pass 7 | 8 | 9 | def get_frequent_matrix(filename, dtype=np.float32): 10 | pass 11 | -------------------------------------------------------------------------------- /lab_asssigment/2_lab_build_matrix/linux_mac/install.sh: -------------------------------------------------------------------------------- 1 | pip install -U backend.ai-client 2 | -------------------------------------------------------------------------------- /lab_asssigment/2_lab_build_matrix/linux_mac/submit.sh: -------------------------------------------------------------------------------- 1 | export BACKEND_ACCESS_KEY=AKIAQWZD6A6Y5ZVOHSJR 2 | export BACKEND_SECRET_KEY=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 3 | if [ $# -eq 0 ] 4 | then 5 | echo "Please give hash key as argument." 
6 | else 7 | backend.ai run --exec "python test.py build_matrix.py $1" python3 test.py build_matrix.py 8 | fi 9 | -------------------------------------------------------------------------------- /lab_asssigment/2_lab_build_matrix/linux_mac/test.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import urllib.parse 3 | import urllib.request 4 | import json 5 | import argparse 6 | import os 7 | import types 8 | import sys 9 | 10 | 11 | parser = argparse.ArgumentParser(description="Autoscorer") 12 | parser.add_argument("filename", help="File to submit") 13 | parser.add_argument("hash", help="Hash key") 14 | 15 | args = parser.parse_args() 16 | if args.hash: 17 | hashkey = args.hash 18 | if args.filename: 19 | filename = args.filename 20 | 21 | 22 | class TestResult(unittest.TextTestResult): 23 | _previousTestClass = None 24 | _testRunEntered = False 25 | _moduleSetUpFailed = False 26 | 27 | def __init__(self, stream=None, descriptions=None, verbosity=1): 28 | super().__init__( 29 | stream=stream, descriptions=descriptions, verbosity=verbosity) 30 | self.tests_run = [] 31 | 32 | def getTestsReport(self): 33 | """Returns the run tests as a list of the form [test_id, result]""" 34 | return self.tests_run 35 | 36 | def addError(self, test, err): 37 | """Called when an error has occurred. 'err' is a tuple of values as 38 | returned by sys.exc_info(). 39 | """ 40 | super().addError(test, err) 41 | self.errors.append((test, self._exc_info_to_string(err, test))) 42 | self._mirrorOutput = True 43 | self.tests_run.append([test.id(), 0]) 44 | 45 | def addFailure(self, test, err): 46 | """Called when an error has occurred. 'err' is a tuple of values as 47 | returned by sys.exc_info().""" 48 | super().addFailure(test, err) 49 | self.failures.append((test, self._exc_info_to_string(err, test))) 50 | self._mirrorOutput = True 51 | self.tests_run.append([test.id(), 0]) 52 | 53 | def addSuccess(self, test): 54 | "Called when a test has completed successfully" 55 | super().addSuccess(test) 56 | self.tests_run.append([test.id(), 1]) 57 | 58 | 59 | with urllib.request.urlopen('http://datasets.lablup.ai/private/python-tests/unit_test_build_matrix.py') as response: 60 | test_code = response.read() 61 | 62 | with urllib.request.urlopen('http://datasets.lablup.ai/private/python-tests/movie_rating.csv') as response: 63 | example_txt = response.read().decode('utf-8') 64 | f = open("movie_rating.csv", "w") 65 | f.write(example_txt) 66 | f.close() 67 | 68 | with urllib.request.urlopen('http://datasets.lablup.ai/private/python-tests/1000i.csv') as response: 69 | example_txt = response.read().decode('utf-8') 70 | f = open("1000i.csv", "w") 71 | f.write(example_txt) 72 | f.close() 73 | 74 | 75 | test_module = types.ModuleType( 76 | 'test_code', 77 | doc='Test case') 78 | 79 | exec(test_code, test_module.__dict__) 80 | sys.modules['test_code'] = test_module 81 | 82 | import test_code as tc 83 | loader = unittest.loader.defaultTestLoader 84 | null_stream = open(os.devnull, "w") 85 | test_suite = loader.loadTestsFromModule(tc) 86 | result = unittest.TextTestRunner( 87 | stream=null_stream, verbosity=2, resultclass=TestResult).run(test_suite) 88 | 89 | print("Generating result sheet...") 90 | print("-------------------------------------------------------------------") 91 | print(" Test Case | Passed? 
| Feedback") 92 | print("-------------------------------------------------------------------") 93 | for c, r in result.tests_run: 94 | print("{0:s} | {1:s} | {2} ".format( 95 | c.rsplit('.', 1)[1].rjust(26), 96 | "PASSED" if r == 1 else "FAILED", 97 | "Good Job".rjust(10) if r == 1 else "Failed".rjust(10))) 98 | 99 | # print(json.dumps(result.tests_run)) 100 | print("Reading source file...") 101 | 102 | file = open(filename, "r") 103 | print("Transferring results to server...") 104 | payload = { 105 | 'hashkey': hashkey, 106 | 'result': result.tests_run, 107 | 'code': file.read() 108 | } 109 | try: 110 | data = urllib.parse.urlencode(payload) 111 | data = data.encode('ascii') 112 | req = urllib.request.Request('http://report.inflearn.com/submit', data) 113 | with urllib.request.urlopen(req) as response: 114 | resp = response.read() 115 | 116 | if json.loads(resp)['result'] == 0: 117 | print("Transfer failed: hash key is already used.") 118 | else: 119 | print("Transfer completed.") 120 | 121 | except Exception as e: 122 | print("Error occurred on transferring.", e) 123 | -------------------------------------------------------------------------------- /lab_asssigment/2_lab_build_matrix/movie_rating.csv: -------------------------------------------------------------------------------- 1 | source,target,rating 2 | Jack Matthews,Lady in the Water,3.0 3 | Jack Matthews,Snakes on a Plane,4.0 4 | Jack Matthews,You Me and Dupree,3.5 5 | Jack Matthews,Superman Returns,5.0 6 | Jack Matthews,The Night Listener,3.0 7 | Mick LaSalle,Lady in the Water,3.0 8 | Mick LaSalle,Snakes on a Plane,4.0 9 | Mick LaSalle,Just My Luck,2.0 10 | Mick LaSalle,Superman Returns,3.0 11 | Mick LaSalle,The Night Listener,3.0 12 | Claudia Puig,Snakes on a Plane,3.5 13 | Claudia Puig,Just My Luck,3.0 14 | Claudia Puig,The Night Listener,4.5 15 | Lisa Rose,Lady in the Water,2.5 16 | Lisa Rose,Snakes on a Plane,3.5 17 | Lisa Rose,Just My Luck,3.0 18 | Lisa Rose,Superman Returns,3.5 19 | Lisa Rose,The Night Listener,3.0 20 | Lisa Rose,You Me and Dupree,2.5 21 | Toby,Snakes on a Plane,4.5 22 | Toby,Superman Returns,4.0 23 | Gene Seymour,Lady in the Water,3.0 24 | Gene Seymour,Snakes on a Plane,3.5 25 | Gene Seymour,You Me and Dupree,3.5 26 | Gene Seymour,The Night Listener,3.0 27 | -------------------------------------------------------------------------------- /lab_asssigment/2_lab_build_matrix/windows/build_matrix.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | 4 | 5 | def get_rating_matrix(filename, dtype=np.float32): 6 | pass 7 | 8 | 9 | def get_frequent_matrix(filename, dtype=np.float32): 10 | pass 11 | -------------------------------------------------------------------------------- /lab_asssigment/2_lab_build_matrix/windows/install.bat: -------------------------------------------------------------------------------- 1 | pip install -U backend.ai-client 2 | -------------------------------------------------------------------------------- /lab_asssigment/2_lab_build_matrix/windows/submit.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | set BACKEND_ACCESS_KEY=AKIAQWZD6A6Y5ZVOHSJR 4 | set BACKEND_SECRET_KEY=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 5 | 6 | set tmp="%1" 7 | if "%tmp:"=.%"==".." ( 8 | echo "Please give hash key as argument." 
9 | ) else ( 10 | backend.ai run --exec "python test.py build_matrix.py %tmp%" python3 test.py build_matrix.py 11 | ) 12 | -------------------------------------------------------------------------------- /lab_asssigment/2_lab_build_matrix/windows/test.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import urllib.parse 3 | import urllib.request 4 | import json 5 | import argparse 6 | import os 7 | import types 8 | import sys 9 | 10 | 11 | parser = argparse.ArgumentParser(description="Autoscorer") 12 | parser.add_argument("filename", help="File to submit") 13 | parser.add_argument("hash", help="Hash key") 14 | 15 | args = parser.parse_args() 16 | if args.hash: 17 | hashkey = args.hash 18 | if args.filename: 19 | filename = args.filename 20 | 21 | 22 | class TestResult(unittest.TextTestResult): 23 | _previousTestClass = None 24 | _testRunEntered = False 25 | _moduleSetUpFailed = False 26 | 27 | def __init__(self, stream=None, descriptions=None, verbosity=1): 28 | super().__init__( 29 | stream=stream, descriptions=descriptions, verbosity=verbosity) 30 | self.tests_run = [] 31 | 32 | def getTestsReport(self): 33 | """Returns the run tests as a list of the form [test_id, result]""" 34 | return self.tests_run 35 | 36 | def addError(self, test, err): 37 | """Called when an error has occurred. 'err' is a tuple of values as 38 | returned by sys.exc_info(). 39 | """ 40 | super().addError(test, err) 41 | self.errors.append((test, self._exc_info_to_string(err, test))) 42 | self._mirrorOutput = True 43 | self.tests_run.append([test.id(), 0]) 44 | 45 | def addFailure(self, test, err): 46 | """Called when an error has occurred. 'err' is a tuple of values as 47 | returned by sys.exc_info().""" 48 | super().addFailure(test, err) 49 | self.failures.append((test, self._exc_info_to_string(err, test))) 50 | self._mirrorOutput = True 51 | self.tests_run.append([test.id(), 0]) 52 | 53 | def addSuccess(self, test): 54 | "Called when a test has completed successfully" 55 | super().addSuccess(test) 56 | self.tests_run.append([test.id(), 1]) 57 | 58 | 59 | with urllib.request.urlopen('http://datasets.lablup.ai/private/python-tests/unit_test_build_matrix.py') as response: 60 | test_code = response.read() 61 | 62 | with urllib.request.urlopen('http://datasets.lablup.ai/private/python-tests/movie_rating.csv') as response: 63 | example_txt = response.read().decode('utf-8') 64 | f = open("movie_rating.csv", "w") 65 | f.write(example_txt) 66 | f.close() 67 | 68 | with urllib.request.urlopen('http://datasets.lablup.ai/private/python-tests/1000i.csv') as response: 69 | example_txt = response.read().decode('utf-8') 70 | f = open("1000i.csv", "w") 71 | f.write(example_txt) 72 | f.close() 73 | 74 | 75 | test_module = types.ModuleType( 76 | 'test_code', 77 | doc='Test case') 78 | 79 | exec(test_code, test_module.__dict__) 80 | sys.modules['test_code'] = test_module 81 | 82 | import test_code as tc 83 | loader = unittest.loader.defaultTestLoader 84 | null_stream = open(os.devnull, "w") 85 | test_suite = loader.loadTestsFromModule(tc) 86 | result = unittest.TextTestRunner( 87 | stream=null_stream, verbosity=2, resultclass=TestResult).run(test_suite) 88 | 89 | print("Generating result sheet...") 90 | print("-------------------------------------------------------------------") 91 | print(" Test Case | Passed? 
| Feedback") 92 | print("-------------------------------------------------------------------") 93 | for c, r in result.tests_run: 94 | print("{0:s} | {1:s} | {2} ".format( 95 | c.rsplit('.', 1)[1].rjust(26), 96 | "PASSED" if r == 1 else "FAILED", 97 | "Good Job".rjust(10) if r == 1 else "Failed".rjust(10))) 98 | 99 | # print(json.dumps(result.tests_run)) 100 | print("Reading source file...") 101 | 102 | file = open(filename, "r") 103 | print("Transferring results to server...") 104 | payload = { 105 | 'hashkey': hashkey, 106 | 'result': result.tests_run, 107 | 'code': file.read() 108 | } 109 | try: 110 | data = urllib.parse.urlencode(payload) 111 | data = data.encode('ascii') 112 | req = urllib.request.Request('http://report.inflearn.com/submit', data) 113 | with urllib.request.urlopen(req) as response: 114 | resp = response.read() 115 | 116 | if json.loads(resp)['result'] == 0: 117 | print("Transfer failed: hash key is already used.") 118 | else: 119 | print("Transfer completed.") 120 | 121 | except Exception as e: 122 | print("Error occurred on transferring.", e) 123 | -------------------------------------------------------------------------------- /lab_asssigment/5_normal_equation/lab_linear_model.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeamLab/machine_learning_from_scratch_with_python/d52439f9536d98f28164e8c1ecc6e9070843b971/lab_asssigment/5_normal_equation/lab_linear_model.pdf -------------------------------------------------------------------------------- /lab_asssigment/5_normal_equation/linear_model.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | class LinearRegression(object): 4 | def __init__(self, fit_intercept=True, copy_X=True): 5 | self.fit_intercept = fit_intercept 6 | self.copy_X = copy_X 7 | 8 | self._coef = None 9 | self._intercept = None 10 | self._new_X = None 11 | 12 | def fit(self, X, y): 13 | pass 14 | 15 | def predict(self, X): 16 | pass 17 | 18 | @property 19 | def coef(self): 20 | return self._coef 21 | 22 | @property 23 | def intercept(self): 24 | return self._intercept 25 | -------------------------------------------------------------------------------- /lab_asssigment/5_normal_equation/linux_mac/install.sh: -------------------------------------------------------------------------------- 1 | pip install -U backend.ai-client 2 | -------------------------------------------------------------------------------- /lab_asssigment/5_normal_equation/linux_mac/linear_model.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | class LinearRegression(object): 5 | def __init__(self, fit_intercept=True, copy_X=True): 6 | self.fit_intercept = fit_intercept 7 | self.copy_X = copy_X 8 | 9 | self._coef = None 10 | self._intercept = None 11 | self._new_X = None 12 | 13 | def fit(self, X, y): 14 | pass 15 | 16 | def predict(self, X): 17 | pass 18 | 19 | @property 20 | def coef(self): 21 | return self._coef 22 | 23 | @property 24 | def intercept(self): 25 | return self._intercept 26 | -------------------------------------------------------------------------------- /lab_asssigment/5_normal_equation/linux_mac/mlr09.csv: -------------------------------------------------------------------------------- 1 | height_in_feet,weight_in_pounds,successful_field_goals,percent_of_successful_free_throws,average_points_scored 2 | 6.8,225,0.442,0.672,9.2 6.3,180,0.435,0.797,11.7 
6.4,190,0.456,0.761,15.8 6.2,180,0.416,0.651,8.6 6.9,205,0.449,0.9,23.2 6.4,225,0.431,0.78,27.4 6.3,185,0.487,0.771,9.3 6.8,235,0.469,0.75,16 6.9,235,0.435,0.818,4.7 6.7,210,0.48,0.825,12.5 6.9,245,0.516,0.632,20.1 6.9,245,0.493,0.757,9.1 6.3,185,0.374,0.709,8.1 6.1,185,0.424,0.782,8.6 6.2,180,0.441,0.775,20.3 6.8,220,0.503,0.88,25 6.5,194,0.503,0.833,19.2 7.6,225,0.425,0.571,3.3 6.3,210,0.371,0.816,11.2 7.1,240,0.504,0.714,10.5 6.8,225,0.4,0.765,10.1 7.3,263,0.482,0.655,7.2 6.4,210,0.475,0.244,13.6 6.8,235,0.428,0.728,9 7.2,230,0.559,0.721,24.6 6.4,190,0.441,0.757,12.6 6.6,220,0.492,0.747,5.6 6.8,210,0.402,0.739,8.7 6.1,180,0.415,0.713,7.7 6.5,235,0.492,0.742,24.1 6.4,185,0.484,0.861,11.7 6,175,0.387,0.721,7.7 6,192,0.436,0.785,9.6 7.3,263,0.482,0.655,7.2 6.1,180,0.34,0.821,12.3 6.7,240,0.516,0.728,8.9 6.4,210,0.475,0.846,13.6 5.8,160,0.412,0.813,11.2 6.9,230,0.411,0.595,2.8 7,245,0.407,0.573,3.2 7.3,228,0.445,0.726,9.4 5.9,155,0.291,0.707,11.9 6.2,200,0.449,0.804,15.4 6.8,235,0.546,0.784,7.4 7,235,0.48,0.744,18.9 5.9,105,0.359,0.839,7.9 6.1,180,0.528,0.79,12.2 5.7,185,0.352,0.701,11 7.1,245,0.414,0.778,2.8 5.8,180,0.425,0.872,11.8 7.4,240,0.599,0.713,17.1 6.8,225,0.482,0.701,11.6 6.8,215,0.457,0.734,5.8 7,230,0.435,0.764,8.3 3 | -------------------------------------------------------------------------------- /lab_asssigment/5_normal_equation/linux_mac/submit.sh: -------------------------------------------------------------------------------- 1 | export BACKEND_ACCESS_KEY=AKIAQWZD6A6Y5ZVOHSJR 2 | export BACKEND_SECRET_KEY=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 3 | if [ $# -eq 0 ] 4 | then 5 | echo "Please give hash key as argument." 6 | else 7 | backend.ai run --exec "python test.py linear_model.py $1" python3 test.py linear_model.py test.csv train.csv mlr09.csv 8 | fi 9 | -------------------------------------------------------------------------------- /lab_asssigment/5_normal_equation/linux_mac/test.csv: -------------------------------------------------------------------------------- 1 | x,y 2 | 77,79.77515201 3 | 21,23.17727887 4 | 22,25.60926156 5 | 20,17.85738813 6 | 36,41.84986439 7 | 15,9.805234876 8 | 62,58.87465933 9 | 95,97.61793701 10 | 20,18.39512747 11 | 5,8.746747654 12 | 4,2.811415826 13 | 19,17.09537241 14 | 96,95.14907176 15 | 62,61.38800663 16 | 36,40.24701716 17 | 15,14.82248589 18 | 65,66.95806869 19 | 14,16.63507984 20 | 87,90.65513736 21 | 69,77.22982636 22 | 89,92.11906278 23 | 51,46.91387709 24 | 89,89.82634442 25 | 27,21.71380347 26 | 97,97.41206981 27 | 58,57.01631363 28 | 79,78.31056542 29 | 21,19.1315097 30 | 93,93.03483388 31 | 27,26.59112396 32 | 99,97.55155344 33 | 31,31.43524822 34 | 33,35.12724777 35 | 80,78.61042432 36 | 28,33.07112825 37 | 47,51.69967172 38 | 53,53.62235225 39 | 69,69.46306072 40 | 28,27.42497237 41 | 33,36.34644189 42 | 91,95.06140858 43 | 71,68.16724757 44 | 50,50.96155532 45 | 76,78.04237454 46 | 4,5.607664865 47 | 37,36.11334779 48 | 70,67.2352155 49 | 68,65.01324035 50 | 40,38.14753871 51 | 35,34.31141446 52 | 94,95.28503937 53 | 88,87.84749912 54 | 52,54.08170635 55 | 31,31.93063515 56 | 59,59.61247085 57 | 0,-1.040114209 58 | 39,47.49374765 59 | 64,62.60089773 60 | 69,70.9146434 61 | 57,56.14834113 62 | 13,14.05572877 63 | 72,68.11367147 64 | 76,75.59701346 65 | 61,59.225745 66 | 82,85.45504157 67 | 18,17.76197116 68 | 41,38.68888682 69 | 50,50.96343637 70 | 55,51.83503872 71 | 13,17.0761107 72 | 46,46.56141773 73 | 13,10.34754461 74 | 79,77.91032969 75 | 53,50.17008622 76 | 15,13.25690647 77 | 28,31.32274932 78 | 81,73.9308764 
79 | 69,74.45114379 80 | 52,52.01932286 81 | 84,83.68820499 82 | 68,70.3698748 83 | 27,23.44479161 84 | 56,49.83051801 85 | 48,49.88226593 86 | 40,41.04525583 87 | 39,33.37834391 88 | 82,81.29750133 89 | 100,105.5918375 90 | 59,56.82457013 91 | 43,48.67252645 92 | 67,67.02150613 93 | 38,38.43076389 94 | 63,58.61466887 95 | 91,89.12377509 96 | 60,60.9105427 97 | 14,13.83959878 98 | 21,16.89085185 99 | 87,84.06676818 100 | 73,70.34969772 101 | 32,33.38474138 102 | 2,-1.63296825 103 | 82,88.54475895 104 | 19,17.44047622 105 | 74,75.69298554 106 | 42,41.97607107 107 | 12,12.59244741 108 | 1,0.275307261 109 | 90,98.13258005 110 | 89,87.45721555 111 | 0,-2.344738542 112 | 41,39.3294153 113 | 16,16.68715211 114 | 94,96.58888601 115 | 97,97.70342201 116 | 66,67.01715955 117 | 24,25.63476257 118 | 17,13.41310757 119 | 90,95.15647284 120 | 13,9.744164258 121 | 0,-3.467883789 122 | 64,62.82816355 123 | 96,97.27405461 124 | 98,95.58017185 125 | 12,7.468501839 126 | 41,45.44599591 127 | 47,46.69013968 128 | 78,74.4993599 129 | 20,21.63500655 130 | 89,91.59548851 131 | 29,26.49487961 132 | 64,67.38654703 133 | 75,74.25362837 134 | 12,12.07991648 135 | 25,21.32273728 136 | 28,29.31770045 137 | 30,26.48713683 138 | 65,68.94699774 139 | 59,59.10598995 140 | 64,64.37521087 141 | 53,60.20758349 142 | 71,70.34329706 143 | 97,97.1082562 144 | 73,75.7584178 145 | 9,10.80462727 146 | 12,12.11219941 147 | 63,63.28312382 148 | 99,98.03017721 149 | 60,63.19354354 150 | 35,34.8534823 151 | 2,-2.819913974 152 | 60,59.8313966 153 | 32,29.38505024 154 | 94,97.00148372 155 | 84,85.18657275 156 | 63,61.74063192 157 | 22,18.84798163 158 | 81,78.79008525 159 | 93,95.12400481 160 | 33,30.48881287 161 | 7,10.41468095 162 | 42,38.98317436 163 | 46,46.11021062 164 | 54,52.45103628 165 | 16,21.16523945 166 | 49,52.28620611 167 | 43,44.18863945 168 | 95,97.13832018 169 | 66,67.22008001 170 | 21,18.98322306 171 | 35,24.3884599 172 | 80,79.44769523 173 | 37,40.03504862 174 | 54,53.32005764 175 | 56,54.55446979 176 | 1,-2.761182595 177 | 32,37.80182795 178 | 58,57.48741435 179 | 32,36.06292994 180 | 46,49.83538167 181 | 72,74.68953276 182 | 17,14.86159401 183 | 97,101.0697879 184 | 93,99.43577876 185 | 91,91.69240746 186 | 37,34.12473248 187 | 4,6.079390073 188 | 54,59.07247174 189 | 51,56.43046022 190 | 27,30.49412933 191 | 46,48.35172635 192 | 92,89.73153611 193 | 73,72.86282528 194 | 77,80.97144285 195 | 91,91.36566374 196 | 61,60.07137496 197 | 99,99.87382707 198 | 4,8.655714172 199 | 72,69.39858505 200 | 19,19.38780134 201 | 57,53.11628433 202 | 78,78.39683006 203 | 26,25.75612514 204 | 74,75.07484683 205 | 90,92.88772282 206 | 66,69.45498498 207 | 13,13.12109842 208 | 40,48.09843134 209 | 77,79.3142548 210 | 67,68.48820749 211 | 75,73.2300846 212 | 23,24.68362712 213 | 45,41.90368917 214 | 59,62.22635684 215 | 44,45.96396877 216 | 23,23.52647153 217 | 55,51.80035866 218 | 55,51.10774273 219 | 95,95.79747345 220 | 12,9.241138977 221 | 4,7.646529763 222 | 7,9.281699753 223 | 100,103.5266162 224 | 48,47.41006725 225 | 42,42.03835773 226 | 96,96.11982476 227 | 39,38.05766408 228 | 100,105.4503788 229 | 87,88.80306911 230 | 14,15.49301141 231 | 14,12.42624606 232 | 37,40.00709598 233 | 5,5.634030902 234 | 88,87.36938931 235 | 91,89.73951993 236 | 65,66.61499643 237 | 74,72.9138853 238 | 56,57.19103506 239 | 16,11.21710477 240 | 5,0.676076749 241 | 28,28.15668543 242 | 92,95.3958003 243 | 46,52.05490703 244 | 54,59.70864577 245 | 39,36.79224762 246 | 44,37.08457698 247 | 31,24.18437976 248 | 68,67.28725332 249 | 86,82.870594 250 | 
90,89.899991 251 | 38,36.94173178 252 | 21,19.87562242 253 | 95,90.71481654 254 | 56,61.09367762 255 | 60,60.11134958 256 | 65,64.83296316 257 | 78,81.40381769 258 | 89,92.40217686 259 | 6,2.576625376 260 | 67,63.80768172 261 | 36,38.67780759 262 | 16,16.82839701 263 | 100,99.78687252 264 | 45,44.68913433 265 | 73,71.00377824 266 | 57,51.57326718 267 | 20,19.87846479 268 | 76,79.50341495 269 | 34,34.58876491 270 | 55,55.7383467 271 | 72,68.19721905 272 | 55,55.81628509 273 | 8,9.391416798 274 | 56,56.01448111 275 | 72,77.9969477 276 | 58,55.37049953 277 | 6,11.89457829 278 | 96,94.79081712 279 | 23,25.69041546 280 | 58,53.52042319 281 | 23,18.31396758 282 | 19,21.42637785 283 | 25,30.41303282 284 | 64,67.68142149 285 | 21,17.0854783 286 | 59,60.91792707 287 | 19,14.99514319 288 | 16,16.74923937 289 | 42,41.46923883 290 | 43,42.84526108 291 | 61,59.12912974 292 | 92,91.30863673 293 | 11,8.673336357 294 | 41,39.31485292 295 | 1,5.313686205 296 | 8,5.405220518 297 | 71,68.5458879 298 | 46,47.33487629 299 | 55,54.09063686 300 | 62,63.29717058 301 | 47,52.45946688 -------------------------------------------------------------------------------- /lab_asssigment/5_normal_equation/linux_mac/test.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import urllib.parse 3 | import urllib.request 4 | import json 5 | import argparse 6 | import os 7 | import types 8 | import sys 9 | 10 | 11 | parser = argparse.ArgumentParser(description="Autoscorer") 12 | parser.add_argument("filename", help="File to submit") 13 | parser.add_argument("hash", help="Hash key") 14 | 15 | args = parser.parse_args() 16 | if args.hash: 17 | hashkey = args.hash 18 | if args.filename: 19 | filename = args.filename 20 | 21 | 22 | class TestResult(unittest.TextTestResult): 23 | _previousTestClass = None 24 | _testRunEntered = False 25 | _moduleSetUpFailed = False 26 | 27 | def __init__(self, stream=None, descriptions=None, verbosity=1): 28 | super().__init__( 29 | stream=stream, descriptions=descriptions, verbosity=verbosity) 30 | self.tests_run = [] 31 | 32 | def getTestsReport(self): 33 | """Returns the run tests as a list of the form [test_id, result]""" 34 | return self.tests_run 35 | 36 | def addError(self, test, err): 37 | """Called when an error has occurred. 'err' is a tuple of values as 38 | returned by sys.exc_info(). 39 | """ 40 | super().addError(test, err) 41 | self.errors.append((test, self._exc_info_to_string(err, test))) 42 | self._mirrorOutput = True 43 | self.tests_run.append([test.id(), 0]) 44 | 45 | def addFailure(self, test, err): 46 | """Called when an error has occurred. 
'err' is a tuple of values as 47 | returned by sys.exc_info().""" 48 | super().addFailure(test, err) 49 | # super().addFailure() has already appended this failure to self.failures 50 | self._mirrorOutput = True 51 | self.tests_run.append([test.id(), 0]) 52 | 53 | def addSuccess(self, test): 54 | "Called when a test has completed successfully" 55 | super().addSuccess(test) 56 | self.tests_run.append([test.id(), 1]) 57 | 58 | 59 | with urllib.request.urlopen('http://datasets.lablup.ai/private/python-tests/unit_test_linear_model.py') as response: 60 | test_code = response.read() 61 | 62 | 63 | test_module = types.ModuleType( 64 | 'test_code', 65 | doc='Test case') 66 | 67 | exec(test_code, test_module.__dict__)  # runs remotely fetched test code; the source is assumed trusted 68 | sys.modules['test_code'] = test_module 69 | 70 | import test_code as tc 71 | loader = unittest.loader.defaultTestLoader 72 | null_stream = open(os.devnull, "w") 73 | test_suite = loader.loadTestsFromModule(tc) 74 | result = unittest.TextTestRunner( 75 | stream=null_stream, verbosity=2, resultclass=TestResult).run(test_suite) 76 | 77 | print("Generating result sheet...") 78 | print("-------------------------------------------------------------------") 79 | print(" Test Case | Passed? | Feedback") 80 | print("-------------------------------------------------------------------") 81 | for c, r in result.tests_run: 82 | print("{0:s} | {1:s} | {2} ".format( 83 | c.rsplit('.', 1)[1].rjust(26), 84 | "PASSED" if r == 1 else "FAILED", 85 | "Good Job".rjust(10) if r == 1 else "Failed".rjust(10))) 86 | 87 | # print(json.dumps(result.tests_run)) 88 | print("Reading source file...") 89 | 90 | file = open(filename, "r") 91 | print("Transferring results to server...") 92 | payload = { 93 | 'hashkey': hashkey, 94 | 'result': result.tests_run, 95 | 'code': file.read() 96 | } 97 | try: 98 | data = urllib.parse.urlencode(payload) 99 | data = data.encode('ascii') 100 | req = urllib.request.Request('http://report.inflearn.com/submit', data) 101 | with urllib.request.urlopen(req) as response: 102 | resp = response.read() 103 | 104 | if json.loads(resp)['result'] == 0: 105 | print("Transfer failed: hash key is already used.") 106 | else: 107 | print("Transfer completed.") 108 | 109 | except Exception as e: 110 | print("Error occurred on transferring.", e) 111 | -------------------------------------------------------------------------------- /lab_asssigment/5_normal_equation/windows/install.bat: -------------------------------------------------------------------------------- 1 | pip install -U backend.ai-client 2 | -------------------------------------------------------------------------------- /lab_asssigment/5_normal_equation/windows/linear_model.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | class LinearRegression(object): 5 | def __init__(self, fit_intercept=True, copy_X=True): 6 | self.fit_intercept = fit_intercept 7 | self.copy_X = copy_X 8 | 9 | self._coef = None 10 | self._intercept = None 11 | self._new_X = None 12 | 13 | def fit(self, X, y): 14 | pass  # to be implemented: solve the normal equation (see the sketch below) 15 | 16 | def predict(self, X): 17 | pass  # to be implemented: apply the learned coefficients 18 | 19 | @property 20 | def coef(self): 21 | return self._coef 22 | 23 | @property 24 | def intercept(self): 25 | return self._intercept 26 | 
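A minimal sketch of one way the fit and predict stubs in linear_model.py above could be completed, using the closed-form normal equation theta = (X^T X)^(-1) X^T y. This is one illustrative solution written for this listing, not the official answer key; np.linalg.pinv stands in for a plain inverse so that a singular X^T X does not break fit.

import numpy as np


class LinearRegression(object):
    """One possible completion of the skeleton above (normal equation)."""

    def __init__(self, fit_intercept=True, copy_X=True):
        self.fit_intercept = fit_intercept
        self.copy_X = copy_X
        self._coef = None
        self._intercept = None
        self._new_X = None

    def fit(self, X, y):
        X = np.asarray(X, dtype=float)
        if self.copy_X:
            X = X.copy()
        if X.ndim == 1:
            X = X.reshape(-1, 1)  # promote a single feature to a column vector
        if self.fit_intercept:
            X = np.hstack((np.ones((X.shape[0], 1)), X))  # prepend bias column
        self._new_X = X
        y = np.asarray(y, dtype=float).reshape(-1)
        # Normal equation; pinv tolerates a singular X^T X
        theta = np.linalg.pinv(X.T @ X) @ X.T @ y
        if self.fit_intercept:
            self._intercept = theta[0]
            self._coef = theta[1:]
        else:
            self._intercept = 0.0
            self._coef = theta
        return self

    def predict(self, X):
        X = np.asarray(X, dtype=float)
        if X.ndim == 1:
            X = X.reshape(-1, 1)
        return X @ self._coef + self._intercept

    @property
    def coef(self):
        return self._coef

    @property
    def intercept(self):
        return self._intercept

Because the normal equation is exact, a fit like this should recover a slope close to 1 and an intercept close to 0 on the x,y pairs in this assignment's test.csv, consistent with how that data appears to have been generated.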
-------------------------------------------------------------------------------- /lab_asssigment/5_normal_equation/windows/mlr09.csv: -------------------------------------------------------------------------------- 1 | height_in_feet,weight_in_pounds,successful_field_goals,percent_of_successful_free_throws,average_points_scored 2 | 6.8,225,0.442,0.672,9.2 6.3,180,0.435,0.797,11.7 6.4,190,0.456,0.761,15.8 6.2,180,0.416,0.651,8.6 6.9,205,0.449,0.9,23.2 6.4,225,0.431,0.78,27.4 6.3,185,0.487,0.771,9.3 6.8,235,0.469,0.75,16 6.9,235,0.435,0.818,4.7 6.7,210,0.48,0.825,12.5 6.9,245,0.516,0.632,20.1 6.9,245,0.493,0.757,9.1 6.3,185,0.374,0.709,8.1 6.1,185,0.424,0.782,8.6 6.2,180,0.441,0.775,20.3 6.8,220,0.503,0.88,25 6.5,194,0.503,0.833,19.2 7.6,225,0.425,0.571,3.3 6.3,210,0.371,0.816,11.2 7.1,240,0.504,0.714,10.5 6.8,225,0.4,0.765,10.1 7.3,263,0.482,0.655,7.2 6.4,210,0.475,0.244,13.6 6.8,235,0.428,0.728,9 7.2,230,0.559,0.721,24.6 6.4,190,0.441,0.757,12.6 6.6,220,0.492,0.747,5.6 6.8,210,0.402,0.739,8.7 6.1,180,0.415,0.713,7.7 6.5,235,0.492,0.742,24.1 6.4,185,0.484,0.861,11.7 6,175,0.387,0.721,7.7 6,192,0.436,0.785,9.6 7.3,263,0.482,0.655,7.2 6.1,180,0.34,0.821,12.3 6.7,240,0.516,0.728,8.9 6.4,210,0.475,0.846,13.6 5.8,160,0.412,0.813,11.2 6.9,230,0.411,0.595,2.8 7,245,0.407,0.573,3.2 7.3,228,0.445,0.726,9.4 5.9,155,0.291,0.707,11.9 6.2,200,0.449,0.804,15.4 6.8,235,0.546,0.784,7.4 7,235,0.48,0.744,18.9 5.9,105,0.359,0.839,7.9 6.1,180,0.528,0.79,12.2 5.7,185,0.352,0.701,11 7.1,245,0.414,0.778,2.8 5.8,180,0.425,0.872,11.8 7.4,240,0.599,0.713,17.1 6.8,225,0.482,0.701,11.6 6.8,215,0.457,0.734,5.8 7,230,0.435,0.764,8.3 3 | -------------------------------------------------------------------------------- /lab_asssigment/5_normal_equation/windows/submit.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | set BACKEND_ACCESS_KEY=AKIAQWZD6A6Y5ZVOHSJR 4 | set BACKEND_SECRET_KEY=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 5 | 6 | set tmp="%1" 7 | if "%tmp:"=.%"==".." ( 8 | echo "Please give hash key as argument." 
9 | ) else ( 10 | backend.ai run --exec "python test.py linear_model.py %tmp%" python3 test.py linear_model.py test.csv train.csv mlr09.csv 11 | ) 12 | -------------------------------------------------------------------------------- /lab_asssigment/5_normal_equation/windows/test.csv: -------------------------------------------------------------------------------- 1 | x,y 2 | 77,79.77515201 3 | 21,23.17727887 4 | 22,25.60926156 5 | 20,17.85738813 6 | 36,41.84986439 7 | 15,9.805234876 8 | 62,58.87465933 9 | 95,97.61793701 10 | 20,18.39512747 11 | 5,8.746747654 12 | 4,2.811415826 13 | 19,17.09537241 14 | 96,95.14907176 15 | 62,61.38800663 16 | 36,40.24701716 17 | 15,14.82248589 18 | 65,66.95806869 19 | 14,16.63507984 20 | 87,90.65513736 21 | 69,77.22982636 22 | 89,92.11906278 23 | 51,46.91387709 24 | 89,89.82634442 25 | 27,21.71380347 26 | 97,97.41206981 27 | 58,57.01631363 28 | 79,78.31056542 29 | 21,19.1315097 30 | 93,93.03483388 31 | 27,26.59112396 32 | 99,97.55155344 33 | 31,31.43524822 34 | 33,35.12724777 35 | 80,78.61042432 36 | 28,33.07112825 37 | 47,51.69967172 38 | 53,53.62235225 39 | 69,69.46306072 40 | 28,27.42497237 41 | 33,36.34644189 42 | 91,95.06140858 43 | 71,68.16724757 44 | 50,50.96155532 45 | 76,78.04237454 46 | 4,5.607664865 47 | 37,36.11334779 48 | 70,67.2352155 49 | 68,65.01324035 50 | 40,38.14753871 51 | 35,34.31141446 52 | 94,95.28503937 53 | 88,87.84749912 54 | 52,54.08170635 55 | 31,31.93063515 56 | 59,59.61247085 57 | 0,-1.040114209 58 | 39,47.49374765 59 | 64,62.60089773 60 | 69,70.9146434 61 | 57,56.14834113 62 | 13,14.05572877 63 | 72,68.11367147 64 | 76,75.59701346 65 | 61,59.225745 66 | 82,85.45504157 67 | 18,17.76197116 68 | 41,38.68888682 69 | 50,50.96343637 70 | 55,51.83503872 71 | 13,17.0761107 72 | 46,46.56141773 73 | 13,10.34754461 74 | 79,77.91032969 75 | 53,50.17008622 76 | 15,13.25690647 77 | 28,31.32274932 78 | 81,73.9308764 79 | 69,74.45114379 80 | 52,52.01932286 81 | 84,83.68820499 82 | 68,70.3698748 83 | 27,23.44479161 84 | 56,49.83051801 85 | 48,49.88226593 86 | 40,41.04525583 87 | 39,33.37834391 88 | 82,81.29750133 89 | 100,105.5918375 90 | 59,56.82457013 91 | 43,48.67252645 92 | 67,67.02150613 93 | 38,38.43076389 94 | 63,58.61466887 95 | 91,89.12377509 96 | 60,60.9105427 97 | 14,13.83959878 98 | 21,16.89085185 99 | 87,84.06676818 100 | 73,70.34969772 101 | 32,33.38474138 102 | 2,-1.63296825 103 | 82,88.54475895 104 | 19,17.44047622 105 | 74,75.69298554 106 | 42,41.97607107 107 | 12,12.59244741 108 | 1,0.275307261 109 | 90,98.13258005 110 | 89,87.45721555 111 | 0,-2.344738542 112 | 41,39.3294153 113 | 16,16.68715211 114 | 94,96.58888601 115 | 97,97.70342201 116 | 66,67.01715955 117 | 24,25.63476257 118 | 17,13.41310757 119 | 90,95.15647284 120 | 13,9.744164258 121 | 0,-3.467883789 122 | 64,62.82816355 123 | 96,97.27405461 124 | 98,95.58017185 125 | 12,7.468501839 126 | 41,45.44599591 127 | 47,46.69013968 128 | 78,74.4993599 129 | 20,21.63500655 130 | 89,91.59548851 131 | 29,26.49487961 132 | 64,67.38654703 133 | 75,74.25362837 134 | 12,12.07991648 135 | 25,21.32273728 136 | 28,29.31770045 137 | 30,26.48713683 138 | 65,68.94699774 139 | 59,59.10598995 140 | 64,64.37521087 141 | 53,60.20758349 142 | 71,70.34329706 143 | 97,97.1082562 144 | 73,75.7584178 145 | 9,10.80462727 146 | 12,12.11219941 147 | 63,63.28312382 148 | 99,98.03017721 149 | 60,63.19354354 150 | 35,34.8534823 151 | 2,-2.819913974 152 | 60,59.8313966 153 | 32,29.38505024 154 | 94,97.00148372 155 | 84,85.18657275 156 | 63,61.74063192 157 | 22,18.84798163 158 | 81,78.79008525 159 | 
93,95.12400481 160 | 33,30.48881287 161 | 7,10.41468095 162 | 42,38.98317436 163 | 46,46.11021062 164 | 54,52.45103628 165 | 16,21.16523945 166 | 49,52.28620611 167 | 43,44.18863945 168 | 95,97.13832018 169 | 66,67.22008001 170 | 21,18.98322306 171 | 35,24.3884599 172 | 80,79.44769523 173 | 37,40.03504862 174 | 54,53.32005764 175 | 56,54.55446979 176 | 1,-2.761182595 177 | 32,37.80182795 178 | 58,57.48741435 179 | 32,36.06292994 180 | 46,49.83538167 181 | 72,74.68953276 182 | 17,14.86159401 183 | 97,101.0697879 184 | 93,99.43577876 185 | 91,91.69240746 186 | 37,34.12473248 187 | 4,6.079390073 188 | 54,59.07247174 189 | 51,56.43046022 190 | 27,30.49412933 191 | 46,48.35172635 192 | 92,89.73153611 193 | 73,72.86282528 194 | 77,80.97144285 195 | 91,91.36566374 196 | 61,60.07137496 197 | 99,99.87382707 198 | 4,8.655714172 199 | 72,69.39858505 200 | 19,19.38780134 201 | 57,53.11628433 202 | 78,78.39683006 203 | 26,25.75612514 204 | 74,75.07484683 205 | 90,92.88772282 206 | 66,69.45498498 207 | 13,13.12109842 208 | 40,48.09843134 209 | 77,79.3142548 210 | 67,68.48820749 211 | 75,73.2300846 212 | 23,24.68362712 213 | 45,41.90368917 214 | 59,62.22635684 215 | 44,45.96396877 216 | 23,23.52647153 217 | 55,51.80035866 218 | 55,51.10774273 219 | 95,95.79747345 220 | 12,9.241138977 221 | 4,7.646529763 222 | 7,9.281699753 223 | 100,103.5266162 224 | 48,47.41006725 225 | 42,42.03835773 226 | 96,96.11982476 227 | 39,38.05766408 228 | 100,105.4503788 229 | 87,88.80306911 230 | 14,15.49301141 231 | 14,12.42624606 232 | 37,40.00709598 233 | 5,5.634030902 234 | 88,87.36938931 235 | 91,89.73951993 236 | 65,66.61499643 237 | 74,72.9138853 238 | 56,57.19103506 239 | 16,11.21710477 240 | 5,0.676076749 241 | 28,28.15668543 242 | 92,95.3958003 243 | 46,52.05490703 244 | 54,59.70864577 245 | 39,36.79224762 246 | 44,37.08457698 247 | 31,24.18437976 248 | 68,67.28725332 249 | 86,82.870594 250 | 90,89.899991 251 | 38,36.94173178 252 | 21,19.87562242 253 | 95,90.71481654 254 | 56,61.09367762 255 | 60,60.11134958 256 | 65,64.83296316 257 | 78,81.40381769 258 | 89,92.40217686 259 | 6,2.576625376 260 | 67,63.80768172 261 | 36,38.67780759 262 | 16,16.82839701 263 | 100,99.78687252 264 | 45,44.68913433 265 | 73,71.00377824 266 | 57,51.57326718 267 | 20,19.87846479 268 | 76,79.50341495 269 | 34,34.58876491 270 | 55,55.7383467 271 | 72,68.19721905 272 | 55,55.81628509 273 | 8,9.391416798 274 | 56,56.01448111 275 | 72,77.9969477 276 | 58,55.37049953 277 | 6,11.89457829 278 | 96,94.79081712 279 | 23,25.69041546 280 | 58,53.52042319 281 | 23,18.31396758 282 | 19,21.42637785 283 | 25,30.41303282 284 | 64,67.68142149 285 | 21,17.0854783 286 | 59,60.91792707 287 | 19,14.99514319 288 | 16,16.74923937 289 | 42,41.46923883 290 | 43,42.84526108 291 | 61,59.12912974 292 | 92,91.30863673 293 | 11,8.673336357 294 | 41,39.31485292 295 | 1,5.313686205 296 | 8,5.405220518 297 | 71,68.5458879 298 | 46,47.33487629 299 | 55,54.09063686 300 | 62,63.29717058 301 | 47,52.45946688 -------------------------------------------------------------------------------- /lab_asssigment/5_normal_equation/windows/test.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import urllib.parse 3 | import urllib.request 4 | import json 5 | import argparse 6 | import os 7 | import types 8 | import sys 9 | 10 | 11 | parser = argparse.ArgumentParser(description="Autoscorer") 12 | parser.add_argument("filename", help="File to submit") 13 | parser.add_argument("hash", help="Hash key") 14 | 15 | args = 
parser.parse_args() 16 | if args.hash: 17 | hashkey = args.hash 18 | if args.filename: 19 | filename = args.filename 20 | 21 | 22 | class TestResult(unittest.TextTestResult): 23 | _previousTestClass = None 24 | _testRunEntered = False 25 | _moduleSetUpFailed = False 26 | 27 | def __init__(self, stream=None, descriptions=None, verbosity=1): 28 | super().__init__( 29 | stream=stream, descriptions=descriptions, verbosity=verbosity) 30 | self.tests_run = [] 31 | 32 | def getTestsReport(self): 33 | """Returns the run tests as a list of the form [test_id, result]""" 34 | return self.tests_run 35 | 36 | def addError(self, test, err): 37 | """Called when an error has occurred. 'err' is a tuple of values as 38 | returned by sys.exc_info(). 39 | """ 40 | super().addError(test, err) 41 | # super().addError() has already appended this error to self.errors 42 | self._mirrorOutput = True 43 | self.tests_run.append([test.id(), 0]) 44 | 45 | def addFailure(self, test, err): 46 | """Called when a test failure has occurred. 'err' is a tuple of values as 47 | returned by sys.exc_info().""" 48 | super().addFailure(test, err) 49 | # super().addFailure() has already appended this failure to self.failures 50 | self._mirrorOutput = True 51 | self.tests_run.append([test.id(), 0]) 52 | 53 | def addSuccess(self, test): 54 | "Called when a test has completed successfully" 55 | super().addSuccess(test) 56 | self.tests_run.append([test.id(), 1]) 57 | 58 | 59 | with urllib.request.urlopen('http://datasets.lablup.ai/private/python-tests/unit_test_linear_model.py') as response: 60 | test_code = response.read() 61 | 62 | 63 | test_module = types.ModuleType( 64 | 'test_code', 65 | doc='Test case') 66 | 67 | exec(test_code, test_module.__dict__)  # runs remotely fetched test code; the source is assumed trusted 68 | sys.modules['test_code'] = test_module 69 | 70 | import test_code as tc 71 | loader = unittest.loader.defaultTestLoader 72 | null_stream = open(os.devnull, "w") 73 | test_suite = loader.loadTestsFromModule(tc) 74 | result = unittest.TextTestRunner( 75 | stream=null_stream, verbosity=2, resultclass=TestResult).run(test_suite) 76 | 77 | print("Generating result sheet...") 78 | print("-------------------------------------------------------------------") 79 | print(" Test Case | Passed? 
| Feedback") 80 | print("-------------------------------------------------------------------") 81 | for c, r in result.tests_run: 82 | print("{0:s} | {1:s} | {2} ".format( 83 | c.rsplit('.', 1)[1].rjust(26), 84 | "PASSED" if r == 1 else "FAILED", 85 | "Good Job".rjust(10) if r == 1 else "Failed".rjust(10))) 86 | 87 | # print(json.dumps(result.tests_run)) 88 | print("Reading source file...") 89 | 90 | file = open(filename, "r") 91 | print("Transferring results to server...") 92 | payload = { 93 | 'hashkey': hashkey, 94 | 'result': result.tests_run, 95 | 'code': file.read() 96 | } 97 | try: 98 | data = urllib.parse.urlencode(payload) 99 | data = data.encode('ascii') 100 | req = urllib.request.Request('http://report.inflearn.com/submit', data) 101 | with urllib.request.urlopen(req) as response: 102 | resp = response.read() 103 | 104 | if json.loads(resp)['result'] == 0: 105 | print("Transfer failed: hash key is already used.") 106 | else: 107 | print("Transfer completed.") 108 | 109 | except Exception as e: 110 | print("Error occurred on transferring.", e) 111 | -------------------------------------------------------------------------------- /lab_asssigment/6_gradient_descent/linear_model.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | class LinearRegressionGD(object): 5 | def __init__(self, fit_intercept=True, copy_X=True, 6 | eta0=0.001, epochs=1000, weight_decay=0.9): 7 | self.fit_intercept = fit_intercept 8 | self.copy_X = copy_X 9 | self._eta0 = eta0 10 | self._epochs = epochs 11 | 12 | self._cost_history = [] 13 | 14 | self._coef = None 15 | self._intercept = None 16 | self._new_X = None 17 | self._w_history = None 18 | self._weight_decay = weight_decay 19 | 20 | def cost(self, h, y): 21 | pass 22 | 23 | def hypothesis_function(self, X, theta): 24 | pass 25 | 26 | def gradient(self, X, y, theta): 27 | pass 28 | 29 | def fit(self, X, y): 30 | # Write your code 31 | 32 | for epoch in range(self._epochs): 33 | # 아래 코드를 반드시 활용할 것 34 | gradient = self.gradient(self._new_X, y, theta).flatten() 35 | 36 | # Write your code 37 | 38 | if epoch % 100 == 0: 39 | self._w_history.append(theta) 40 | cost = self.cost( 41 | self.hypothesis_function(self._new_X, theta), y) 42 | self._cost_history.append(cost) 43 | self._eta0 = self._eta0 * self._weight_decay 44 | 45 | # Write your code 46 | 47 | def predict(self, X): 48 | pass 49 | 50 | @property 51 | def coef(self): 52 | return self._coef 53 | 54 | @property 55 | def intercept(self): 56 | return self._intercept 57 | 58 | @property 59 | def weights_history(self): 60 | return np.array(self._w_history) 61 | 62 | @property 63 | def cost_history(self): 64 | return self._cost_history 65 | -------------------------------------------------------------------------------- /lab_asssigment/6_gradient_descent/linux_mac/install.sh: -------------------------------------------------------------------------------- 1 | pip install -U backend.ai-client 2 | -------------------------------------------------------------------------------- /lab_asssigment/6_gradient_descent/linux_mac/linear_model.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | class LinearRegressionGD(object): 5 | def __init__(self, fit_intercept=True, copy_X=True, 6 | eta0=0.001, epochs=1000, weight_decay=0.9): 7 | self.fit_intercept = fit_intercept 8 | self.copy_X = copy_X 9 | self._eta0 = eta0 10 | self._epochs = epochs 11 | 12 | self._cost_history = [] 13 | 14 | 
-------------------------------------------------------------------------------- /lab_asssigment/6_gradient_descent/linux_mac/install.sh: -------------------------------------------------------------------------------- 1 | pip install -U backend.ai-client 2 | -------------------------------------------------------------------------------- /lab_asssigment/6_gradient_descent/linux_mac/linear_model.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | class LinearRegressionGD(object): 5 | def __init__(self, fit_intercept=True, copy_X=True, 6 | eta0=0.001, epochs=1000, weight_decay=0.9): 7 | self.fit_intercept = fit_intercept 8 | self.copy_X = copy_X 9 | self._eta0 = eta0 10 | self._epochs = epochs 11 | 12 | self._cost_history = [] 13 | 14 | self._coef = None 15 | self._intercept = None 16 | self._new_X = None 17 | self._w_history = None 18 | self._weight_decay = weight_decay 19 | 20 | def cost(self, h, y): 21 | pass 22 | 23 | def hypothesis_function(self, X, theta): 24 | pass 25 | 26 | def gradient(self, X, y, theta): 27 | pass 28 | 29 | def fit(self, X, y): 30 | # Write your code (set up self._new_X and initialize theta here) 31 | 32 | for epoch in range(self._epochs): 33 | # Be sure to use the line of code below 34 | gradient = self.gradient(self._new_X, y, theta).flatten() 35 | 36 | # Write your code 37 | 38 | if epoch % 100 == 0: 39 | self._w_history.append(theta) 40 | cost = self.cost( 41 | self.hypothesis_function(self._new_X, theta), y) 42 | self._cost_history.append(cost) 43 | self._eta0 = self._eta0 * self._weight_decay 44 | 45 | # Write your code 46 | 47 | def predict(self, X): 48 | pass 49 | 50 | @property 51 | def coef(self): 52 | return self._coef 53 | 54 | @property 55 | def intercept(self): 56 | return self._intercept 57 | 58 | @property 59 | def weights_history(self): 60 | return np.array(self._w_history) 61 | 62 | @property 63 | def cost_history(self): 64 | return self._cost_history 65 | -------------------------------------------------------------------------------- /lab_asssigment/6_gradient_descent/linux_mac/mlr09.csv: -------------------------------------------------------------------------------- 1 | height_in_feet,weight_in_pounds,successful_field_goals,percent_of_successful_free_throws,average_points_scored 2 | 6.8,225,0.442,0.672,9.2 6.3,180,0.435,0.797,11.7 6.4,190,0.456,0.761,15.8 6.2,180,0.416,0.651,8.6 6.9,205,0.449,0.9,23.2 6.4,225,0.431,0.78,27.4 6.3,185,0.487,0.771,9.3 6.8,235,0.469,0.75,16 6.9,235,0.435,0.818,4.7 6.7,210,0.48,0.825,12.5 6.9,245,0.516,0.632,20.1 6.9,245,0.493,0.757,9.1 6.3,185,0.374,0.709,8.1 6.1,185,0.424,0.782,8.6 6.2,180,0.441,0.775,20.3 6.8,220,0.503,0.88,25 6.5,194,0.503,0.833,19.2 7.6,225,0.425,0.571,3.3 6.3,210,0.371,0.816,11.2 7.1,240,0.504,0.714,10.5 6.8,225,0.4,0.765,10.1 7.3,263,0.482,0.655,7.2 6.4,210,0.475,0.244,13.6 6.8,235,0.428,0.728,9 7.2,230,0.559,0.721,24.6 6.4,190,0.441,0.757,12.6 6.6,220,0.492,0.747,5.6 6.8,210,0.402,0.739,8.7 6.1,180,0.415,0.713,7.7 6.5,235,0.492,0.742,24.1 6.4,185,0.484,0.861,11.7 6,175,0.387,0.721,7.7 6,192,0.436,0.785,9.6 7.3,263,0.482,0.655,7.2 6.1,180,0.34,0.821,12.3 6.7,240,0.516,0.728,8.9 6.4,210,0.475,0.846,13.6 5.8,160,0.412,0.813,11.2 6.9,230,0.411,0.595,2.8 7,245,0.407,0.573,3.2 7.3,228,0.445,0.726,9.4 5.9,155,0.291,0.707,11.9 6.2,200,0.449,0.804,15.4 6.8,235,0.546,0.784,7.4 7,235,0.48,0.744,18.9 5.9,105,0.359,0.839,7.9 6.1,180,0.528,0.79,12.2 5.7,185,0.352,0.701,11 7.1,245,0.414,0.778,2.8 5.8,180,0.425,0.872,11.8 7.4,240,0.599,0.713,17.1 6.8,225,0.482,0.701,11.6 6.8,215,0.457,0.734,5.8 7,230,0.435,0.764,8.3 3 | -------------------------------------------------------------------------------- /lab_asssigment/6_gradient_descent/linux_mac/submit.sh: -------------------------------------------------------------------------------- 1 | export BACKEND_ACCESS_KEY=AKIAQWZD6A6Y5ZVOHSJR 2 | export BACKEND_SECRET_KEY=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 3 | if [ $# -eq 0 ] 4 | then 5 | echo "Please give hash key as argument." 
6 | else 7 | backend.ai run --exec "python test.py linear_model.py $1" python3 test.py linear_model.py test.csv train.csv mlr09.csv 8 | fi 9 | -------------------------------------------------------------------------------- /lab_asssigment/6_gradient_descent/linux_mac/test.csv: -------------------------------------------------------------------------------- 1 | x,y 2 | 77,79.77515201 3 | 21,23.17727887 4 | 22,25.60926156 5 | 20,17.85738813 6 | 36,41.84986439 7 | 15,9.805234876 8 | 62,58.87465933 9 | 95,97.61793701 10 | 20,18.39512747 11 | 5,8.746747654 12 | 4,2.811415826 13 | 19,17.09537241 14 | 96,95.14907176 15 | 62,61.38800663 16 | 36,40.24701716 17 | 15,14.82248589 18 | 65,66.95806869 19 | 14,16.63507984 20 | 87,90.65513736 21 | 69,77.22982636 22 | 89,92.11906278 23 | 51,46.91387709 24 | 89,89.82634442 25 | 27,21.71380347 26 | 97,97.41206981 27 | 58,57.01631363 28 | 79,78.31056542 29 | 21,19.1315097 30 | 93,93.03483388 31 | 27,26.59112396 32 | 99,97.55155344 33 | 31,31.43524822 34 | 33,35.12724777 35 | 80,78.61042432 36 | 28,33.07112825 37 | 47,51.69967172 38 | 53,53.62235225 39 | 69,69.46306072 40 | 28,27.42497237 41 | 33,36.34644189 42 | 91,95.06140858 43 | 71,68.16724757 44 | 50,50.96155532 45 | 76,78.04237454 46 | 4,5.607664865 47 | 37,36.11334779 48 | 70,67.2352155 49 | 68,65.01324035 50 | 40,38.14753871 51 | 35,34.31141446 52 | 94,95.28503937 53 | 88,87.84749912 54 | 52,54.08170635 55 | 31,31.93063515 56 | 59,59.61247085 57 | 0,-1.040114209 58 | 39,47.49374765 59 | 64,62.60089773 60 | 69,70.9146434 61 | 57,56.14834113 62 | 13,14.05572877 63 | 72,68.11367147 64 | 76,75.59701346 65 | 61,59.225745 66 | 82,85.45504157 67 | 18,17.76197116 68 | 41,38.68888682 69 | 50,50.96343637 70 | 55,51.83503872 71 | 13,17.0761107 72 | 46,46.56141773 73 | 13,10.34754461 74 | 79,77.91032969 75 | 53,50.17008622 76 | 15,13.25690647 77 | 28,31.32274932 78 | 81,73.9308764 79 | 69,74.45114379 80 | 52,52.01932286 81 | 84,83.68820499 82 | 68,70.3698748 83 | 27,23.44479161 84 | 56,49.83051801 85 | 48,49.88226593 86 | 40,41.04525583 87 | 39,33.37834391 88 | 82,81.29750133 89 | 100,105.5918375 90 | 59,56.82457013 91 | 43,48.67252645 92 | 67,67.02150613 93 | 38,38.43076389 94 | 63,58.61466887 95 | 91,89.12377509 96 | 60,60.9105427 97 | 14,13.83959878 98 | 21,16.89085185 99 | 87,84.06676818 100 | 73,70.34969772 101 | 32,33.38474138 102 | 2,-1.63296825 103 | 82,88.54475895 104 | 19,17.44047622 105 | 74,75.69298554 106 | 42,41.97607107 107 | 12,12.59244741 108 | 1,0.275307261 109 | 90,98.13258005 110 | 89,87.45721555 111 | 0,-2.344738542 112 | 41,39.3294153 113 | 16,16.68715211 114 | 94,96.58888601 115 | 97,97.70342201 116 | 66,67.01715955 117 | 24,25.63476257 118 | 17,13.41310757 119 | 90,95.15647284 120 | 13,9.744164258 121 | 0,-3.467883789 122 | 64,62.82816355 123 | 96,97.27405461 124 | 98,95.58017185 125 | 12,7.468501839 126 | 41,45.44599591 127 | 47,46.69013968 128 | 78,74.4993599 129 | 20,21.63500655 130 | 89,91.59548851 131 | 29,26.49487961 132 | 64,67.38654703 133 | 75,74.25362837 134 | 12,12.07991648 135 | 25,21.32273728 136 | 28,29.31770045 137 | 30,26.48713683 138 | 65,68.94699774 139 | 59,59.10598995 140 | 64,64.37521087 141 | 53,60.20758349 142 | 71,70.34329706 143 | 97,97.1082562 144 | 73,75.7584178 145 | 9,10.80462727 146 | 12,12.11219941 147 | 63,63.28312382 148 | 99,98.03017721 149 | 60,63.19354354 150 | 35,34.8534823 151 | 2,-2.819913974 152 | 60,59.8313966 153 | 32,29.38505024 154 | 94,97.00148372 155 | 84,85.18657275 156 | 63,61.74063192 157 | 22,18.84798163 158 | 81,78.79008525 159 | 
93,95.12400481 160 | 33,30.48881287 161 | 7,10.41468095 162 | 42,38.98317436 163 | 46,46.11021062 164 | 54,52.45103628 165 | 16,21.16523945 166 | 49,52.28620611 167 | 43,44.18863945 168 | 95,97.13832018 169 | 66,67.22008001 170 | 21,18.98322306 171 | 35,24.3884599 172 | 80,79.44769523 173 | 37,40.03504862 174 | 54,53.32005764 175 | 56,54.55446979 176 | 1,-2.761182595 177 | 32,37.80182795 178 | 58,57.48741435 179 | 32,36.06292994 180 | 46,49.83538167 181 | 72,74.68953276 182 | 17,14.86159401 183 | 97,101.0697879 184 | 93,99.43577876 185 | 91,91.69240746 186 | 37,34.12473248 187 | 4,6.079390073 188 | 54,59.07247174 189 | 51,56.43046022 190 | 27,30.49412933 191 | 46,48.35172635 192 | 92,89.73153611 193 | 73,72.86282528 194 | 77,80.97144285 195 | 91,91.36566374 196 | 61,60.07137496 197 | 99,99.87382707 198 | 4,8.655714172 199 | 72,69.39858505 200 | 19,19.38780134 201 | 57,53.11628433 202 | 78,78.39683006 203 | 26,25.75612514 204 | 74,75.07484683 205 | 90,92.88772282 206 | 66,69.45498498 207 | 13,13.12109842 208 | 40,48.09843134 209 | 77,79.3142548 210 | 67,68.48820749 211 | 75,73.2300846 212 | 23,24.68362712 213 | 45,41.90368917 214 | 59,62.22635684 215 | 44,45.96396877 216 | 23,23.52647153 217 | 55,51.80035866 218 | 55,51.10774273 219 | 95,95.79747345 220 | 12,9.241138977 221 | 4,7.646529763 222 | 7,9.281699753 223 | 100,103.5266162 224 | 48,47.41006725 225 | 42,42.03835773 226 | 96,96.11982476 227 | 39,38.05766408 228 | 100,105.4503788 229 | 87,88.80306911 230 | 14,15.49301141 231 | 14,12.42624606 232 | 37,40.00709598 233 | 5,5.634030902 234 | 88,87.36938931 235 | 91,89.73951993 236 | 65,66.61499643 237 | 74,72.9138853 238 | 56,57.19103506 239 | 16,11.21710477 240 | 5,0.676076749 241 | 28,28.15668543 242 | 92,95.3958003 243 | 46,52.05490703 244 | 54,59.70864577 245 | 39,36.79224762 246 | 44,37.08457698 247 | 31,24.18437976 248 | 68,67.28725332 249 | 86,82.870594 250 | 90,89.899991 251 | 38,36.94173178 252 | 21,19.87562242 253 | 95,90.71481654 254 | 56,61.09367762 255 | 60,60.11134958 256 | 65,64.83296316 257 | 78,81.40381769 258 | 89,92.40217686 259 | 6,2.576625376 260 | 67,63.80768172 261 | 36,38.67780759 262 | 16,16.82839701 263 | 100,99.78687252 264 | 45,44.68913433 265 | 73,71.00377824 266 | 57,51.57326718 267 | 20,19.87846479 268 | 76,79.50341495 269 | 34,34.58876491 270 | 55,55.7383467 271 | 72,68.19721905 272 | 55,55.81628509 273 | 8,9.391416798 274 | 56,56.01448111 275 | 72,77.9969477 276 | 58,55.37049953 277 | 6,11.89457829 278 | 96,94.79081712 279 | 23,25.69041546 280 | 58,53.52042319 281 | 23,18.31396758 282 | 19,21.42637785 283 | 25,30.41303282 284 | 64,67.68142149 285 | 21,17.0854783 286 | 59,60.91792707 287 | 19,14.99514319 288 | 16,16.74923937 289 | 42,41.46923883 290 | 43,42.84526108 291 | 61,59.12912974 292 | 92,91.30863673 293 | 11,8.673336357 294 | 41,39.31485292 295 | 1,5.313686205 296 | 8,5.405220518 297 | 71,68.5458879 298 | 46,47.33487629 299 | 55,54.09063686 300 | 62,63.29717058 301 | 47,52.45946688 -------------------------------------------------------------------------------- /lab_asssigment/6_gradient_descent/linux_mac/test.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import urllib.parse 3 | import urllib.request 4 | import json 5 | import argparse 6 | import os 7 | import types 8 | import sys 9 | 10 | 11 | parser = argparse.ArgumentParser(description="Autoscorer") 12 | parser.add_argument("filename", help="File to submit") 13 | parser.add_argument("hash", help="Hash key") 14 | 15 | args = 
parser.parse_args() 16 | if args.hash: 17 | hashkey = args.hash 18 | if args.filename: 19 | filename = args.filename 20 | 21 | 22 | class TestResult(unittest.TextTestResult): 23 | _previousTestClass = None 24 | _testRunEntered = False 25 | _moduleSetUpFailed = False 26 | 27 | def __init__(self, stream=None, descriptions=None, verbosity=1): 28 | super().__init__( 29 | stream=stream, descriptions=descriptions, verbosity=verbosity) 30 | self.tests_run = [] 31 | 32 | def getTestsReport(self): 33 | """Returns the run tests as a list of the form [test_id, result]""" 34 | return self.tests_run 35 | 36 | def addError(self, test, err): 37 | """Called when an error has occurred. 'err' is a tuple of values as 38 | returned by sys.exc_info(). 39 | """ 40 | super().addError(test, err) 41 | # super().addError() has already appended this error to self.errors 42 | self._mirrorOutput = True 43 | self.tests_run.append([test.id(), 0]) 44 | 45 | def addFailure(self, test, err): 46 | """Called when a test failure has occurred. 'err' is a tuple of values as 47 | returned by sys.exc_info().""" 48 | super().addFailure(test, err) 49 | # super().addFailure() has already appended this failure to self.failures 50 | self._mirrorOutput = True 51 | self.tests_run.append([test.id(), 0]) 52 | 53 | def addSuccess(self, test): 54 | "Called when a test has completed successfully" 55 | super().addSuccess(test) 56 | self.tests_run.append([test.id(), 1]) 57 | 58 | 59 | with urllib.request.urlopen('http://datasets.lablup.ai/private/python-tests/unit_test_linear_model_gd.py') as response: 60 | test_code = response.read() 61 | 62 | 63 | test_module = types.ModuleType( 64 | 'test_code', 65 | doc='Test case') 66 | 67 | exec(test_code, test_module.__dict__)  # runs remotely fetched test code; the source is assumed trusted 68 | sys.modules['test_code'] = test_module 69 | 70 | import test_code as tc 71 | loader = unittest.loader.defaultTestLoader 72 | null_stream = open(os.devnull, "w") 73 | test_suite = loader.loadTestsFromModule(tc) 74 | result = unittest.TextTestRunner( 75 | stream=null_stream, verbosity=2, resultclass=TestResult).run(test_suite) 76 | 77 | print("Generating result sheet...") 78 | print("-------------------------------------------------------------------") 79 | print(" Test Case | Passed? 
| Feedback") 80 | print("-------------------------------------------------------------------") 81 | for c, r in result.tests_run: 82 | print("{0:s} | {1:s} | {2} ".format( 83 | c.rsplit('.', 1)[1].rjust(26), 84 | "PASSED" if r == 1 else "FAILED", 85 | "Good Job".rjust(10) if r == 1 else "Failed".rjust(10))) 86 | 87 | # print(json.dumps(result.tests_run)) 88 | print("Reading source file...") 89 | 90 | file = open(filename, "r") 91 | print("Transferring results to server...") 92 | payload = { 93 | 'hashkey': hashkey, 94 | 'result': result.tests_run, 95 | 'code': file.read() 96 | } 97 | try: 98 | data = urllib.parse.urlencode(payload) 99 | data = data.encode('ascii') 100 | req = urllib.request.Request('http://report.inflearn.com/submit', data) 101 | with urllib.request.urlopen(req) as response: 102 | resp = response.read() 103 | 104 | if json.loads(resp)['result'] == 0: 105 | print("Transfer failed: hash key is already used.") 106 | else: 107 | print("Transfer completed.") 108 | 109 | except Exception as e: 110 | print("Error occurred on transferring.", e) 111 | -------------------------------------------------------------------------------- /lab_asssigment/6_gradient_descent/mlr09.csv: -------------------------------------------------------------------------------- 1 | height_in_feet,weight_in_pounds,successful_field_goals,percent_of_successful_free_throws,average_points_scored 2 | 6.8,225,0.442,0.672,9.2 6.3,180,0.435,0.797,11.7 6.4,190,0.456,0.761,15.8 6.2,180,0.416,0.651,8.6 6.9,205,0.449,0.9,23.2 6.4,225,0.431,0.78,27.4 6.3,185,0.487,0.771,9.3 6.8,235,0.469,0.75,16 6.9,235,0.435,0.818,4.7 6.7,210,0.48,0.825,12.5 6.9,245,0.516,0.632,20.1 6.9,245,0.493,0.757,9.1 6.3,185,0.374,0.709,8.1 6.1,185,0.424,0.782,8.6 6.2,180,0.441,0.775,20.3 6.8,220,0.503,0.88,25 6.5,194,0.503,0.833,19.2 7.6,225,0.425,0.571,3.3 6.3,210,0.371,0.816,11.2 7.1,240,0.504,0.714,10.5 6.8,225,0.4,0.765,10.1 7.3,263,0.482,0.655,7.2 6.4,210,0.475,0.244,13.6 6.8,235,0.428,0.728,9 7.2,230,0.559,0.721,24.6 6.4,190,0.441,0.757,12.6 6.6,220,0.492,0.747,5.6 6.8,210,0.402,0.739,8.7 6.1,180,0.415,0.713,7.7 6.5,235,0.492,0.742,24.1 6.4,185,0.484,0.861,11.7 6,175,0.387,0.721,7.7 6,192,0.436,0.785,9.6 7.3,263,0.482,0.655,7.2 6.1,180,0.34,0.821,12.3 6.7,240,0.516,0.728,8.9 6.4,210,0.475,0.846,13.6 5.8,160,0.412,0.813,11.2 6.9,230,0.411,0.595,2.8 7,245,0.407,0.573,3.2 7.3,228,0.445,0.726,9.4 5.9,155,0.291,0.707,11.9 6.2,200,0.449,0.804,15.4 6.8,235,0.546,0.784,7.4 7,235,0.48,0.744,18.9 5.9,105,0.359,0.839,7.9 6.1,180,0.528,0.79,12.2 5.7,185,0.352,0.701,11 7.1,245,0.414,0.778,2.8 5.8,180,0.425,0.872,11.8 7.4,240,0.599,0.713,17.1 6.8,225,0.482,0.701,11.6 6.8,215,0.457,0.734,5.8 7,230,0.435,0.764,8.3 3 | -------------------------------------------------------------------------------- /lab_asssigment/6_gradient_descent/test.csv: -------------------------------------------------------------------------------- 1 | x,y 2 | 77,79.77515201 3 | 21,23.17727887 4 | 22,25.60926156 5 | 20,17.85738813 6 | 36,41.84986439 7 | 15,9.805234876 8 | 62,58.87465933 9 | 95,97.61793701 10 | 20,18.39512747 11 | 5,8.746747654 12 | 4,2.811415826 13 | 19,17.09537241 14 | 96,95.14907176 15 | 62,61.38800663 16 | 36,40.24701716 17 | 15,14.82248589 18 | 65,66.95806869 19 | 14,16.63507984 20 | 87,90.65513736 21 | 69,77.22982636 22 | 89,92.11906278 23 | 51,46.91387709 24 | 89,89.82634442 25 | 27,21.71380347 26 | 97,97.41206981 27 | 58,57.01631363 28 | 79,78.31056542 29 | 21,19.1315097 30 | 93,93.03483388 31 | 27,26.59112396 32 | 99,97.55155344 33 | 31,31.43524822 34 | 
33,35.12724777 35 | 80,78.61042432 36 | 28,33.07112825 37 | 47,51.69967172 38 | 53,53.62235225 39 | 69,69.46306072 40 | 28,27.42497237 41 | 33,36.34644189 42 | 91,95.06140858 43 | 71,68.16724757 44 | 50,50.96155532 45 | 76,78.04237454 46 | 4,5.607664865 47 | 37,36.11334779 48 | 70,67.2352155 49 | 68,65.01324035 50 | 40,38.14753871 51 | 35,34.31141446 52 | 94,95.28503937 53 | 88,87.84749912 54 | 52,54.08170635 55 | 31,31.93063515 56 | 59,59.61247085 57 | 0,-1.040114209 58 | 39,47.49374765 59 | 64,62.60089773 60 | 69,70.9146434 61 | 57,56.14834113 62 | 13,14.05572877 63 | 72,68.11367147 64 | 76,75.59701346 65 | 61,59.225745 66 | 82,85.45504157 67 | 18,17.76197116 68 | 41,38.68888682 69 | 50,50.96343637 70 | 55,51.83503872 71 | 13,17.0761107 72 | 46,46.56141773 73 | 13,10.34754461 74 | 79,77.91032969 75 | 53,50.17008622 76 | 15,13.25690647 77 | 28,31.32274932 78 | 81,73.9308764 79 | 69,74.45114379 80 | 52,52.01932286 81 | 84,83.68820499 82 | 68,70.3698748 83 | 27,23.44479161 84 | 56,49.83051801 85 | 48,49.88226593 86 | 40,41.04525583 87 | 39,33.37834391 88 | 82,81.29750133 89 | 100,105.5918375 90 | 59,56.82457013 91 | 43,48.67252645 92 | 67,67.02150613 93 | 38,38.43076389 94 | 63,58.61466887 95 | 91,89.12377509 96 | 60,60.9105427 97 | 14,13.83959878 98 | 21,16.89085185 99 | 87,84.06676818 100 | 73,70.34969772 101 | 32,33.38474138 102 | 2,-1.63296825 103 | 82,88.54475895 104 | 19,17.44047622 105 | 74,75.69298554 106 | 42,41.97607107 107 | 12,12.59244741 108 | 1,0.275307261 109 | 90,98.13258005 110 | 89,87.45721555 111 | 0,-2.344738542 112 | 41,39.3294153 113 | 16,16.68715211 114 | 94,96.58888601 115 | 97,97.70342201 116 | 66,67.01715955 117 | 24,25.63476257 118 | 17,13.41310757 119 | 90,95.15647284 120 | 13,9.744164258 121 | 0,-3.467883789 122 | 64,62.82816355 123 | 96,97.27405461 124 | 98,95.58017185 125 | 12,7.468501839 126 | 41,45.44599591 127 | 47,46.69013968 128 | 78,74.4993599 129 | 20,21.63500655 130 | 89,91.59548851 131 | 29,26.49487961 132 | 64,67.38654703 133 | 75,74.25362837 134 | 12,12.07991648 135 | 25,21.32273728 136 | 28,29.31770045 137 | 30,26.48713683 138 | 65,68.94699774 139 | 59,59.10598995 140 | 64,64.37521087 141 | 53,60.20758349 142 | 71,70.34329706 143 | 97,97.1082562 144 | 73,75.7584178 145 | 9,10.80462727 146 | 12,12.11219941 147 | 63,63.28312382 148 | 99,98.03017721 149 | 60,63.19354354 150 | 35,34.8534823 151 | 2,-2.819913974 152 | 60,59.8313966 153 | 32,29.38505024 154 | 94,97.00148372 155 | 84,85.18657275 156 | 63,61.74063192 157 | 22,18.84798163 158 | 81,78.79008525 159 | 93,95.12400481 160 | 33,30.48881287 161 | 7,10.41468095 162 | 42,38.98317436 163 | 46,46.11021062 164 | 54,52.45103628 165 | 16,21.16523945 166 | 49,52.28620611 167 | 43,44.18863945 168 | 95,97.13832018 169 | 66,67.22008001 170 | 21,18.98322306 171 | 35,24.3884599 172 | 80,79.44769523 173 | 37,40.03504862 174 | 54,53.32005764 175 | 56,54.55446979 176 | 1,-2.761182595 177 | 32,37.80182795 178 | 58,57.48741435 179 | 32,36.06292994 180 | 46,49.83538167 181 | 72,74.68953276 182 | 17,14.86159401 183 | 97,101.0697879 184 | 93,99.43577876 185 | 91,91.69240746 186 | 37,34.12473248 187 | 4,6.079390073 188 | 54,59.07247174 189 | 51,56.43046022 190 | 27,30.49412933 191 | 46,48.35172635 192 | 92,89.73153611 193 | 73,72.86282528 194 | 77,80.97144285 195 | 91,91.36566374 196 | 61,60.07137496 197 | 99,99.87382707 198 | 4,8.655714172 199 | 72,69.39858505 200 | 19,19.38780134 201 | 57,53.11628433 202 | 78,78.39683006 203 | 26,25.75612514 204 | 74,75.07484683 205 | 90,92.88772282 206 | 66,69.45498498 207 | 
13,13.12109842 208 | 40,48.09843134 209 | 77,79.3142548 210 | 67,68.48820749 211 | 75,73.2300846 212 | 23,24.68362712 213 | 45,41.90368917 214 | 59,62.22635684 215 | 44,45.96396877 216 | 23,23.52647153 217 | 55,51.80035866 218 | 55,51.10774273 219 | 95,95.79747345 220 | 12,9.241138977 221 | 4,7.646529763 222 | 7,9.281699753 223 | 100,103.5266162 224 | 48,47.41006725 225 | 42,42.03835773 226 | 96,96.11982476 227 | 39,38.05766408 228 | 100,105.4503788 229 | 87,88.80306911 230 | 14,15.49301141 231 | 14,12.42624606 232 | 37,40.00709598 233 | 5,5.634030902 234 | 88,87.36938931 235 | 91,89.73951993 236 | 65,66.61499643 237 | 74,72.9138853 238 | 56,57.19103506 239 | 16,11.21710477 240 | 5,0.676076749 241 | 28,28.15668543 242 | 92,95.3958003 243 | 46,52.05490703 244 | 54,59.70864577 245 | 39,36.79224762 246 | 44,37.08457698 247 | 31,24.18437976 248 | 68,67.28725332 249 | 86,82.870594 250 | 90,89.899991 251 | 38,36.94173178 252 | 21,19.87562242 253 | 95,90.71481654 254 | 56,61.09367762 255 | 60,60.11134958 256 | 65,64.83296316 257 | 78,81.40381769 258 | 89,92.40217686 259 | 6,2.576625376 260 | 67,63.80768172 261 | 36,38.67780759 262 | 16,16.82839701 263 | 100,99.78687252 264 | 45,44.68913433 265 | 73,71.00377824 266 | 57,51.57326718 267 | 20,19.87846479 268 | 76,79.50341495 269 | 34,34.58876491 270 | 55,55.7383467 271 | 72,68.19721905 272 | 55,55.81628509 273 | 8,9.391416798 274 | 56,56.01448111 275 | 72,77.9969477 276 | 58,55.37049953 277 | 6,11.89457829 278 | 96,94.79081712 279 | 23,25.69041546 280 | 58,53.52042319 281 | 23,18.31396758 282 | 19,21.42637785 283 | 25,30.41303282 284 | 64,67.68142149 285 | 21,17.0854783 286 | 59,60.91792707 287 | 19,14.99514319 288 | 16,16.74923937 289 | 42,41.46923883 290 | 43,42.84526108 291 | 61,59.12912974 292 | 92,91.30863673 293 | 11,8.673336357 294 | 41,39.31485292 295 | 1,5.313686205 296 | 8,5.405220518 297 | 71,68.5458879 298 | 46,47.33487629 299 | 55,54.09063686 300 | 62,63.29717058 301 | 47,52.45946688 -------------------------------------------------------------------------------- /lab_asssigment/6_gradient_descent/windows/install.bat: -------------------------------------------------------------------------------- 1 | pip install -U backend.ai-client 2 | -------------------------------------------------------------------------------- /lab_asssigment/6_gradient_descent/windows/linear_model.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | class LinearRegressionGD(object): 5 | def __init__(self, fit_intercept=True, copy_X=True, 6 | eta0=0.001, epochs=1000, weight_decay=0.9): 7 | self.fit_intercept = fit_intercept 8 | self.copy_X = copy_X 9 | self._eta0 = eta0 10 | self._epochs = epochs 11 | 12 | self._cost_history = [] 13 | 14 | self._coef = None 15 | self._intercept = None 16 | self._new_X = None 17 | self._w_history = None 18 | self._weight_decay = weight_decay 19 | 20 | def cost(self, h, y): 21 | pass 22 | 23 | def hypothesis_function(self, X, theta): 24 | pass 25 | 26 | def gradient(self, X, y, theta): 27 | pass 28 | 29 | def fit(self, X, y): 30 | # Write your code (set up self._new_X and initialize theta here) 31 | 32 | for epoch in range(self._epochs): 33 | # Be sure to use the line of code below 34 | gradient = self.gradient(self._new_X, y, theta).flatten() 35 | 36 | # Write your code 37 | 38 | if epoch % 100 == 0: 39 | self._w_history.append(theta) 40 | cost = self.cost( 41 | self.hypothesis_function(self._new_X, theta), y) 42 | self._cost_history.append(cost) 43 | self._eta0 = self._eta0 * self._weight_decay 44 | 45 | # Write your code 46 | 47 
| def predict(self, X): 48 | pass 49 | 50 | @property 51 | def coef(self): 52 | return self._coef 53 | 54 | @property 55 | def intercept(self): 56 | return self._intercept 57 | 58 | @property 59 | def weights_history(self): 60 | return np.array(self._w_history) 61 | 62 | @property 63 | def cost_history(self): 64 | return self._cost_history 65 | -------------------------------------------------------------------------------- /lab_asssigment/6_gradient_descent/windows/mlr09.csv: -------------------------------------------------------------------------------- 1 | height_in_feet,weight_in_pounds,successful_field_goals,percent_of_successful_free_throws,average_points_scored 2 | 6.8,225,0.442,0.672,9.2 6.3,180,0.435,0.797,11.7 6.4,190,0.456,0.761,15.8 6.2,180,0.416,0.651,8.6 6.9,205,0.449,0.9,23.2 6.4,225,0.431,0.78,27.4 6.3,185,0.487,0.771,9.3 6.8,235,0.469,0.75,16 6.9,235,0.435,0.818,4.7 6.7,210,0.48,0.825,12.5 6.9,245,0.516,0.632,20.1 6.9,245,0.493,0.757,9.1 6.3,185,0.374,0.709,8.1 6.1,185,0.424,0.782,8.6 6.2,180,0.441,0.775,20.3 6.8,220,0.503,0.88,25 6.5,194,0.503,0.833,19.2 7.6,225,0.425,0.571,3.3 6.3,210,0.371,0.816,11.2 7.1,240,0.504,0.714,10.5 6.8,225,0.4,0.765,10.1 7.3,263,0.482,0.655,7.2 6.4,210,0.475,0.244,13.6 6.8,235,0.428,0.728,9 7.2,230,0.559,0.721,24.6 6.4,190,0.441,0.757,12.6 6.6,220,0.492,0.747,5.6 6.8,210,0.402,0.739,8.7 6.1,180,0.415,0.713,7.7 6.5,235,0.492,0.742,24.1 6.4,185,0.484,0.861,11.7 6,175,0.387,0.721,7.7 6,192,0.436,0.785,9.6 7.3,263,0.482,0.655,7.2 6.1,180,0.34,0.821,12.3 6.7,240,0.516,0.728,8.9 6.4,210,0.475,0.846,13.6 5.8,160,0.412,0.813,11.2 6.9,230,0.411,0.595,2.8 7,245,0.407,0.573,3.2 7.3,228,0.445,0.726,9.4 5.9,155,0.291,0.707,11.9 6.2,200,0.449,0.804,15.4 6.8,235,0.546,0.784,7.4 7,235,0.48,0.744,18.9 5.9,105,0.359,0.839,7.9 6.1,180,0.528,0.79,12.2 5.7,185,0.352,0.701,11 7.1,245,0.414,0.778,2.8 5.8,180,0.425,0.872,11.8 7.4,240,0.599,0.713,17.1 6.8,225,0.482,0.701,11.6 6.8,215,0.457,0.734,5.8 7,230,0.435,0.764,8.3 3 | -------------------------------------------------------------------------------- /lab_asssigment/6_gradient_descent/windows/submit.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | set BACKEND_ACCESS_KEY=AKIAQWZD6A6Y5ZVOHSJR 4 | set BACKEND_SECRET_KEY=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 5 | 6 | set tmp="%1" 7 | if "%tmp:"=.%"==".." ( 8 | echo "Please give hash key as argument." 
9 | ) else ( 10 | backend.ai run --exec "python test.py linear_model.py %tmp%" python3 test.py linear_model.py test.csv train.csv mlr09.csv 11 | ) 12 | -------------------------------------------------------------------------------- /lab_asssigment/6_gradient_descent/windows/test.csv: -------------------------------------------------------------------------------- 1 | x,y 2 | 77,79.77515201 3 | 21,23.17727887 4 | 22,25.60926156 5 | 20,17.85738813 6 | 36,41.84986439 7 | 15,9.805234876 8 | 62,58.87465933 9 | 95,97.61793701 10 | 20,18.39512747 11 | 5,8.746747654 12 | 4,2.811415826 13 | 19,17.09537241 14 | 96,95.14907176 15 | 62,61.38800663 16 | 36,40.24701716 17 | 15,14.82248589 18 | 65,66.95806869 19 | 14,16.63507984 20 | 87,90.65513736 21 | 69,77.22982636 22 | 89,92.11906278 23 | 51,46.91387709 24 | 89,89.82634442 25 | 27,21.71380347 26 | 97,97.41206981 27 | 58,57.01631363 28 | 79,78.31056542 29 | 21,19.1315097 30 | 93,93.03483388 31 | 27,26.59112396 32 | 99,97.55155344 33 | 31,31.43524822 34 | 33,35.12724777 35 | 80,78.61042432 36 | 28,33.07112825 37 | 47,51.69967172 38 | 53,53.62235225 39 | 69,69.46306072 40 | 28,27.42497237 41 | 33,36.34644189 42 | 91,95.06140858 43 | 71,68.16724757 44 | 50,50.96155532 45 | 76,78.04237454 46 | 4,5.607664865 47 | 37,36.11334779 48 | 70,67.2352155 49 | 68,65.01324035 50 | 40,38.14753871 51 | 35,34.31141446 52 | 94,95.28503937 53 | 88,87.84749912 54 | 52,54.08170635 55 | 31,31.93063515 56 | 59,59.61247085 57 | 0,-1.040114209 58 | 39,47.49374765 59 | 64,62.60089773 60 | 69,70.9146434 61 | 57,56.14834113 62 | 13,14.05572877 63 | 72,68.11367147 64 | 76,75.59701346 65 | 61,59.225745 66 | 82,85.45504157 67 | 18,17.76197116 68 | 41,38.68888682 69 | 50,50.96343637 70 | 55,51.83503872 71 | 13,17.0761107 72 | 46,46.56141773 73 | 13,10.34754461 74 | 79,77.91032969 75 | 53,50.17008622 76 | 15,13.25690647 77 | 28,31.32274932 78 | 81,73.9308764 79 | 69,74.45114379 80 | 52,52.01932286 81 | 84,83.68820499 82 | 68,70.3698748 83 | 27,23.44479161 84 | 56,49.83051801 85 | 48,49.88226593 86 | 40,41.04525583 87 | 39,33.37834391 88 | 82,81.29750133 89 | 100,105.5918375 90 | 59,56.82457013 91 | 43,48.67252645 92 | 67,67.02150613 93 | 38,38.43076389 94 | 63,58.61466887 95 | 91,89.12377509 96 | 60,60.9105427 97 | 14,13.83959878 98 | 21,16.89085185 99 | 87,84.06676818 100 | 73,70.34969772 101 | 32,33.38474138 102 | 2,-1.63296825 103 | 82,88.54475895 104 | 19,17.44047622 105 | 74,75.69298554 106 | 42,41.97607107 107 | 12,12.59244741 108 | 1,0.275307261 109 | 90,98.13258005 110 | 89,87.45721555 111 | 0,-2.344738542 112 | 41,39.3294153 113 | 16,16.68715211 114 | 94,96.58888601 115 | 97,97.70342201 116 | 66,67.01715955 117 | 24,25.63476257 118 | 17,13.41310757 119 | 90,95.15647284 120 | 13,9.744164258 121 | 0,-3.467883789 122 | 64,62.82816355 123 | 96,97.27405461 124 | 98,95.58017185 125 | 12,7.468501839 126 | 41,45.44599591 127 | 47,46.69013968 128 | 78,74.4993599 129 | 20,21.63500655 130 | 89,91.59548851 131 | 29,26.49487961 132 | 64,67.38654703 133 | 75,74.25362837 134 | 12,12.07991648 135 | 25,21.32273728 136 | 28,29.31770045 137 | 30,26.48713683 138 | 65,68.94699774 139 | 59,59.10598995 140 | 64,64.37521087 141 | 53,60.20758349 142 | 71,70.34329706 143 | 97,97.1082562 144 | 73,75.7584178 145 | 9,10.80462727 146 | 12,12.11219941 147 | 63,63.28312382 148 | 99,98.03017721 149 | 60,63.19354354 150 | 35,34.8534823 151 | 2,-2.819913974 152 | 60,59.8313966 153 | 32,29.38505024 154 | 94,97.00148372 155 | 84,85.18657275 156 | 63,61.74063192 157 | 22,18.84798163 158 | 81,78.79008525 159 | 
93,95.12400481 160 | 33,30.48881287 161 | 7,10.41468095 162 | 42,38.98317436 163 | 46,46.11021062 164 | 54,52.45103628 165 | 16,21.16523945 166 | 49,52.28620611 167 | 43,44.18863945 168 | 95,97.13832018 169 | 66,67.22008001 170 | 21,18.98322306 171 | 35,24.3884599 172 | 80,79.44769523 173 | 37,40.03504862 174 | 54,53.32005764 175 | 56,54.55446979 176 | 1,-2.761182595 177 | 32,37.80182795 178 | 58,57.48741435 179 | 32,36.06292994 180 | 46,49.83538167 181 | 72,74.68953276 182 | 17,14.86159401 183 | 97,101.0697879 184 | 93,99.43577876 185 | 91,91.69240746 186 | 37,34.12473248 187 | 4,6.079390073 188 | 54,59.07247174 189 | 51,56.43046022 190 | 27,30.49412933 191 | 46,48.35172635 192 | 92,89.73153611 193 | 73,72.86282528 194 | 77,80.97144285 195 | 91,91.36566374 196 | 61,60.07137496 197 | 99,99.87382707 198 | 4,8.655714172 199 | 72,69.39858505 200 | 19,19.38780134 201 | 57,53.11628433 202 | 78,78.39683006 203 | 26,25.75612514 204 | 74,75.07484683 205 | 90,92.88772282 206 | 66,69.45498498 207 | 13,13.12109842 208 | 40,48.09843134 209 | 77,79.3142548 210 | 67,68.48820749 211 | 75,73.2300846 212 | 23,24.68362712 213 | 45,41.90368917 214 | 59,62.22635684 215 | 44,45.96396877 216 | 23,23.52647153 217 | 55,51.80035866 218 | 55,51.10774273 219 | 95,95.79747345 220 | 12,9.241138977 221 | 4,7.646529763 222 | 7,9.281699753 223 | 100,103.5266162 224 | 48,47.41006725 225 | 42,42.03835773 226 | 96,96.11982476 227 | 39,38.05766408 228 | 100,105.4503788 229 | 87,88.80306911 230 | 14,15.49301141 231 | 14,12.42624606 232 | 37,40.00709598 233 | 5,5.634030902 234 | 88,87.36938931 235 | 91,89.73951993 236 | 65,66.61499643 237 | 74,72.9138853 238 | 56,57.19103506 239 | 16,11.21710477 240 | 5,0.676076749 241 | 28,28.15668543 242 | 92,95.3958003 243 | 46,52.05490703 244 | 54,59.70864577 245 | 39,36.79224762 246 | 44,37.08457698 247 | 31,24.18437976 248 | 68,67.28725332 249 | 86,82.870594 250 | 90,89.899991 251 | 38,36.94173178 252 | 21,19.87562242 253 | 95,90.71481654 254 | 56,61.09367762 255 | 60,60.11134958 256 | 65,64.83296316 257 | 78,81.40381769 258 | 89,92.40217686 259 | 6,2.576625376 260 | 67,63.80768172 261 | 36,38.67780759 262 | 16,16.82839701 263 | 100,99.78687252 264 | 45,44.68913433 265 | 73,71.00377824 266 | 57,51.57326718 267 | 20,19.87846479 268 | 76,79.50341495 269 | 34,34.58876491 270 | 55,55.7383467 271 | 72,68.19721905 272 | 55,55.81628509 273 | 8,9.391416798 274 | 56,56.01448111 275 | 72,77.9969477 276 | 58,55.37049953 277 | 6,11.89457829 278 | 96,94.79081712 279 | 23,25.69041546 280 | 58,53.52042319 281 | 23,18.31396758 282 | 19,21.42637785 283 | 25,30.41303282 284 | 64,67.68142149 285 | 21,17.0854783 286 | 59,60.91792707 287 | 19,14.99514319 288 | 16,16.74923937 289 | 42,41.46923883 290 | 43,42.84526108 291 | 61,59.12912974 292 | 92,91.30863673 293 | 11,8.673336357 294 | 41,39.31485292 295 | 1,5.313686205 296 | 8,5.405220518 297 | 71,68.5458879 298 | 46,47.33487629 299 | 55,54.09063686 300 | 62,63.29717058 301 | 47,52.45946688 -------------------------------------------------------------------------------- /lab_asssigment/6_gradient_descent/windows/test.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import urllib.parse 3 | import urllib.request 4 | import json 5 | import argparse 6 | import os 7 | import types 8 | import sys 9 | 10 | 11 | parser = argparse.ArgumentParser(description="Autoscorer") 12 | parser.add_argument("filename", help="File to submit") 13 | parser.add_argument("hash", help="Hash key") 14 | 15 | args = 
parser.parse_args() 16 | if args.hash: 17 | hashkey = args.hash 18 | if args.filename: 19 | filename = args.filename 20 | 21 | 22 | class TestResult(unittest.TextTestResult): 23 | _previousTestClass = None 24 | _testRunEntered = False 25 | _moduleSetUpFailed = False 26 | 27 | def __init__(self, stream=None, descriptions=None, verbosity=1): 28 | super().__init__( 29 | stream=stream, descriptions=descriptions, verbosity=verbosity) 30 | self.tests_run = [] 31 | 32 | def getTestsReport(self): 33 | """Returns the run tests as a list of the form [test_id, result]""" 34 | return self.tests_run 35 | 36 | def addError(self, test, err): 37 | """Called when an error has occurred. 'err' is a tuple of values as 38 | returned by sys.exc_info(). 39 | """ 40 | super().addError(test, err) 41 | # super().addError() has already appended this error to self.errors 42 | self._mirrorOutput = True 43 | self.tests_run.append([test.id(), 0]) 44 | 45 | def addFailure(self, test, err): 46 | """Called when a test failure has occurred. 'err' is a tuple of values as 47 | returned by sys.exc_info().""" 48 | super().addFailure(test, err) 49 | # super().addFailure() has already appended this failure to self.failures 50 | self._mirrorOutput = True 51 | self.tests_run.append([test.id(), 0]) 52 | 53 | def addSuccess(self, test): 54 | "Called when a test has completed successfully" 55 | super().addSuccess(test) 56 | self.tests_run.append([test.id(), 1]) 57 | 58 | 59 | with urllib.request.urlopen('http://datasets.lablup.ai/private/python-tests/unit_test_linear_model_gd.py') as response: 60 | test_code = response.read() 61 | 62 | 63 | test_module = types.ModuleType( 64 | 'test_code', 65 | doc='Test case') 66 | 67 | exec(test_code, test_module.__dict__)  # runs remotely fetched test code; the source is assumed trusted 68 | sys.modules['test_code'] = test_module 69 | 70 | import test_code as tc 71 | loader = unittest.loader.defaultTestLoader 72 | null_stream = open(os.devnull, "w") 73 | test_suite = loader.loadTestsFromModule(tc) 74 | result = unittest.TextTestRunner( 75 | stream=null_stream, verbosity=2, resultclass=TestResult).run(test_suite) 76 | 77 | print("Generating result sheet...") 78 | print("-------------------------------------------------------------------") 79 | print(" Test Case | Passed? | Feedback") 80 | print("-------------------------------------------------------------------") 81 | for c, r in result.tests_run: 82 | print("{0:s} | {1:s} | {2} ".format( 83 | c.rsplit('.', 1)[1].rjust(26), 84 | "PASSED" if r == 1 else "FAILED", 85 | "Good Job".rjust(10) if r == 1 else "Failed".rjust(10))) 86 | 87 | # print(json.dumps(result.tests_run)) 88 | print("Reading source file...") 89 | 90 | file = open(filename, "r") 91 | print("Transferring results to server...") 92 | payload = { 93 | 'hashkey': hashkey, 94 | 'result': result.tests_run, 95 | 'code': file.read() 96 | } 97 | try: 98 | data = urllib.parse.urlencode(payload) 99 | data = data.encode('ascii') 100 | req = urllib.request.Request('http://report.inflearn.com/submit', data) 101 | with urllib.request.urlopen(req) as response: 102 | resp = response.read() 103 | 104 | if json.loads(resp)['result'] == 0: 105 | print("Transfer failed: hash key is already used.") 106 | else: 107 | print("Transfer completed.") 108 | 109 | except Exception as e: 110 | print("Error occurred on transferring.", e) 111 | --------------------------------------------------------------------------------
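Both labs are scored the same way. A hypothetical session, where YOUR_HASH_KEY stands in for the per-student hash issued with the assignment:

python test.py linear_model.py YOUR_HASH_KEY

The submit wrappers do the same thing through backend.ai: ./submit.sh YOUR_HASH_KEY on Linux/macOS and submit.bat YOUR_HASH_KEY on Windows both forward the hash to test.py.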