├── README.md
├── wk1-linear-regression
├── .ipynb_checkpoints
│ ├── Linear Regression 3D - CO2&Temp-checkpoint.ipynb
│ └── Linear Regression live - learn-m&b-checkpoint.ipynb
├── Linear Regression 3D - CO2&Temp.ipynb
├── Linear Regression live - learn-m&b.ipynb
├── annual_temp.csv
├── brain_boday.txt
├── challenge1.py
├── challenge_dataset.txt
├── data.csv
├── demo.py
└── global_co2.csv
├── wk2-neural-Networks
├── .ipynb_checkpoints
│ ├── NN-3layers-checkpoint.ipynb
│ └── feedForwardNN-checkpoint.ipynb
├── NN-3layers.ipynb
└── feedForwardNN.ipynb
├── wk4-earthquakes
├── .ipynb_checkpoints
│ └── earthquakes_NN-checkpoint.ipynb
├── database.csv
└── earthquakes_NN.ipynb
├── wk5-speed-dating
├── .ipynb_checkpoints
│ └── Speed dating prediction-checkpoint.ipynb
├── Speed Dating Data.csv
└── Speed dating prediction.ipynb
├── wk6-image-classifier
├── .ipynb_checkpoints
│ └── image-classifier-sheeps-goats-checkpoint.ipynb
└── image-classifier-sheeps-goats.ipynb
├── wk7-stock-price-prediction
├── .ipynb_checkpoints
│ └── Google-stock-prices-prediction-checkpoint.ipynb
└── Google-stock-prices-prediction.ipynb
└── wk8-generate-art
├── .ipynb_checkpoints
├── Generate-art-style-transform1-checkpoint.ipynb
└── Generate-art-style-transform2-checkpoint.ipynb
├── Generate-art-style-transform1.ipynb
├── Generate-art-style-transform2.ipynb
├── house.jpg
├── the_scream.jpg
└── wave.jpg
/README.md:
--------------------------------------------------------------------------------
1 | # Deep-Learning-projects
2 | Deep Learning projects
3 |
4 | In this repository, I keep all of the code I wrote while taking Siraj Raval's [Intro to Deep Learning](https://www.youtube.com/channel/UCWN3xxRkmTPmbKwht9FuE5A) course. I am having a lot of fun learning from his videos.
5 |
--------------------------------------------------------------------------------
/wk1-linear-regression/.ipynb_checkpoints/Linear Regression live - learn-m&b-checkpoint.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Learn Linear Function m and b \n",
8 | " - Siraj's linear regression - live session\n",
9 | "\n",
10 | "for y = mx + b, (m is slope, b is y-intercept) --- \n",
11 | "Learn to find out m and b from data.csv"
12 | ]
13 | },
14 | {
15 | "cell_type": "code",
16 | "execution_count": 1,
17 | "metadata": {
18 | "collapsed": true
19 | },
20 | "outputs": [],
21 | "source": [
22 | "from numpy import *"
23 | ]
24 | },
25 | {
26 | "cell_type": "markdown",
27 | "metadata": {},
28 | "source": [
29 | "### Compute Error\n",
30 | "To calculate our error use [Sum of squared distances formula](https://spin.atomicobject.com/wp-content/uploads/linear_regression_error1.png)"
31 | ]
32 | },
33 | {
34 | "cell_type": "code",
35 | "execution_count": 2,
36 | "metadata": {
37 | "collapsed": true
38 | },
39 | "outputs": [],
40 | "source": [
41 | "def compute_error_for_line_given_points(b, m, points):\n",
42 | " totalError = 0\n",
43 | " for i in range(0, len(points)):\n",
44 | " x = points[i, 0]\n",
45 | " y = points[i, 1]\n",
46 | " totalError += (y - (m * x + b)) ** 2\n",
47 | " return totalError / float(len(points))"
48 | ]
49 | },
50 | {
51 | "cell_type": "markdown",
52 | "metadata": {},
53 | "source": [
54 | "### Compute Gradient\n",
55 | "To perform gradient descent use [Partial derivative with respect to b and m](https://spin.atomicobject.com/wp-content/uploads/linear_regression_gradient1.png)"
56 | ]
57 | },
58 | {
59 | "cell_type": "code",
60 | "execution_count": 3,
61 | "metadata": {
62 | "collapsed": true
63 | },
64 | "outputs": [],
65 | "source": [
66 | "def step_gradient(b_current, m_current, points, learningRate):\n",
67 | " b_gradient = 0\n",
68 | " m_gradient = 0\n",
69 | " N = float(len(points))\n",
70 | " for i in range(0, len(points)):\n",
71 | " x = points[i, 0]\n",
72 | " y = points[i, 1]\n",
73 | " b_gradient += -(2/N) * (y - ((m_current * x) + b_current))\n",
74 | " m_gradient += -(2/N) * x * (y - ((m_current * x) + b_current))\n",
75 | " new_b = b_current - (learningRate * b_gradient)\n",
76 | " new_m = m_current - (learningRate * m_gradient)\n",
77 | " return [new_b, new_m]"
78 | ]
79 | },
80 | {
81 | "cell_type": "markdown",
82 | "metadata": {},
83 | "source": [
84 | "### Iterate Gradient steps\n",
85 | " [Gradient descent visualization](https://raw.githubusercontent.com/mattnedrich/GradientDescentExample/master/gradient_descent_example.gif)"
86 | ]
87 | },
88 | {
89 | "cell_type": "code",
90 | "execution_count": 4,
91 | "metadata": {
92 | "collapsed": true
93 | },
94 | "outputs": [],
95 | "source": [
96 | "def gradient_descent_runner(points, starting_b, starting_m, learning_rate, num_iterations):\n",
97 | " b = starting_b\n",
98 | " m = starting_m\n",
99 | " for i in range(num_iterations):\n",
100 | " b, m = step_gradient(b, m, array(points), learning_rate)\n",
101 | " return [b, m]"
102 | ]
103 | },
104 | {
105 | "cell_type": "markdown",
106 | "metadata": {},
107 | "source": [
108 | "### import data and learn m & b"
109 | ]
110 | },
111 | {
112 | "cell_type": "code",
113 | "execution_count": 7,
114 | "metadata": {
115 | "collapsed": false
116 | },
117 | "outputs": [
118 | {
119 | "name": "stdout",
120 | "output_type": "stream",
121 | "text": [
122 | "Starting gradient descent at b = 0, m = 0, error = 5565.107834483211\n",
123 | "Running...\n",
124 | "After 1000 iterations b = 0.08893651993741346, m = 1.4777440851894448, error = 112.61481011613473\n"
125 | ]
126 | }
127 | ],
128 | "source": [
129 | "points = genfromtxt(\"data.csv\", delimiter=\",\")\n",
130 | "learning_rate = 0.0001\n",
131 | "initial_b = 0 # initial y-intercept guess\n",
132 | "initial_m = 0 # initial slope guess\n",
133 | "num_iterations = 1000\n",
134 | "print(\"Starting gradient descent at b = {0}, m = {1}, error = {2}\"\n",
135 | " .format(initial_b, initial_m,compute_error_for_line_given_points(initial_b, initial_m, points)))\n",
136 | "print(\"Running...\")\n",
137 | "[b, m] = gradient_descent_runner(points, initial_b, initial_m, learning_rate, num_iterations)\n",
138 | "print(\"After {0} iterations b = {1}, m = {2}, error = {3}\"\n",
139 | " .format(num_iterations, b, m, compute_error_for_line_given_points(b, m, points)))"
140 | ]
141 | }
142 | ],
143 | "metadata": {
144 | "kernelspec": {
145 | "display_name": "Python 3",
146 | "language": "python",
147 | "name": "python3"
148 | },
149 | "language_info": {
150 | "codemirror_mode": {
151 | "name": "ipython",
152 | "version": 3
153 | },
154 | "file_extension": ".py",
155 | "mimetype": "text/x-python",
156 | "name": "python",
157 | "nbconvert_exporter": "python",
158 | "pygments_lexer": "ipython3",
159 | "version": "3.5.2"
160 | }
161 | },
162 | "nbformat": 4,
163 | "nbformat_minor": 2
164 | }
165 |
--------------------------------------------------------------------------------
/wk1-linear-regression/Linear Regression live - learn-m&b.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Learn Linear Function m and b \n",
8 | " - Siraj's linear regression - live session\n",
9 | "\n",
10 | "for y = mx + b, (m is slope, b is y-intercept) --- \n",
11 | "Learn to find out m and b from data.csv"
12 | ]
13 | },
14 | {
15 | "cell_type": "code",
16 | "execution_count": 1,
17 | "metadata": {
18 | "collapsed": true
19 | },
20 | "outputs": [],
21 | "source": [
22 | "from numpy import *"
23 | ]
24 | },
25 | {
26 | "cell_type": "markdown",
27 | "metadata": {},
28 | "source": [
29 | "### Compute Error\n",
30 | "To calculate our error use [Sum of squared distances formula](https://spin.atomicobject.com/wp-content/uploads/linear_regression_error1.png)"
31 | ]
32 | },
33 | {
34 | "cell_type": "code",
35 | "execution_count": 2,
36 | "metadata": {
37 | "collapsed": true
38 | },
39 | "outputs": [],
40 | "source": [
41 | "def compute_error_for_line_given_points(b, m, points):\n",
42 | " totalError = 0\n",
43 | " for i in range(0, len(points)):\n",
44 | " x = points[i, 0]\n",
45 | " y = points[i, 1]\n",
46 | " totalError += (y - (m * x + b)) ** 2\n",
47 | " return totalError / float(len(points))"
48 | ]
49 | },
50 | {
51 | "cell_type": "markdown",
52 | "metadata": {},
53 | "source": [
54 | "### Compute Gradient\n",
55 | "To perform gradient descent use [Partial derivative with respect to b and m](https://spin.atomicobject.com/wp-content/uploads/linear_regression_gradient1.png)"
56 | ]
57 | },
58 | {
59 | "cell_type": "code",
60 | "execution_count": 3,
61 | "metadata": {
62 | "collapsed": true
63 | },
64 | "outputs": [],
65 | "source": [
66 | "def step_gradient(b_current, m_current, points, learningRate):\n",
67 | " b_gradient = 0\n",
68 | " m_gradient = 0\n",
69 | " N = float(len(points))\n",
70 | " for i in range(0, len(points)):\n",
71 | " x = points[i, 0]\n",
72 | " y = points[i, 1]\n",
73 | " b_gradient += -(2/N) * (y - ((m_current * x) + b_current))\n",
74 | " m_gradient += -(2/N) * x * (y - ((m_current * x) + b_current))\n",
75 | " new_b = b_current - (learningRate * b_gradient)\n",
76 | " new_m = m_current - (learningRate * m_gradient)\n",
77 | " return [new_b, new_m]"
78 | ]
79 | },
80 | {
81 | "cell_type": "markdown",
82 | "metadata": {},
83 | "source": [
84 | "### Iterate Gradient steps\n",
85 | " [Gradient descent visualization](https://raw.githubusercontent.com/mattnedrich/GradientDescentExample/master/gradient_descent_example.gif)"
86 | ]
87 | },
88 | {
89 | "cell_type": "code",
90 | "execution_count": 4,
91 | "metadata": {
92 | "collapsed": true
93 | },
94 | "outputs": [],
95 | "source": [
96 | "def gradient_descent_runner(points, starting_b, starting_m, learning_rate, num_iterations):\n",
97 | " b = starting_b\n",
98 | " m = starting_m\n",
99 | " for i in range(num_iterations):\n",
100 | " b, m = step_gradient(b, m, array(points), learning_rate)\n",
101 | " return [b, m]"
102 | ]
103 | },
104 | {
105 | "cell_type": "markdown",
106 | "metadata": {},
107 | "source": [
108 | "### import data and learn m & b"
109 | ]
110 | },
111 | {
112 | "cell_type": "code",
113 | "execution_count": 7,
114 | "metadata": {
115 | "collapsed": false
116 | },
117 | "outputs": [
118 | {
119 | "name": "stdout",
120 | "output_type": "stream",
121 | "text": [
122 | "Starting gradient descent at b = 0, m = 0, error = 5565.107834483211\n",
123 | "Running...\n",
124 | "After 1000 iterations b = 0.08893651993741346, m = 1.4777440851894448, error = 112.61481011613473\n"
125 | ]
126 | }
127 | ],
128 | "source": [
129 | "points = genfromtxt(\"data.csv\", delimiter=\",\")\n",
130 | "learning_rate = 0.0001\n",
131 | "initial_b = 0 # initial y-intercept guess\n",
132 | "initial_m = 0 # initial slope guess\n",
133 | "num_iterations = 1000\n",
134 | "print(\"Starting gradient descent at b = {0}, m = {1}, error = {2}\"\n",
135 | " .format(initial_b, initial_m,compute_error_for_line_given_points(initial_b, initial_m, points)))\n",
136 | "print(\"Running...\")\n",
137 | "[b, m] = gradient_descent_runner(points, initial_b, initial_m, learning_rate, num_iterations)\n",
138 | "print(\"After {0} iterations b = {1}, m = {2}, error = {3}\"\n",
139 | " .format(num_iterations, b, m, compute_error_for_line_given_points(b, m, points)))"
140 | ]
141 | }
142 | ],
143 | "metadata": {
144 | "kernelspec": {
145 | "display_name": "Python 3",
146 | "language": "python",
147 | "name": "python3"
148 | },
149 | "language_info": {
150 | "codemirror_mode": {
151 | "name": "ipython",
152 | "version": 3
153 | },
154 | "file_extension": ".py",
155 | "mimetype": "text/x-python",
156 | "name": "python",
157 | "nbconvert_exporter": "python",
158 | "pygments_lexer": "ipython3",
159 | "version": "3.5.2"
160 | }
161 | },
162 | "nbformat": 4,
163 | "nbformat_minor": 2
164 | }
165 |
--------------------------------------------------------------------------------
/wk1-linear-regression/annual_temp.csv:
--------------------------------------------------------------------------------
1 | Source,Year,Mean
2 | GCAG,2015,0.8990
3 | GISTEMP,2015,0.87
4 | GCAG,2014,0.7402
5 | GISTEMP,2014,0.75
6 | GCAG,2013,0.6687
7 | GISTEMP,2013,0.66
8 | GCAG,2012,0.6219
9 | GISTEMP,2012,0.64
10 | GCAG,2011,0.5759
11 | GISTEMP,2011,0.61
12 | GCAG,2010,0.7008
13 | GISTEMP,2010,0.72
14 | GCAG,2009,0.6354
15 | GISTEMP,2009,0.65
16 | GCAG,2008,0.5415
17 | GISTEMP,2008,0.54
18 | GCAG,2007,0.6113
19 | GISTEMP,2007,0.66
20 | GCAG,2006,0.6139
21 | GISTEMP,2006,0.63
22 | GCAG,2005,0.6583
23 | GISTEMP,2005,0.69
24 | GCAG,2004,0.5806
25 | GISTEMP,2004,0.55
26 | GCAG,2003,0.6145
27 | GISTEMP,2003,0.62
28 | GCAG,2002,0.6018
29 | GISTEMP,2002,0.63
30 | GCAG,2001,0.5455
31 | GISTEMP,2001,0.55
32 | GCAG,2000,0.4255
33 | GISTEMP,2000,0.42
34 | GCAG,1999,0.4427
35 | GISTEMP,1999,0.42
36 | GCAG,1998,0.6335
37 | GISTEMP,1998,0.64
38 | GCAG,1997,0.5185
39 | GISTEMP,1997,0.48
40 | GCAG,1996,0.3225
41 | GISTEMP,1996,0.35
42 | GCAG,1995,0.4593
43 | GISTEMP,1995,0.46
44 | GCAG,1994,0.3420
45 | GISTEMP,1994,0.32
46 | GCAG,1993,0.2857
47 | GISTEMP,1993,0.24
48 | GCAG,1992,0.2583
49 | GISTEMP,1992,0.23
50 | GCAG,1991,0.4079
51 | GISTEMP,1991,0.43
52 | GCAG,1990,0.4350
53 | GISTEMP,1990,0.44
54 | GCAG,1989,0.2982
55 | GISTEMP,1989,0.29
56 | GCAG,1988,0.3770
57 | GISTEMP,1988,0.41
58 | GCAG,1987,0.3710
59 | GISTEMP,1987,0.33
60 | GCAG,1986,0.2308
61 | GISTEMP,1986,0.19
62 | GCAG,1985,0.1357
63 | GISTEMP,1985,0.12
64 | GCAG,1984,0.1510
65 | GISTEMP,1984,0.16
66 | GCAG,1983,0.3429
67 | GISTEMP,1983,0.3
68 | GCAG,1982,0.1836
69 | GISTEMP,1982,0.13
70 | GCAG,1981,0.3024
71 | GISTEMP,1981,0.33
72 | GCAG,1980,0.2651
73 | GISTEMP,1980,0.27
74 | GCAG,1979,0.2288
75 | GISTEMP,1979,0.17
76 | GCAG,1978,0.1139
77 | GISTEMP,1978,0.07
78 | GCAG,1977,0.1996
79 | GISTEMP,1977,0.18
80 | GCAG,1976,-0.0769
81 | GISTEMP,1976,-0.12
82 | GCAG,1975,0.0060
83 | GISTEMP,1975,-0.02
84 | GCAG,1974,-0.0698
85 | GISTEMP,1974,-0.08
86 | GCAG,1973,0.1654
87 | GISTEMP,1973,0.15
88 | GCAG,1972,0.0280
89 | GISTEMP,1972,0.01
90 | GCAG,1971,-0.0775
91 | GISTEMP,1971,-0.09
92 | GCAG,1970,0.0383
93 | GISTEMP,1970,0.02
94 | GCAG,1969,0.0937
95 | GISTEMP,1969,0.06
96 | GCAG,1968,-0.0282
97 | GISTEMP,1968,-0.07
98 | GCAG,1967,-0.0112
99 | GISTEMP,1967,-0.02
100 | GCAG,1966,-0.0204
101 | GISTEMP,1966,-0.05
102 | GCAG,1965,-0.0752
103 | GISTEMP,1965,-0.1
104 | GCAG,1964,-0.1461
105 | GISTEMP,1964,-0.2
106 | GCAG,1963,0.1100
107 | GISTEMP,1963,0.06
108 | GCAG,1962,0.0924
109 | GISTEMP,1962,0.03
110 | GCAG,1961,0.0818
111 | GISTEMP,1961,0.05
112 | GCAG,1960,0.0252
113 | GISTEMP,1960,-0.03
114 | GCAG,1959,0.0640
115 | GISTEMP,1959,0.03
116 | GCAG,1958,0.1145
117 | GISTEMP,1958,0.07
118 | GCAG,1957,0.0538
119 | GISTEMP,1957,0.04
120 | GCAG,1956,-0.1945
121 | GISTEMP,1956,-0.2
122 | GCAG,1955,-0.1305
123 | GISTEMP,1955,-0.15
124 | GCAG,1954,-0.1118
125 | GISTEMP,1954,-0.12
126 | GCAG,1953,0.0997
127 | GISTEMP,1953,0.08
128 | GCAG,1952,0.0288
129 | GISTEMP,1952,0.01
130 | GCAG,1951,-0.0095
131 | GISTEMP,1951,-0.06
132 | GCAG,1950,-0.1579
133 | GISTEMP,1950,-0.18
134 | GCAG,1949,-0.0550
135 | GISTEMP,1949,-0.09
136 | GCAG,1948,-0.0471
137 | GISTEMP,1948,-0.09
138 | GCAG,1947,-0.0455
139 | GISTEMP,1947,-0.05
140 | GCAG,1946,-0.0013
141 | GISTEMP,1946,-0.04
142 | GCAG,1945,0.1754
143 | GISTEMP,1945,0.12
144 | GCAG,1944,0.2948
145 | GISTEMP,1944,0.25
146 | GCAG,1943,0.1598
147 | GISTEMP,1943,0.13
148 | GCAG,1942,0.1549
149 | GISTEMP,1942,0.09
150 | GCAG,1941,0.1974
151 | GISTEMP,1941,0.13
152 | GCAG,1940,0.0927
153 | GISTEMP,1940,0.08
154 | GCAG,1939,-0.0157
155 | GISTEMP,1939,-0.03
156 | GCAG,1938,-0.0318
157 | GISTEMP,1938,-0.03
158 | GCAG,1937,-0.0204
159 | GISTEMP,1937,-0.03
160 | GCAG,1936,-0.1173
161 | GISTEMP,1936,-0.15
162 | GCAG,1935,-0.1445
163 | GISTEMP,1935,-0.19
164 | GCAG,1934,-0.1075
165 | GISTEMP,1934,-0.14
166 | GCAG,1933,-0.2481
167 | GISTEMP,1933,-0.29
168 | GCAG,1932,-0.1214
169 | GISTEMP,1932,-0.16
170 | GCAG,1931,-0.0714
171 | GISTEMP,1931,-0.09
172 | GCAG,1930,-0.1016
173 | GISTEMP,1930,-0.14
174 | GCAG,1929,-0.2982
175 | GISTEMP,1929,-0.35
176 | GCAG,1928,-0.1749
177 | GISTEMP,1928,-0.21
178 | GCAG,1927,-0.1506
179 | GISTEMP,1927,-0.2
180 | GCAG,1926,-0.0618
181 | GISTEMP,1926,-0.09
182 | GCAG,1925,-0.1464
183 | GISTEMP,1925,-0.2
184 | GCAG,1924,-0.2510
185 | GISTEMP,1924,-0.28
186 | GCAG,1923,-0.2161
187 | GISTEMP,1923,-0.24
188 | GCAG,1922,-0.2318
189 | GISTEMP,1922,-0.27
190 | GCAG,1921,-0.1517
191 | GISTEMP,1921,-0.21
192 | GCAG,1920,-0.2152
193 | GISTEMP,1920,-0.27
194 | GCAG,1919,-0.2082
195 | GISTEMP,1919,-0.22
196 | GCAG,1918,-0.2118
197 | GISTEMP,1918,-0.26
198 | GCAG,1917,-0.3193
199 | GISTEMP,1917,-0.4
200 | GCAG,1916,-0.2979
201 | GISTEMP,1916,-0.34
202 | GCAG,1915,-0.0747
203 | GISTEMP,1915,-0.11
204 | GCAG,1914,-0.1444
205 | GISTEMP,1914,-0.16
206 | GCAG,1913,-0.3205
207 | GISTEMP,1913,-0.34
208 | GCAG,1912,-0.3318
209 | GISTEMP,1912,-0.35
210 | GCAG,1911,-0.4367
211 | GISTEMP,1911,-0.45
212 | GCAG,1910,-0.3862
213 | GISTEMP,1910,-0.43
214 | GCAG,1909,-0.4332
215 | GISTEMP,1909,-0.48
216 | GCAG,1908,-0.4441
217 | GISTEMP,1908,-0.43
218 | GCAG,1907,-0.3767
219 | GISTEMP,1907,-0.4
220 | GCAG,1906,-0.2208
221 | GISTEMP,1906,-0.23
222 | GCAG,1905,-0.2967
223 | GISTEMP,1905,-0.28
224 | GCAG,1904,-0.4240
225 | GISTEMP,1904,-0.45
226 | GCAG,1903,-0.3442
227 | GISTEMP,1903,-0.36
228 | GCAG,1902,-0.2535
229 | GISTEMP,1902,-0.28
230 | GCAG,1901,-0.1471
231 | GISTEMP,1901,-0.15
232 | GCAG,1900,-0.0704
233 | GISTEMP,1900,-0.09
234 | GCAG,1899,-0.1172
235 | GISTEMP,1899,-0.16
236 | GCAG,1898,-0.2578
237 | GISTEMP,1898,-0.28
238 | GCAG,1897,-0.1232
239 | GISTEMP,1897,-0.11
240 | GCAG,1896,-0.0971
241 | GISTEMP,1896,-0.15
242 | GCAG,1895,-0.2279
243 | GISTEMP,1895,-0.21
244 | GCAG,1894,-0.2828
245 | GISTEMP,1894,-0.3
246 | GCAG,1893,-0.3221
247 | GISTEMP,1893,-0.3
248 | GCAG,1892,-0.3079
249 | GISTEMP,1892,-0.26
250 | GCAG,1891,-0.2552
251 | GISTEMP,1891,-0.24
252 | GCAG,1890,-0.3233
253 | GISTEMP,1890,-0.36
254 | GCAG,1889,-0.1032
255 | GISTEMP,1889,-0.11
256 | GCAG,1888,-0.1541
257 | GISTEMP,1888,-0.2
258 | GCAG,1887,-0.2559
259 | GISTEMP,1887,-0.33
260 | GCAG,1886,-0.2101
261 | GISTEMP,1886,-0.3
262 | GCAG,1885,-0.2220
263 | GISTEMP,1885,-0.31
264 | GCAG,1884,-0.2099
265 | GISTEMP,1884,-0.28
266 | GCAG,1883,-0.1481
267 | GISTEMP,1883,-0.2
268 | GCAG,1882,-0.0710
269 | GISTEMP,1882,-0.1
270 | GCAG,1881,-0.0707
271 | GISTEMP,1881,-0.12
272 | GCAG,1880,-0.1247
273 | GISTEMP,1880,-0.2
--------------------------------------------------------------------------------
/wk1-linear-regression/brain_boday.txt:
--------------------------------------------------------------------------------
1 | Brain Body
2 | 3.385 44.500
3 | 0.480 15.500
4 | 1.350 8.100
5 | 465.000 423.000
6 | 36.330 119.500
7 | 27.660 115.000
8 | 14.830 98.200
9 | 1.040 5.500
10 | 4.190 58.000
11 | 0.425 6.400
12 | 0.101 4.000
13 | 0.920 5.700
14 | 1.000 6.600
15 | 0.005 0.140
16 | 0.060 1.000
17 | 3.500 10.800
18 | 2.000 12.300
19 | 1.700 6.300
20 | 2547.000 4603.000
21 | 0.023 0.300
22 | 187.100 419.000
23 | 521.000 655.000
24 | 0.785 3.500
25 | 10.000 115.000
26 | 3.300 25.600
27 | 0.200 5.000
28 | 1.410 17.500
29 | 529.000 680.000
30 | 207.000 406.000
31 | 85.000 325.000
32 | 0.750 12.300
33 | 62.000 1320.000
34 | 6654.000 5712.000
35 | 3.500 3.900
36 | 6.800 179.000
37 | 35.000 56.000
38 | 4.050 17.000
39 | 0.120 1.000
40 | 0.023 0.400
41 | 0.010 0.250
42 | 1.400 12.500
43 | 250.000 490.000
44 | 2.500 12.100
45 | 55.500 175.000
46 | 100.000 157.000
47 | 52.160 440.000
48 | 10.550 179.500
49 | 0.550 2.400
50 | 60.000 81.000
51 | 3.600 21.000
52 | 4.288 39.200
53 | 0.280 1.900
54 | 0.075 1.200
55 | 0.122 3.000
56 | 0.048 0.330
57 | 192.000 180.000
58 | 3.000 25.000
59 | 160.000 169.000
60 | 0.900 2.600
61 | 1.620 11.400
62 | 0.104 2.500
63 | 4.235 50.400
--------------------------------------------------------------------------------
/wk1-linear-regression/challenge1.py:
--------------------------------------------------------------------------------
"""Fit a simple linear regression (y ~ x) on challenge_dataset.txt,
plot the data with the fitted line, and report R^2 and mean squared error."""
import pandas as pd
import numpy as np
from sklearn import linear_model
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt

# Read the two-column dataset (the file has no header row).
df = pd.read_csv('challenge_dataset.txt', header=None, names=['x', 'y'])

# Keep double brackets so x and y stay 2-D (n, 1), as sklearn expects.
x = df[['x']]
y = df[['y']]

plt.scatter(x, y)

# Train an ordinary least squares model and predict on the training inputs.
reg = linear_model.LinearRegression()
reg.fit(x, y)
pred = reg.predict(x)

# Coefficient of determination R^2 of the prediction.
r_square = reg.score(x, y)
print("R^2 is ", r_square)  # was a Python 2 print statement; now Python 3

plt.plot(x, pred)
plt.show()

# Mean squared error between the targets and the fitted values.
MSR = mean_squared_error(y, pred)
print("Mean Square Error is ", MSR)
--------------------------------------------------------------------------------
/wk1-linear-regression/challenge_dataset.txt:
--------------------------------------------------------------------------------
1 | 6.1101,17.592
2 | 5.5277,9.1302
3 | 8.5186,13.662
4 | 7.0032,11.854
5 | 5.8598,6.8233
6 | 8.3829,11.886
7 | 7.4764,4.3483
8 | 8.5781,12
9 | 6.4862,6.5987
10 | 5.0546,3.8166
11 | 5.7107,3.2522
12 | 14.164,15.505
13 | 5.734,3.1551
14 | 8.4084,7.2258
15 | 5.6407,0.71618
16 | 5.3794,3.5129
17 | 6.3654,5.3048
18 | 5.1301,0.56077
19 | 6.4296,3.6518
20 | 7.0708,5.3893
21 | 6.1891,3.1386
22 | 20.27,21.767
23 | 5.4901,4.263
24 | 6.3261,5.1875
25 | 5.5649,3.0825
26 | 18.945,22.638
27 | 12.828,13.501
28 | 10.957,7.0467
29 | 13.176,14.692
30 | 22.203,24.147
31 | 5.2524,-1.22
32 | 6.5894,5.9966
33 | 9.2482,12.134
34 | 5.8918,1.8495
35 | 8.2111,6.5426
36 | 7.9334,4.5623
37 | 8.0959,4.1164
38 | 5.6063,3.3928
39 | 12.836,10.117
40 | 6.3534,5.4974
41 | 5.4069,0.55657
42 | 6.8825,3.9115
43 | 11.708,5.3854
44 | 5.7737,2.4406
45 | 7.8247,6.7318
46 | 7.0931,1.0463
47 | 5.0702,5.1337
48 | 5.8014,1.844
49 | 11.7,8.0043
50 | 5.5416,1.0179
51 | 7.5402,6.7504
52 | 5.3077,1.8396
53 | 7.4239,4.2885
54 | 7.6031,4.9981
55 | 6.3328,1.4233
56 | 6.3589,-1.4211
57 | 6.2742,2.4756
58 | 5.6397,4.6042
59 | 9.3102,3.9624
60 | 9.4536,5.4141
61 | 8.8254,5.1694
62 | 5.1793,-0.74279
63 | 21.279,17.929
64 | 14.908,12.054
65 | 18.959,17.054
66 | 7.2182,4.8852
67 | 8.2951,5.7442
68 | 10.236,7.7754
69 | 5.4994,1.0173
70 | 20.341,20.992
71 | 10.136,6.6799
72 | 7.3345,4.0259
73 | 6.0062,1.2784
74 | 7.2259,3.3411
75 | 5.0269,-2.6807
76 | 6.5479,0.29678
77 | 7.5386,3.8845
78 | 5.0365,5.7014
79 | 10.274,6.7526
80 | 5.1077,2.0576
81 | 5.7292,0.47953
82 | 5.1884,0.20421
83 | 6.3557,0.67861
84 | 9.7687,7.5435
85 | 6.5159,5.3436
86 | 8.5172,4.2415
87 | 9.1802,6.7981
88 | 6.002,0.92695
89 | 5.5204,0.152
90 | 5.0594,2.8214
91 | 5.7077,1.8451
92 | 7.6366,4.2959
93 | 5.8707,7.2029
94 | 5.3054,1.9869
95 | 8.2934,0.14454
96 | 13.394,9.0551
97 | 5.4369,0.61705
98 |
--------------------------------------------------------------------------------
/wk1-linear-regression/data.csv:
--------------------------------------------------------------------------------
1 | 32.502345269453031,31.70700584656992
2 | 53.426804033275019,68.77759598163891
3 | 61.530358025636438,62.562382297945803
4 | 47.475639634786098,71.546632233567777
5 | 59.813207869512318,87.230925133687393
6 | 55.142188413943821,78.211518270799232
7 | 52.211796692214001,79.64197304980874
8 | 39.299566694317065,59.171489321869508
9 | 48.10504169176825,75.331242297063056
10 | 52.550014442733818,71.300879886850353
11 | 45.419730144973755,55.165677145959123
12 | 54.351634881228918,82.478846757497919
13 | 44.164049496773352,62.008923245725825
14 | 58.16847071685779,75.392870425994957
15 | 56.727208057096611,81.43619215887864
16 | 48.955888566093719,60.723602440673965
17 | 44.687196231480904,82.892503731453715
18 | 60.297326851333466,97.379896862166078
19 | 45.618643772955828,48.847153317355072
20 | 38.816817537445637,56.877213186268506
21 | 66.189816606752601,83.878564664602763
22 | 65.41605174513407,118.59121730252249
23 | 47.48120860786787,57.251819462268969
24 | 41.57564261748702,51.391744079832307
25 | 51.84518690563943,75.380651665312357
26 | 59.370822011089523,74.765564032151374
27 | 57.31000343834809,95.455052922574737
28 | 63.615561251453308,95.229366017555307
29 | 46.737619407976972,79.052406169565586
30 | 50.556760148547767,83.432071421323712
31 | 52.223996085553047,63.358790317497878
32 | 35.567830047746632,41.412885303700563
33 | 42.436476944055642,76.617341280074044
34 | 58.16454011019286,96.769566426108199
35 | 57.504447615341789,74.084130116602523
36 | 45.440530725319981,66.588144414228594
37 | 61.89622268029126,77.768482417793024
38 | 33.093831736163963,50.719588912312084
39 | 36.436009511386871,62.124570818071781
40 | 37.675654860850742,60.810246649902211
41 | 44.555608383275356,52.682983366387781
42 | 43.318282631865721,58.569824717692867
43 | 50.073145632289034,82.905981485070512
44 | 43.870612645218372,61.424709804339123
45 | 62.997480747553091,115.24415280079529
46 | 32.669043763467187,45.570588823376085
47 | 40.166899008703702,54.084054796223612
48 | 53.575077531673656,87.994452758110413
49 | 33.864214971778239,52.725494375900425
50 | 64.707138666121296,93.576118692658241
51 | 38.119824026822805,80.166275447370964
52 | 44.502538064645101,65.101711570560326
53 | 40.599538384552318,65.562301260400375
54 | 41.720676356341293,65.280886920822823
55 | 51.088634678336796,73.434641546324301
56 | 55.078095904923202,71.13972785861894
57 | 41.377726534895203,79.102829683549857
58 | 62.494697427269791,86.520538440347153
59 | 49.203887540826003,84.742697807826218
60 | 41.102685187349664,59.358850248624933
61 | 41.182016105169822,61.684037524833627
62 | 50.186389494880601,69.847604158249183
63 | 52.378446219236217,86.098291205774103
64 | 50.135485486286122,59.108839267699643
65 | 33.644706006191782,69.89968164362763
66 | 39.557901222906828,44.862490711164398
67 | 56.130388816875467,85.498067778840223
68 | 57.362052133238237,95.536686846467219
69 | 60.269214393997906,70.251934419771587
70 | 35.678093889410732,52.721734964774988
71 | 31.588116998132829,50.392670135079896
72 | 53.66093226167304,63.642398775657753
73 | 46.682228649471917,72.247251068662365
74 | 43.107820219102464,57.812512976181402
75 | 70.34607561504933,104.25710158543822
76 | 44.492855880854073,86.642020318822006
77 | 57.50453330326841,91.486778000110135
78 | 36.930076609191808,55.231660886212836
79 | 55.805733357942742,79.550436678507609
80 | 38.954769073377065,44.847124242467601
81 | 56.901214702247074,80.207523139682763
82 | 56.868900661384046,83.14274979204346
83 | 34.33312470421609,55.723489260543914
84 | 59.04974121466681,77.634182511677864
85 | 57.788223993230673,99.051414841748269
86 | 54.282328705967409,79.120646274680027
87 | 51.088719898979143,69.588897851118475
88 | 50.282836348230731,69.510503311494389
89 | 44.211741752090113,73.687564318317285
90 | 38.005488008060688,61.366904537240131
91 | 32.940479942618296,67.170655768995118
92 | 53.691639571070056,85.668203145001542
93 | 68.76573426962166,114.85387123391394
94 | 46.230966498310252,90.123572069967423
95 | 68.319360818255362,97.919821035242848
96 | 50.030174340312143,81.536990783015028
97 | 49.239765342753763,72.111832469615663
98 | 50.039575939875988,85.232007342325673
99 | 48.149858891028863,66.224957888054632
100 | 25.128484647772304,53.454394214850524
--------------------------------------------------------------------------------
/wk1-linear-regression/demo.py:
--------------------------------------------------------------------------------
"""Demo: linear regression of body weight on brain weight, then plot the
scatter of the data together with the fitted regression line."""
import pandas as pd
import numpy as np
from sklearn import linear_model
import matplotlib.pyplot as plt

# Read the fixed-width brain/body dataset.
# NOTE(review): the file name is misspelled ('boday') in the repo itself;
# keep it as-is so the script still finds the file on disk.
df = pd.read_fwf('brain_boday.txt')
# Double brackets keep the frames 2-D (n, 1), as sklearn expects.
x = df[['Brain']]  # brain weight (independent variable)
y = df[['Body']]   # body weight (dependent variable)

# Train an ordinary least squares model.
body_reg = linear_model.LinearRegression()
body_reg.fit(x, y)

# Plot the raw data (independent vs. dependent) and the fitted line.
plt.scatter(x, y)
plt.plot(x, body_reg.predict(x))
plt.show()
27 |
--------------------------------------------------------------------------------
/wk1-linear-regression/global_co2.csv:
--------------------------------------------------------------------------------
1 | Year,Total,Gas Fuel,Liquid Fuel,Solid Fuel,Cement,Gas Flaring,Per Capita
2 | 1751,3,0,0,3,0,0,
3 | 1752,3,0,0,3,0,0,
4 | 1753,3,0,0,3,0,0,
5 | 1754,3,0,0,3,0,0,
6 | 1755,3,0,0,3,0,0,
7 | 1756,3,0,0,3,0,0,
8 | 1757,3,0,0,3,0,0,
9 | 1758,3,0,0,3,0,0,
10 | 1759,3,0,0,3,0,0,
11 | 1760,3,0,0,3,0,0,
12 | 1761,3,0,0,3,0,0,
13 | 1762,3,0,0,3,0,0,
14 | 1763,3,0,0,3,0,0,
15 | 1764,3,0,0,3,0,0,
16 | 1765,3,0,0,3,0,0,
17 | 1766,3,0,0,3,0,0,
18 | 1767,3,0,0,3,0,0,
19 | 1768,3,0,0,3,0,0,
20 | 1769,3,0,0,3,0,0,
21 | 1770,3,0,0,3,0,0,
22 | 1771,4,0,0,4,0,0,
23 | 1772,4,0,0,4,0,0,
24 | 1773,4,0,0,4,0,0,
25 | 1774,4,0,0,4,0,0,
26 | 1775,4,0,0,4,0,0,
27 | 1776,4,0,0,4,0,0,
28 | 1777,4,0,0,4,0,0,
29 | 1778,4,0,0,4,0,0,
30 | 1779,4,0,0,4,0,0,
31 | 1780,4,0,0,4,0,0,
32 | 1781,5,0,0,5,0,0,
33 | 1782,5,0,0,5,0,0,
34 | 1783,5,0,0,5,0,0,
35 | 1784,5,0,0,5,0,0,
36 | 1785,5,0,0,5,0,0,
37 | 1786,5,0,0,5,0,0,
38 | 1787,5,0,0,5,0,0,
39 | 1788,5,0,0,5,0,0,
40 | 1789,5,0,0,5,0,0,
41 | 1790,5,0,0,5,0,0,
42 | 1791,6,0,0,6,0,0,
43 | 1792,6,0,0,6,0,0,
44 | 1793,6,0,0,6,0,0,
45 | 1794,6,0,0,6,0,0,
46 | 1795,6,0,0,6,0,0,
47 | 1796,6,0,0,6,0,0,
48 | 1797,7,0,0,7,0,0,
49 | 1798,7,0,0,7,0,0,
50 | 1799,7,0,0,7,0,0,
51 | 1800,8,0,0,8,0,0,
52 | 1801,8,0,0,8,0,0,
53 | 1802,10,0,0,10,0,0,
54 | 1803,9,0,0,9,0,0,
55 | 1804,9,0,0,9,0,0,
56 | 1805,9,0,0,9,0,0,
57 | 1806,10,0,0,10,0,0,
58 | 1807,10,0,0,10,0,0,
59 | 1808,10,0,0,10,0,0,
60 | 1809,10,0,0,10,0,0,
61 | 1810,10,0,0,10,0,0,
62 | 1811,11,0,0,11,0,0,
63 | 1812,11,0,0,11,0,0,
64 | 1813,11,0,0,11,0,0,
65 | 1814,11,0,0,11,0,0,
66 | 1815,12,0,0,12,0,0,
67 | 1816,13,0,0,13,0,0,
68 | 1817,14,0,0,14,0,0,
69 | 1818,14,0,0,14,0,0,
70 | 1819,14,0,0,14,0,0,
71 | 1820,14,0,0,14,0,0,
72 | 1821,14,0,0,14,0,0,
73 | 1822,15,0,0,15,0,0,
74 | 1823,16,0,0,16,0,0,
75 | 1824,16,0,0,16,0,0,
76 | 1825,17,0,0,17,0,0,
77 | 1826,17,0,0,17,0,0,
78 | 1827,18,0,0,18,0,0,
79 | 1828,18,0,0,18,0,0,
80 | 1829,18,0,0,18,0,0,
81 | 1830,24,0,0,24,0,0,
82 | 1831,23,0,0,23,0,0,
83 | 1832,23,0,0,23,0,0,
84 | 1833,24,0,0,24,0,0,
85 | 1834,24,0,0,24,0,0,
86 | 1835,25,0,0,25,0,0,
87 | 1836,29,0,0,29,0,0,
88 | 1837,29,0,0,29,0,0,
89 | 1838,30,0,0,30,0,0,
90 | 1839,31,0,0,31,0,0,
91 | 1840,33,0,0,33,0,0,
92 | 1841,34,0,0,34,0,0,
93 | 1842,36,0,0,36,0,0,
94 | 1843,37,0,0,37,0,0,
95 | 1844,39,0,0,39,0,0,
96 | 1845,43,0,0,43,0,0,
97 | 1846,43,0,0,43,0,0,
98 | 1847,46,0,0,46,0,0,
99 | 1848,47,0,0,47,0,0,
100 | 1849,50,0,0,50,0,0,
101 | 1850,54,0,0,54,0,0,
102 | 1851,54,0,0,54,0,0,
103 | 1852,57,0,0,57,0,0,
104 | 1853,59,0,0,59,0,0,
105 | 1854,69,0,0,69,0,0,
106 | 1855,71,0,0,71,0,0,
107 | 1856,76,0,0,76,0,0,
108 | 1857,77,0,0,77,0,0,
109 | 1858,78,0,0,78,0,0,
110 | 1859,83,0,0,83,0,0,
111 | 1860,91,0,0,91,0,0,
112 | 1861,95,0,0,95,0,0,
113 | 1862,97,0,0,96,0,0,
114 | 1863,104,0,0,103,0,0,
115 | 1864,112,0,0,112,0,0,
116 | 1865,119,0,0,119,0,0,
117 | 1866,122,0,0,122,0,0,
118 | 1867,130,0,0,130,0,0,
119 | 1868,135,0,0,134,0,0,
120 | 1869,142,0,0,142,0,0,
121 | 1870,147,0,1,146,0,0,
122 | 1871,156,0,1,156,0,0,
123 | 1872,173,0,1,173,0,0,
124 | 1873,184,0,1,183,0,0,
125 | 1874,174,0,1,173,0,0,
126 | 1875,188,0,1,187,0,0,
127 | 1876,191,0,1,190,0,0,
128 | 1877,194,0,2,192,0,0,
129 | 1878,196,0,2,194,0,0,
130 | 1879,210,0,3,207,0,0,
131 | 1880,236,0,3,233,0,0,
132 | 1881,243,0,4,239,0,0,
133 | 1882,256,0,4,252,0,0,
134 | 1883,272,0,3,269,0,0,
135 | 1884,275,0,4,271,0,0,
136 | 1885,277,1,4,273,0,0,
137 | 1886,281,2,5,275,0,0,
138 | 1887,295,3,5,287,0,0,
139 | 1888,327,5,5,317,0,0,
140 | 1889,327,3,6,318,0,0,
141 | 1890,356,3,8,345,0,0,
142 | 1891,372,2,9,360,0,0,
143 | 1892,374,2,9,363,0,0,
144 | 1893,370,2,10,358,0,0,
145 | 1894,383,2,9,372,0,0,
146 | 1895,406,2,11,393,0,0,
147 | 1896,419,2,12,405,0,0,
148 | 1897,440,2,13,425,0,0,
149 | 1898,465,2,13,449,0,0,
150 | 1899,507,3,14,491,0,0,
151 | 1900,534,3,16,515,0,0,
152 | 1901,552,4,18,531,0,0,
153 | 1902,566,4,19,543,0,0,
154 | 1903,617,4,20,593,0,0,
155 | 1904,624,4,23,597,0,0,
156 | 1905,663,5,23,636,0,0,
157 | 1906,707,5,23,680,0,0,
158 | 1907,784,5,28,750,0,0,
159 | 1908,750,5,30,714,0,0,
160 | 1909,785,6,32,747,0,0,
161 | 1910,819,7,34,778,0,0,
162 | 1911,836,7,36,792,0,0,
163 | 1912,879,8,37,834,0,0,
164 | 1913,943,8,41,895,0,0,
165 | 1914,850,8,42,800,0,0,
166 | 1915,838,9,45,784,0,0,
167 | 1916,901,10,48,842,0,0,
168 | 1917,955,11,54,891,0,0,
169 | 1918,936,10,53,873,0,0,
170 | 1919,806,10,61,735,0,0,
171 | 1920,932,11,78,843,0,0,
172 | 1921,803,10,84,709,0,0,
173 | 1922,845,11,94,740,0,0,
174 | 1923,970,14,111,845,0,0,
175 | 1924,963,16,110,836,0,0,
176 | 1925,975,17,116,842,0,0,
177 | 1926,983,19,119,846,0,0,
178 | 1927,1062,21,136,905,0,0,
179 | 1928,1065,23,143,890,10,0,
180 | 1929,1145,28,160,947,10,0,
181 | 1930,1053,28,152,862,10,0,
182 | 1931,940,25,147,759,8,0,
183 | 1932,847,24,141,675,7,0,
184 | 1933,893,25,154,708,7,0,
185 | 1934,973,28,162,775,8,0,
186 | 1935,1027,30,176,811,9,0,
187 | 1936,1130,34,192,893,11,0,
188 | 1937,1209,38,219,941,11,0,
189 | 1938,1142,37,214,880,12,0,
190 | 1939,1192,38,222,918,13,0,
191 | 1940,1299,42,229,1017,11,0,
192 | 1941,1334,42,236,1043,12,0,
193 | 1942,1342,45,222,1063,11,0,
194 | 1943,1391,50,239,1092,10,0,
195 | 1944,1383,54,275,1047,7,0,
196 | 1945,1160,59,275,820,7,0,
197 | 1946,1238,61,292,875,10,0,
198 | 1947,1392,67,322,992,12,0,
199 | 1948,1469,76,364,1015,14,0,
200 | 1949,1419,81,362,960,16,0,
201 | 1950,1630,97,423,1070,18,23,0.64
202 | 1951,1767,115,479,1129,20,24,0.69
203 | 1952,1795,124,504,1119,22,26,0.68
204 | 1953,1841,131,533,1125,24,27,0.69
205 | 1954,1865,138,557,1116,27,27,0.69
206 | 1955,2042,150,625,1208,30,31,0.74
207 | 1956,2177,161,679,1273,32,32,0.77
208 | 1957,2270,178,714,1309,34,35,0.79
209 | 1958,2330,192,731,1336,36,35,0.8
210 | 1959,2454,206,789,1382,40,36,0.83
211 | 1960,2569,227,849,1410,43,39,0.85
212 | 1961,2580,240,904,1349,45,42,0.84
213 | 1962,2686,263,980,1351,49,44,0.86
214 | 1963,2833,286,1052,1396,51,47,0.88
215 | 1964,2995,316,1137,1435,57,51,0.92
216 | 1965,3130,337,1219,1460,59,55,0.94
217 | 1966,3288,364,1323,1478,63,60,0.97
218 | 1967,3393,392,1423,1448,65,66,0.98
219 | 1968,3566,424,1551,1448,70,73,1.01
220 | 1969,3780,467,1673,1486,74,80,1.05
221 | 1970,4053,493,1839,1556,78,87,1.1
222 | 1971,4208,530,1947,1559,84,88,1.12
223 | 1972,4376,560,2057,1576,89,94,1.14
224 | 1973,4614,588,2241,1581,95,110,1.18
225 | 1974,4623,597,2245,1579,96,107,1.16
226 | 1975,4596,604,2132,1673,95,92,1.13
227 | 1976,4864,630,2314,1710,103,108,1.18
228 | 1977,5026,650,2398,1765,108,104,1.19
229 | 1978,5087,680,2392,1793,116,106,1.19
230 | 1979,5369,721,2544,1887,119,98,1.23
231 | 1980,5315,740,2422,1947,120,86,1.2
232 | 1981,5152,756,2289,1921,121,64,1.14
233 | 1982,5113,740,2196,1992,121,64,1.11
234 | 1983,5094,741,2176,1995,125,58,1.09
235 | 1984,5280,808,2199,2094,128,51,1.11
236 | 1985,5439,837,2186,2237,131,49,1.12
237 | 1986,5607,831,2293,2300,137,46,1.14
238 | 1987,5752,894,2306,2364,143,44,1.15
239 | 1988,5965,937,2412,2414,152,50,1.17
240 | 1989,6097,985,2459,2457,156,41,1.17
241 | 1990,6127,1019,2492,2419,157,40,1.16
242 | 1991,6217,1063,2605,2345,161,44,1.16
243 | 1992,6164,1095,2510,2357,167,35,1.13
244 | 1993,6162,1129,2523,2298,176,36,1.11
245 | 1994,6266,1139,2546,2358,186,38,1.11
246 | 1995,6398,1157,2565,2442,197,36,1.12
247 | 1996,6542,1209,2624,2469,203,37,1.13
248 | 1997,6651,1208,2700,2495,209,38,1.13
249 | 1998,6643,1243,2766,2391,209,35,1.12
250 | 1999,6610,1270,2737,2352,217,33,1.1
251 | 2000,6765,1288,2838,2367,226,45,1.11
252 | 2001,6927,1312,2840,2492,237,46,1.12
253 | 2002,6996,1344,2831,2521,252,48,1.12
254 | 2003,7416,1391,2959,2743,276,48,1.17
255 | 2004,7807,1437,3053,2967,298,53,1.21
256 | 2005,8093,1480,3076,3157,320,60,1.24
257 | 2006,8370,1525,3089,3339,356,61,1.27
258 | 2007,8566,1572,3081,3464,382,68,1.28
259 | 2008,8783,1631,3122,3571,388,71,1.3
260 | 2009,8740,1585,3056,3620,413,66,1.28
261 | 2010,9167,1702,3114,3842,450,59,1.33
--------------------------------------------------------------------------------
/wk2-neural-Networks/.ipynb_checkpoints/NN-3layers-checkpoint.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 7,
6 | "metadata": {
7 | "collapsed": true
8 | },
9 | "outputs": [],
10 | "source": [
11 | "import numpy as np"
12 | ]
13 | },
14 | {
15 | "cell_type": "code",
16 | "execution_count": 27,
17 | "metadata": {
18 | "collapsed": false
19 | },
20 | "outputs": [],
21 | "source": [
22 | "class NeuralNetwork():\n",
23 | " def __init__(self):\n",
24 | " np.random.seed(1)\n",
25 | " \n",
26 | " # setting the number of nodes \n",
27 | " l2 = 5\n",
28 | " l3 = 4\n",
29 | " \n",
30 | " # initialize 3 weights\n",
31 | " self.synaptic_weights1 = 2 * np.random.random((3,l2)) - 1\n",
32 | " self.synaptic_weights2 = 2 * np.random.random((l2,l3)) - 1\n",
33 | " self.synaptic_weights3 = 2 * np.random.random((l3,1)) - 1\n",
34 | " \n",
35 | " self.activation_function = lambda x: 1 / (1 + np.exp(-x))\n",
36 | " self.derivative = lambda x: x * (1-x)\n",
37 | " \n",
38 | " def train(self, X, y, iterations):\n",
39 | " # Convert inputs list to 2d array\n",
40 | " #X = np.array(X, ndmin=2)\n",
41 | " #y = np.array(y, ndmin=2)\n",
42 | " \n",
43 | " for iter in range(iterations):\n",
44 | " # feed forward\n",
45 | " a2 = self.activation_function(np.dot(X, self.synaptic_weights1))\n",
46 | " a3 = self.activation_function(np.dot(a2, self.synaptic_weights2))\n",
47 | " output = self.activation_function(np.dot(a3, self.synaptic_weights3))\n",
48 | " \n",
49 | " # error\n",
50 | " delta4 = (y - output)*self.derivative(output)\n",
51 | " delta3 = np.dot(self.synaptic_weights3,delta4.T)*self.derivative(a3).T\n",
52 | " delta2 = np.dot(self.synaptic_weights2,delta3)*self.derivative(a2).T\n",
53 | " \n",
54 | " # adjustments\n",
55 | " adjustment3 = np.dot(a3.T, delta4)\n",
56 | " adjustment2 = np.dot(a2.T, delta3.T)\n",
57 | " adjustment1 = np.dot(X.T, delta2.T)\n",
58 | " \n",
59 | " # update weights\n",
60 | " self.synaptic_weights1 += adjustment1\n",
61 | " self.synaptic_weights2 += adjustment2\n",
62 | " self.synaptic_weights3 += adjustment3\n",
63 | " \n",
64 | " def run(self, X):\n",
65 | " # forward pass\n",
66 | " a2 = self.activation_function(np.dot(X, self.synaptic_weights1))\n",
67 | " a3 = self.activation_function(np.dot(a2, self.synaptic_weights2))\n",
68 | " output = self.activation_function(np.dot(a3, self.synaptic_weights3))\n",
69 | " \n",
70 | " return output\n",
71 | " "
72 | ]
73 | },
74 | {
75 | "cell_type": "code",
76 | "execution_count": 28,
77 | "metadata": {
78 | "collapsed": false
79 | },
80 | "outputs": [
81 | {
82 | "name": "stdout",
83 | "output_type": "stream",
84 | "text": [
85 | "Random starting synaptic weights (layer 1): \n",
86 | "[[-0.16595599 0.44064899 -0.99977125 -0.39533485 -0.70648822]\n",
87 | " [-0.81532281 -0.62747958 -0.30887855 -0.20646505 0.07763347]\n",
88 | " [-0.16161097 0.370439 -0.5910955 0.75623487 -0.94522481]]\n",
89 | "Random starting synaptic weights (layer 2): \n",
90 | "[[ 0.34093502 -0.1653904 0.11737966 -0.71922612]\n",
91 | " [-0.60379702 0.60148914 0.93652315 -0.37315164]\n",
92 | " [ 0.38464523 0.7527783 0.78921333 -0.82991158]\n",
93 | " [-0.92189043 -0.66033916 0.75628501 -0.80330633]\n",
94 | " [-0.15778475 0.91577906 0.06633057 0.38375423]]\n",
95 | "Random starting synaptic weights (layer 3): \n",
96 | "[[-0.36896874]\n",
97 | " [ 0.37300186]\n",
98 | " [ 0.66925134]\n",
99 | " [-0.96342345]]\n",
100 | "\n",
101 | "New synaptic weights (layer 1) after training: \n",
102 | "[[-0.39042717 4.02220543 -1.52322523 2.40451717 -2.77177632]\n",
103 | " [-0.86817904 -0.33659723 -0.245578 -0.31292608 0.26079733]\n",
104 | " [-0.00600591 -1.69046817 0.12647375 -0.79367455 1.04614 ]]\n",
105 | "\n",
106 | "New synaptic weights (layer 2) after training: \n",
107 | "[[ 0.9614375 -0.15372521 -0.67703076 -0.00498486]\n",
108 | " [-2.7714058 0.77362787 2.71638353 -2.4249225 ]\n",
109 | " [ 1.88550044 0.70717346 -0.71729366 0.7730995 ]\n",
110 | " [-1.59473372 -0.55756571 1.23221965 -1.28695185]\n",
111 | " [ 1.92232578 0.86077523 -2.13676866 2.54238247]]\n",
112 | "\n",
113 | "New synaptic weights (layer 3) after training: \n",
114 | "[[-4.392069 ]\n",
115 | " [ 0.66563256]\n",
116 | " [ 5.76280212]\n",
117 | " [-3.88936424]]\n"
118 | ]
119 | }
120 | ],
121 | "source": [
122 | "NN = NeuralNetwork()\n",
123 | "\n",
124 | "print(\"Random starting synaptic weights (layer 1): \")\n",
125 | "print(NN.synaptic_weights1)\n",
126 | "print(\"Random starting synaptic weights (layer 2): \")\n",
127 | "print(NN.synaptic_weights2)\n",
128 | "print(\"Random starting synaptic weights (layer 3): \")\n",
129 | "print(NN.synaptic_weights3)\n",
130 | "\n",
131 | "inputs = np.array([[0,0,1],[1,1,1],[1,0,1],[0,1,1]])\n",
132 | "targets = np.array([[0,1,1,0]]).T\n",
133 | "\n",
134 | "NN.train(inputs,targets, 10000)\n",
135 | "\n",
136 | "print (\"\\nNew synaptic weights (layer 1) after training: \")\n",
137 | "print (NN.synaptic_weights1)\n",
138 | "print (\"\\nNew synaptic weights (layer 2) after training: \")\n",
139 | "print (NN.synaptic_weights2)\n",
140 | "print (\"\\nNew synaptic weights (layer 3) after training: \")\n",
141 | "print (NN.synaptic_weights3)"
142 | ]
143 | },
144 | {
145 | "cell_type": "code",
146 | "execution_count": 29,
147 | "metadata": {
148 | "collapsed": false
149 | },
150 | "outputs": [
151 | {
152 | "name": "stdout",
153 | "output_type": "stream",
154 | "text": [
155 | "\n",
156 | "Predict new value [1,0,0]: \n",
157 | "[ 0.99650838]\n"
158 | ]
159 | }
160 | ],
161 | "source": [
162 | "# test with new input\n",
163 | "print(\"\\nPredict new value [1,0,0]: \")\n",
164 | "print(NN.run(np.array([1,0,0])))"
165 | ]
166 | }
167 | ],
168 | "metadata": {
169 | "kernelspec": {
170 | "display_name": "Python 3",
171 | "language": "python",
172 | "name": "python3"
173 | },
174 | "language_info": {
175 | "codemirror_mode": {
176 | "name": "ipython",
177 | "version": 3
178 | },
179 | "file_extension": ".py",
180 | "mimetype": "text/x-python",
181 | "name": "python",
182 | "nbconvert_exporter": "python",
183 | "pygments_lexer": "ipython3",
184 | "version": "3.5.2"
185 | }
186 | },
187 | "nbformat": 4,
188 | "nbformat_minor": 2
189 | }
190 |
--------------------------------------------------------------------------------
/wk2-neural-Networks/.ipynb_checkpoints/feedForwardNN-checkpoint.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {
7 | "collapsed": true
8 | },
9 | "outputs": [],
10 | "source": [
11 | "import numpy as np"
12 | ]
13 | },
14 | {
15 | "cell_type": "code",
16 | "execution_count": 2,
17 | "metadata": {
18 | "collapsed": true
19 | },
20 | "outputs": [],
21 | "source": [
22 | "class NeuralNetwork():\n",
23 | " def __init__(self):\n",
24 | " np.random.seed(1) # Seed the random number generator\n",
25 | " self.weights = {} # Create dict to hold weights\n",
26 | " self.num_layers = 1 # Set initial number of layer to one (input layer)\n",
27 |     "        self.adjustments = {}  # Create dict to hold adjustments\n",
28 | "\n",
29 | " def add_layer(self, shape):\n",
30 | " # Create weights with shape specified + biases\n",
31 | " self.weights[self.num_layers] = np.vstack((2 * np.random.random(shape) - 1, 2 * np.random.random((1, shape[1])) - 1))\n",
32 |     "        # Initialize the adjustments for these weights to zero\n",
33 | " self.adjustments[self.num_layers] = np.zeros(shape)\n",
34 | " self.num_layers += 1\n",
35 | "\n",
36 | " def __sigmoid(self, x):\n",
37 | " return 1 / (1 + np.exp(-x))\n",
38 | "\n",
39 | " def __sigmoid_derivative(self, x):\n",
40 | " return x * (1 - x)\n",
41 | "\n",
42 | " def predict(self, data):\n",
43 | " # Pass data through pretrained network\n",
44 | " for layer in range(1, self.num_layers+1):\n",
45 | " data = np.dot(data, self.weights[layer-1][:, :-1]) + self.weights[layer-1][:, -1] # + self.biases[layer]\n",
46 | " data = self.__sigmoid(data)\n",
47 | " return data\n",
48 | "\n",
49 | " def __forward_propagate(self, data):\n",
50 |     "        # Propagate through network and hold values for use in back-propagation\n",
51 | " activation_values = {}\n",
52 | " activation_values[1] = data\n",
53 | " for layer in range(2, self.num_layers+1):\n",
54 | " data = np.dot(data.T, self.weights[layer-1][:-1, :]) + self.weights[layer-1][-1, :].T # + self.biases[layer]\n",
55 | " data = self.__sigmoid(data).T\n",
56 | " activation_values[layer] = data\n",
57 | " return activation_values\n",
58 | "\n",
59 | " def simple_error(self, outputs, targets):\n",
60 | " return targets - outputs\n",
61 | "\n",
62 | " def sum_squared_error(self, outputs, targets):\n",
63 | " return 0.5 * np.mean(np.sum(np.power(outputs - targets, 2), axis=1))\n",
64 | "\n",
65 | " def __back_propagate(self, output, target):\n",
66 | " deltas = {}\n",
67 | " # Delta of output Layer\n",
68 | " deltas[self.num_layers] = output[self.num_layers] - target\n",
69 | "\n",
70 | " # Delta of hidden Layers\n",
71 | " for layer in reversed(range(2, self.num_layers)): # All layers except input/output\n",
72 | " a_val = output[layer]\n",
73 | " weights = self.weights[layer][:-1, :]\n",
74 | " prev_deltas = deltas[layer+1]\n",
75 | " deltas[layer] = np.multiply(np.dot(weights, prev_deltas), self.__sigmoid_derivative(a_val))\n",
76 | "\n",
77 |     "        # Calculate total adjustments based on deltas\n",
78 | " for layer in range(1, self.num_layers):\n",
79 | " self.adjustments[layer] += np.dot(deltas[layer+1], output[layer].T).T\n",
80 | "\n",
81 | " def __gradient_descente(self, batch_size, learning_rate):\n",
82 | " # Calculate partial derivative and take a step in that direction\n",
83 | " for layer in range(1, self.num_layers):\n",
84 | " partial_d = (1/batch_size) * self.adjustments[layer]\n",
85 | " self.weights[layer][:-1, :] += learning_rate * -partial_d\n",
86 | " self.weights[layer][-1, :] += learning_rate*1e-3 * -partial_d[-1, :]\n",
87 | "\n",
88 | "\n",
89 | " def train(self, inputs, targets, num_epochs, learning_rate=1, stop_accuracy=1e-5):\n",
90 | " error = []\n",
91 | " for iteration in range(num_epochs):\n",
92 | " for i in range(len(inputs)):\n",
93 | " x = inputs[i]\n",
94 | " y = targets[i]\n",
95 | " # Pass the training set through our neural network\n",
96 | " output = self.__forward_propagate(x)\n",
97 | "\n",
98 | " # Calculate the error\n",
99 | " loss = self.sum_squared_error(output[self.num_layers], y)\n",
100 | " error.append(loss)\n",
101 | "\n",
102 |     "                # Calculate Adjustments\n",
103 | " self.__back_propagate(output, y)\n",
104 | "\n",
105 | " self.__gradient_descente(i, learning_rate)\n",
106 | "\n",
107 |     "                # Check if accuracy criterion is satisfied\n",
108 | " if np.mean(error[-(i+1):]) < stop_accuracy and iteration > 0:\n",
109 | " break\n",
110 | "\n",
111 | " return(np.asarray(error), iteration+1)\n"
112 | ]
113 | },
114 | {
115 | "cell_type": "code",
116 | "execution_count": 3,
117 | "metadata": {
118 | "collapsed": false
119 | },
120 | "outputs": [
121 | {
122 | "name": "stdout",
123 | "output_type": "stream",
124 | "text": [
125 | "Error = 7.29967091123e-06\n",
126 | "Epoches needed to train = 62\n"
127 | ]
128 | }
129 | ],
130 | "source": [
131 | "# Create instance of a neural network\n",
132 | "nn = NeuralNetwork()\n",
133 | "\n",
134 | "# Add Layers (Input layer is created by default)\n",
135 | "nn.add_layer((2, 9))\n",
136 | "nn.add_layer((9, 1))\n",
137 | "\n",
138 | "# XOR function\n",
139 | "training_data = np.asarray([[0, 0], [0, 1], [1, 0], [1, 1]]).reshape(4, 2, 1)\n",
140 | "training_labels = np.asarray([[0], [1], [1], [0]])\n",
141 | "\n",
142 | "error, iteration = nn.train(training_data, training_labels, 5000)\n",
143 | "print('Error = ', np.mean(error[-4:]))\n",
144 | "print('Epoches needed to train = ', iteration)\n"
145 | ]
146 | }
147 | ],
148 | "metadata": {
149 | "kernelspec": {
150 | "display_name": "Python 3",
151 | "language": "python",
152 | "name": "python3"
153 | },
154 | "language_info": {
155 | "codemirror_mode": {
156 | "name": "ipython",
157 | "version": 3
158 | },
159 | "file_extension": ".py",
160 | "mimetype": "text/x-python",
161 | "name": "python",
162 | "nbconvert_exporter": "python",
163 | "pygments_lexer": "ipython3",
164 | "version": "3.5.2"
165 | }
166 | },
167 | "nbformat": 4,
168 | "nbformat_minor": 2
169 | }
170 |
--------------------------------------------------------------------------------
/wk2-neural-Networks/NN-3layers.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 7,
6 | "metadata": {
7 | "collapsed": true
8 | },
9 | "outputs": [],
10 | "source": [
11 | "import numpy as np"
12 | ]
13 | },
14 | {
15 | "cell_type": "code",
16 | "execution_count": 27,
17 | "metadata": {
18 | "collapsed": false
19 | },
20 | "outputs": [],
21 | "source": [
22 | "class NeuralNetwork():\n",
23 | " def __init__(self):\n",
24 | " np.random.seed(1)\n",
25 | " \n",
26 | " # setting the number of nodes \n",
27 | " l2 = 5\n",
28 | " l3 = 4\n",
29 | " \n",
30 | " # initialize 3 weights\n",
31 | " self.synaptic_weights1 = 2 * np.random.random((3,l2)) - 1\n",
32 | " self.synaptic_weights2 = 2 * np.random.random((l2,l3)) - 1\n",
33 | " self.synaptic_weights3 = 2 * np.random.random((l3,1)) - 1\n",
34 | " \n",
35 | " self.activation_function = lambda x: 1 / (1 + np.exp(-x))\n",
36 | " self.derivative = lambda x: x * (1-x)\n",
37 | " \n",
38 | " def train(self, X, y, iterations):\n",
39 | " # Convert inputs list to 2d array\n",
40 | " #X = np.array(X, ndmin=2)\n",
41 | " #y = np.array(y, ndmin=2)\n",
42 | " \n",
43 | " for iter in range(iterations):\n",
44 | " # feed forward\n",
45 | " a2 = self.activation_function(np.dot(X, self.synaptic_weights1))\n",
46 | " a3 = self.activation_function(np.dot(a2, self.synaptic_weights2))\n",
47 | " output = self.activation_function(np.dot(a3, self.synaptic_weights3))\n",
48 | " \n",
49 | " # error\n",
50 | " delta4 = (y - output)*self.derivative(output)\n",
51 | " delta3 = np.dot(self.synaptic_weights3,delta4.T)*self.derivative(a3).T\n",
52 | " delta2 = np.dot(self.synaptic_weights2,delta3)*self.derivative(a2).T\n",
53 | " \n",
54 | " # adjustments\n",
55 | " adjustment3 = np.dot(a3.T, delta4)\n",
56 | " adjustment2 = np.dot(a2.T, delta3.T)\n",
57 | " adjustment1 = np.dot(X.T, delta2.T)\n",
58 | " \n",
59 | " # update weights\n",
60 | " self.synaptic_weights1 += adjustment1\n",
61 | " self.synaptic_weights2 += adjustment2\n",
62 | " self.synaptic_weights3 += adjustment3\n",
63 | " \n",
64 | " def run(self, X):\n",
65 | " # forward pass\n",
66 | " a2 = self.activation_function(np.dot(X, self.synaptic_weights1))\n",
67 | " a3 = self.activation_function(np.dot(a2, self.synaptic_weights2))\n",
68 | " output = self.activation_function(np.dot(a3, self.synaptic_weights3))\n",
69 | " \n",
70 | " return output\n",
71 | " "
72 | ]
73 | },
74 | {
75 | "cell_type": "code",
76 | "execution_count": 28,
77 | "metadata": {
78 | "collapsed": false
79 | },
80 | "outputs": [
81 | {
82 | "name": "stdout",
83 | "output_type": "stream",
84 | "text": [
85 | "Random starting synaptic weights (layer 1): \n",
86 | "[[-0.16595599 0.44064899 -0.99977125 -0.39533485 -0.70648822]\n",
87 | " [-0.81532281 -0.62747958 -0.30887855 -0.20646505 0.07763347]\n",
88 | " [-0.16161097 0.370439 -0.5910955 0.75623487 -0.94522481]]\n",
89 | "Random starting synaptic weights (layer 2): \n",
90 | "[[ 0.34093502 -0.1653904 0.11737966 -0.71922612]\n",
91 | " [-0.60379702 0.60148914 0.93652315 -0.37315164]\n",
92 | " [ 0.38464523 0.7527783 0.78921333 -0.82991158]\n",
93 | " [-0.92189043 -0.66033916 0.75628501 -0.80330633]\n",
94 | " [-0.15778475 0.91577906 0.06633057 0.38375423]]\n",
95 | "Random starting synaptic weights (layer 3): \n",
96 | "[[-0.36896874]\n",
97 | " [ 0.37300186]\n",
98 | " [ 0.66925134]\n",
99 | " [-0.96342345]]\n",
100 | "\n",
101 | "New synaptic weights (layer 1) after training: \n",
102 | "[[-0.39042717 4.02220543 -1.52322523 2.40451717 -2.77177632]\n",
103 | " [-0.86817904 -0.33659723 -0.245578 -0.31292608 0.26079733]\n",
104 | " [-0.00600591 -1.69046817 0.12647375 -0.79367455 1.04614 ]]\n",
105 | "\n",
106 | "New synaptic weights (layer 2) after training: \n",
107 | "[[ 0.9614375 -0.15372521 -0.67703076 -0.00498486]\n",
108 | " [-2.7714058 0.77362787 2.71638353 -2.4249225 ]\n",
109 | " [ 1.88550044 0.70717346 -0.71729366 0.7730995 ]\n",
110 | " [-1.59473372 -0.55756571 1.23221965 -1.28695185]\n",
111 | " [ 1.92232578 0.86077523 -2.13676866 2.54238247]]\n",
112 | "\n",
113 | "New synaptic weights (layer 3) after training: \n",
114 | "[[-4.392069 ]\n",
115 | " [ 0.66563256]\n",
116 | " [ 5.76280212]\n",
117 | " [-3.88936424]]\n"
118 | ]
119 | }
120 | ],
121 | "source": [
122 | "NN = NeuralNetwork()\n",
123 | "\n",
124 | "print(\"Random starting synaptic weights (layer 1): \")\n",
125 | "print(NN.synaptic_weights1)\n",
126 | "print(\"Random starting synaptic weights (layer 2): \")\n",
127 | "print(NN.synaptic_weights2)\n",
128 | "print(\"Random starting synaptic weights (layer 3): \")\n",
129 | "print(NN.synaptic_weights3)\n",
130 | "\n",
131 | "inputs = np.array([[0,0,1],[1,1,1],[1,0,1],[0,1,1]])\n",
132 | "targets = np.array([[0,1,1,0]]).T\n",
133 | "\n",
134 | "NN.train(inputs,targets, 10000)\n",
135 | "\n",
136 | "print (\"\\nNew synaptic weights (layer 1) after training: \")\n",
137 | "print (NN.synaptic_weights1)\n",
138 | "print (\"\\nNew synaptic weights (layer 2) after training: \")\n",
139 | "print (NN.synaptic_weights2)\n",
140 | "print (\"\\nNew synaptic weights (layer 3) after training: \")\n",
141 | "print (NN.synaptic_weights3)"
142 | ]
143 | },
144 | {
145 | "cell_type": "code",
146 | "execution_count": 29,
147 | "metadata": {
148 | "collapsed": false
149 | },
150 | "outputs": [
151 | {
152 | "name": "stdout",
153 | "output_type": "stream",
154 | "text": [
155 | "\n",
156 | "Predict new value [1,0,0]: \n",
157 | "[ 0.99650838]\n"
158 | ]
159 | }
160 | ],
161 | "source": [
162 | "# test with new input\n",
163 | "print(\"\\nPredict new value [1,0,0]: \")\n",
164 | "print(NN.run(np.array([1,0,0])))"
165 | ]
166 | }
167 | ],
168 | "metadata": {
169 | "kernelspec": {
170 | "display_name": "Python 3",
171 | "language": "python",
172 | "name": "python3"
173 | },
174 | "language_info": {
175 | "codemirror_mode": {
176 | "name": "ipython",
177 | "version": 3
178 | },
179 | "file_extension": ".py",
180 | "mimetype": "text/x-python",
181 | "name": "python",
182 | "nbconvert_exporter": "python",
183 | "pygments_lexer": "ipython3",
184 | "version": "3.5.2"
185 | }
186 | },
187 | "nbformat": 4,
188 | "nbformat_minor": 2
189 | }
190 |
--------------------------------------------------------------------------------
/wk2-neural-Networks/feedForwardNN.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {
7 | "collapsed": true
8 | },
9 | "outputs": [],
10 | "source": [
11 | "import numpy as np"
12 | ]
13 | },
14 | {
15 | "cell_type": "code",
16 | "execution_count": 2,
17 | "metadata": {
18 | "collapsed": true
19 | },
20 | "outputs": [],
21 | "source": [
22 | "class NeuralNetwork():\n",
23 | " def __init__(self):\n",
24 | " np.random.seed(1) # Seed the random number generator\n",
25 | " self.weights = {} # Create dict to hold weights\n",
26 | " self.num_layers = 1 # Set initial number of layer to one (input layer)\n",
27 |     "        self.adjustments = {}  # Create dict to hold adjustments\n",
28 | "\n",
29 | " def add_layer(self, shape):\n",
30 | " # Create weights with shape specified + biases\n",
31 | " self.weights[self.num_layers] = np.vstack((2 * np.random.random(shape) - 1, 2 * np.random.random((1, shape[1])) - 1))\n",
32 |     "        # Initialize the adjustments for these weights to zero\n",
33 | " self.adjustments[self.num_layers] = np.zeros(shape)\n",
34 | " self.num_layers += 1\n",
35 | "\n",
36 | " def __sigmoid(self, x):\n",
37 | " return 1 / (1 + np.exp(-x))\n",
38 | "\n",
39 | " def __sigmoid_derivative(self, x):\n",
40 | " return x * (1 - x)\n",
41 | "\n",
42 | " def predict(self, data):\n",
43 | " # Pass data through pretrained network\n",
44 | " for layer in range(1, self.num_layers+1):\n",
45 | " data = np.dot(data, self.weights[layer-1][:, :-1]) + self.weights[layer-1][:, -1] # + self.biases[layer]\n",
46 | " data = self.__sigmoid(data)\n",
47 | " return data\n",
48 | "\n",
49 | " def __forward_propagate(self, data):\n",
50 |     "        # Propagate through network and hold values for use in back-propagation\n",
51 | " activation_values = {}\n",
52 | " activation_values[1] = data\n",
53 | " for layer in range(2, self.num_layers+1):\n",
54 | " data = np.dot(data.T, self.weights[layer-1][:-1, :]) + self.weights[layer-1][-1, :].T # + self.biases[layer]\n",
55 | " data = self.__sigmoid(data).T\n",
56 | " activation_values[layer] = data\n",
57 | " return activation_values\n",
58 | "\n",
59 | " def simple_error(self, outputs, targets):\n",
60 | " return targets - outputs\n",
61 | "\n",
62 | " def sum_squared_error(self, outputs, targets):\n",
63 | " return 0.5 * np.mean(np.sum(np.power(outputs - targets, 2), axis=1))\n",
64 | "\n",
65 | " def __back_propagate(self, output, target):\n",
66 | " deltas = {}\n",
67 | " # Delta of output Layer\n",
68 | " deltas[self.num_layers] = output[self.num_layers] - target\n",
69 | "\n",
70 | " # Delta of hidden Layers\n",
71 | " for layer in reversed(range(2, self.num_layers)): # All layers except input/output\n",
72 | " a_val = output[layer]\n",
73 | " weights = self.weights[layer][:-1, :]\n",
74 | " prev_deltas = deltas[layer+1]\n",
75 | " deltas[layer] = np.multiply(np.dot(weights, prev_deltas), self.__sigmoid_derivative(a_val))\n",
76 | "\n",
77 |     "        # Calculate total adjustments based on deltas\n",
78 | " for layer in range(1, self.num_layers):\n",
79 | " self.adjustments[layer] += np.dot(deltas[layer+1], output[layer].T).T\n",
80 | "\n",
81 | " def __gradient_descente(self, batch_size, learning_rate):\n",
82 | " # Calculate partial derivative and take a step in that direction\n",
83 | " for layer in range(1, self.num_layers):\n",
84 | " partial_d = (1/batch_size) * self.adjustments[layer]\n",
85 | " self.weights[layer][:-1, :] += learning_rate * -partial_d\n",
86 | " self.weights[layer][-1, :] += learning_rate*1e-3 * -partial_d[-1, :]\n",
87 | "\n",
88 | "\n",
89 | " def train(self, inputs, targets, num_epochs, learning_rate=1, stop_accuracy=1e-5):\n",
90 | " error = []\n",
91 | " for iteration in range(num_epochs):\n",
92 | " for i in range(len(inputs)):\n",
93 | " x = inputs[i]\n",
94 | " y = targets[i]\n",
95 | " # Pass the training set through our neural network\n",
96 | " output = self.__forward_propagate(x)\n",
97 | "\n",
98 | " # Calculate the error\n",
99 | " loss = self.sum_squared_error(output[self.num_layers], y)\n",
100 | " error.append(loss)\n",
101 | "\n",
102 |     "                # Calculate Adjustments\n",
103 | " self.__back_propagate(output, y)\n",
104 | "\n",
105 | " self.__gradient_descente(i, learning_rate)\n",
106 | "\n",
107 |     "                # Check if accuracy criterion is satisfied\n",
108 | " if np.mean(error[-(i+1):]) < stop_accuracy and iteration > 0:\n",
109 | " break\n",
110 | "\n",
111 | " return(np.asarray(error), iteration+1)\n"
112 | ]
113 | },
114 | {
115 | "cell_type": "code",
116 | "execution_count": 3,
117 | "metadata": {
118 | "collapsed": false
119 | },
120 | "outputs": [
121 | {
122 | "name": "stdout",
123 | "output_type": "stream",
124 | "text": [
125 | "Error = 7.29967091123e-06\n",
126 | "Epoches needed to train = 62\n"
127 | ]
128 | }
129 | ],
130 | "source": [
131 | "# Create instance of a neural network\n",
132 | "nn = NeuralNetwork()\n",
133 | "\n",
134 | "# Add Layers (Input layer is created by default)\n",
135 | "nn.add_layer((2, 9))\n",
136 | "nn.add_layer((9, 1))\n",
137 | "\n",
138 | "# XOR function\n",
139 | "training_data = np.asarray([[0, 0], [0, 1], [1, 0], [1, 1]]).reshape(4, 2, 1)\n",
140 | "training_labels = np.asarray([[0], [1], [1], [0]])\n",
141 | "\n",
142 | "error, iteration = nn.train(training_data, training_labels, 5000)\n",
143 | "print('Error = ', np.mean(error[-4:]))\n",
144 | "print('Epoches needed to train = ', iteration)\n"
145 | ]
146 | }
147 | ],
148 | "metadata": {
149 | "kernelspec": {
150 | "display_name": "Python 3",
151 | "language": "python",
152 | "name": "python3"
153 | },
154 | "language_info": {
155 | "codemirror_mode": {
156 | "name": "ipython",
157 | "version": 3
158 | },
159 | "file_extension": ".py",
160 | "mimetype": "text/x-python",
161 | "name": "python",
162 | "nbconvert_exporter": "python",
163 | "pygments_lexer": "ipython3",
164 | "version": "3.5.2"
165 | }
166 | },
167 | "nbformat": 4,
168 | "nbformat_minor": 2
169 | }
170 |
--------------------------------------------------------------------------------
/wk5-speed-dating/.ipynb_checkpoints/Speed dating prediction-checkpoint.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 |     "# Speed dating prediction\n",
8 | " - [Kaggle Speed dating experiment](https://www.kaggle.com/annavictoria/speed-dating-experiment)\n",
9 | " - Learning fun [Siraj's DL #5](https://www.youtube.com/watch?v=koiTTim4M-s)"
10 | ]
11 | },
12 | {
13 | "cell_type": "code",
14 | "execution_count": 1,
15 | "metadata": {
16 | "collapsed": true
17 | },
18 | "outputs": [],
19 | "source": [
20 | "%matplotlib inline\n",
21 | "import pandas as pd\n",
22 | "import numpy as np\n",
23 | "import matplotlib.pyplot as plt"
24 | ]
25 | },
26 | {
27 | "cell_type": "code",
28 | "execution_count": 2,
29 | "metadata": {
30 | "collapsed": false
31 | },
32 | "outputs": [
33 | {
34 | "data": {
35 | "text/html": [
36 | "
\n",
37 | "
\n",
38 | " \n",
39 | " \n",
40 | " | \n",
41 | " iid | \n",
42 | " id | \n",
43 | " gender | \n",
44 | " idg | \n",
45 | " condtn | \n",
46 | " wave | \n",
47 | " round | \n",
48 | " position | \n",
49 | " positin1 | \n",
50 | " order | \n",
51 | " ... | \n",
52 | " attr3_3 | \n",
53 | " sinc3_3 | \n",
54 | " intel3_3 | \n",
55 | " fun3_3 | \n",
56 | " amb3_3 | \n",
57 | " attr5_3 | \n",
58 | " sinc5_3 | \n",
59 | " intel5_3 | \n",
60 | " fun5_3 | \n",
61 | " amb5_3 | \n",
62 | "
\n",
63 | " \n",
64 | " \n",
65 | " \n",
66 | " 0 | \n",
67 | " 1 | \n",
68 | " 1.0 | \n",
69 | " 0 | \n",
70 | " 1 | \n",
71 | " 1 | \n",
72 | " 1 | \n",
73 | " 10 | \n",
74 | " 7 | \n",
75 | " NaN | \n",
76 | " 4 | \n",
77 | " ... | \n",
78 | " 5.0 | \n",
79 | " 7.0 | \n",
80 | " 7.0 | \n",
81 | " 7.0 | \n",
82 | " 7.0 | \n",
83 | " NaN | \n",
84 | " NaN | \n",
85 | " NaN | \n",
86 | " NaN | \n",
87 | " NaN | \n",
88 | "
\n",
89 | " \n",
90 | " 1 | \n",
91 | " 1 | \n",
92 | " 1.0 | \n",
93 | " 0 | \n",
94 | " 1 | \n",
95 | " 1 | \n",
96 | " 1 | \n",
97 | " 10 | \n",
98 | " 7 | \n",
99 | " NaN | \n",
100 | " 3 | \n",
101 | " ... | \n",
102 | " 5.0 | \n",
103 | " 7.0 | \n",
104 | " 7.0 | \n",
105 | " 7.0 | \n",
106 | " 7.0 | \n",
107 | " NaN | \n",
108 | " NaN | \n",
109 | " NaN | \n",
110 | " NaN | \n",
111 | " NaN | \n",
112 | "
\n",
113 | " \n",
114 | " 2 | \n",
115 | " 1 | \n",
116 | " 1.0 | \n",
117 | " 0 | \n",
118 | " 1 | \n",
119 | " 1 | \n",
120 | " 1 | \n",
121 | " 10 | \n",
122 | " 7 | \n",
123 | " NaN | \n",
124 | " 10 | \n",
125 | " ... | \n",
126 | " 5.0 | \n",
127 | " 7.0 | \n",
128 | " 7.0 | \n",
129 | " 7.0 | \n",
130 | " 7.0 | \n",
131 | " NaN | \n",
132 | " NaN | \n",
133 | " NaN | \n",
134 | " NaN | \n",
135 | " NaN | \n",
136 | "
\n",
137 | " \n",
138 | " 3 | \n",
139 | " 1 | \n",
140 | " 1.0 | \n",
141 | " 0 | \n",
142 | " 1 | \n",
143 | " 1 | \n",
144 | " 1 | \n",
145 | " 10 | \n",
146 | " 7 | \n",
147 | " NaN | \n",
148 | " 5 | \n",
149 | " ... | \n",
150 | " 5.0 | \n",
151 | " 7.0 | \n",
152 | " 7.0 | \n",
153 | " 7.0 | \n",
154 | " 7.0 | \n",
155 | " NaN | \n",
156 | " NaN | \n",
157 | " NaN | \n",
158 | " NaN | \n",
159 | " NaN | \n",
160 | "
\n",
161 | " \n",
162 | " 4 | \n",
163 | " 1 | \n",
164 | " 1.0 | \n",
165 | " 0 | \n",
166 | " 1 | \n",
167 | " 1 | \n",
168 | " 1 | \n",
169 | " 10 | \n",
170 | " 7 | \n",
171 | " NaN | \n",
172 | " 7 | \n",
173 | " ... | \n",
174 | " 5.0 | \n",
175 | " 7.0 | \n",
176 | " 7.0 | \n",
177 | " 7.0 | \n",
178 | " 7.0 | \n",
179 | " NaN | \n",
180 | " NaN | \n",
181 | " NaN | \n",
182 | " NaN | \n",
183 | " NaN | \n",
184 | "
\n",
185 | " \n",
186 | "
\n",
187 | "
5 rows × 195 columns
\n",
188 | "
"
189 | ],
190 | "text/plain": [
191 | " iid id gender idg condtn wave round position positin1 order \\\n",
192 | "0 1 1.0 0 1 1 1 10 7 NaN 4 \n",
193 | "1 1 1.0 0 1 1 1 10 7 NaN 3 \n",
194 | "2 1 1.0 0 1 1 1 10 7 NaN 10 \n",
195 | "3 1 1.0 0 1 1 1 10 7 NaN 5 \n",
196 | "4 1 1.0 0 1 1 1 10 7 NaN 7 \n",
197 | "\n",
198 | " ... attr3_3 sinc3_3 intel3_3 fun3_3 amb3_3 attr5_3 sinc5_3 \\\n",
199 | "0 ... 5.0 7.0 7.0 7.0 7.0 NaN NaN \n",
200 | "1 ... 5.0 7.0 7.0 7.0 7.0 NaN NaN \n",
201 | "2 ... 5.0 7.0 7.0 7.0 7.0 NaN NaN \n",
202 | "3 ... 5.0 7.0 7.0 7.0 7.0 NaN NaN \n",
203 | "4 ... 5.0 7.0 7.0 7.0 7.0 NaN NaN \n",
204 | "\n",
205 | " intel5_3 fun5_3 amb5_3 \n",
206 | "0 NaN NaN NaN \n",
207 | "1 NaN NaN NaN \n",
208 | "2 NaN NaN NaN \n",
209 | "3 NaN NaN NaN \n",
210 | "4 NaN NaN NaN \n",
211 | "\n",
212 | "[5 rows x 195 columns]"
213 | ]
214 | },
215 | "execution_count": 2,
216 | "metadata": {},
217 | "output_type": "execute_result"
218 | }
219 | ],
220 | "source": [
221 | "df =pd.read_csv('Speed Dating Data.csv', encoding=\"ISO-8859-1\")\n",
222 | "df.head()"
223 | ]
224 | },
225 | {
226 | "cell_type": "code",
227 | "execution_count": 3,
228 | "metadata": {
229 | "collapsed": false
230 | },
231 | "outputs": [
232 | {
233 | "data": {
234 | "text/plain": [
235 | "0 0\n",
236 | "1 0\n",
237 | "2 1\n",
238 | "3 1\n",
239 | "4 1\n",
240 | "Name: match, dtype: int64"
241 | ]
242 | },
243 | "execution_count": 3,
244 | "metadata": {},
245 | "output_type": "execute_result"
246 | }
247 | ],
248 | "source": [
249 | "df['match'].head()"
250 | ]
251 | },
252 | {
253 | "cell_type": "code",
254 | "execution_count": 4,
255 | "metadata": {
256 | "collapsed": false
257 | },
258 | "outputs": [
259 | {
260 | "data": {
261 | "text/plain": [
262 | "(8378, 195)"
263 | ]
264 | },
265 | "execution_count": 4,
266 | "metadata": {},
267 | "output_type": "execute_result"
268 | }
269 | ],
270 | "source": [
271 | "df.shape"
272 | ]
273 | },
274 | {
275 | "cell_type": "code",
276 | "execution_count": 5,
277 | "metadata": {
278 | "collapsed": false
279 | },
280 | "outputs": [
281 | {
282 | "name": "stdout",
283 | "output_type": "stream",
284 | "text": [
285 | "\n",
286 | "RangeIndex: 8378 entries, 0 to 8377\n",
287 | "Columns: 195 entries, iid to amb5_3\n",
288 | "dtypes: float64(174), int64(13), object(8)\n",
289 | "memory usage: 12.5+ MB\n"
290 | ]
291 | }
292 | ],
293 | "source": [
294 | "df.info()"
295 | ]
296 | },
297 | {
298 | "cell_type": "markdown",
299 | "metadata": {},
300 | "source": [
301 |     "### First of all, let's just separate features and labels"
302 | ]
303 | },
304 | {
305 | "cell_type": "code",
306 | "execution_count": 6,
307 | "metadata": {
308 | "collapsed": false
309 | },
310 | "outputs": [],
311 | "source": [
312 | "df, df_labels = df.drop(['match'], axis=1), df['match']"
313 | ]
314 | },
315 | {
316 | "cell_type": "markdown",
317 | "metadata": {},
318 | "source": [
319 | "# 1. Preprocessing Data\n",
320 | " - 1.1 Cleaning\n",
321 | " - 1.2 Transformation\n",
322 | " - 1.3 Reduction by PCA"
323 | ]
324 | },
325 | {
326 | "cell_type": "markdown",
327 | "metadata": {},
328 | "source": [
329 | "## 1.1 Cleaning"
330 | ]
331 | },
332 | {
333 | "cell_type": "markdown",
334 | "metadata": {},
335 | "source": [
336 | "### Cleaning null features\n",
337 |     "If more than 30% (2513) of a feature's values are null, we just drop the whole column. "
338 | ]
339 | },
340 | {
341 | "cell_type": "code",
342 | "execution_count": 7,
343 | "metadata": {
344 | "collapsed": false
345 | },
346 | "outputs": [
347 | {
348 | "name": "stdout",
349 | "output_type": "stream",
350 | "text": [
351 | "194\n",
352 | "194\n"
353 | ]
354 | }
355 | ],
356 | "source": [
357 | "na_sum = list(df.isnull().sum())\n",
358 | "print(len(na_sum))\n",
359 |     "na_col = list(df.isnull().sum().index)\n",
360 | "#print(len(na_col))"
361 | ]
362 | },
363 | {
364 | "cell_type": "code",
365 | "execution_count": 8,
366 | "metadata": {
367 | "collapsed": false
368 | },
369 | "outputs": [
370 | {
371 | "name": "stdout",
372 | "output_type": "stream",
373 | "text": [
374 | "We can drop 83 Columns\n"
375 | ]
376 | }
377 | ],
378 | "source": [
379 | "drop_col =[]\n",
380 | "for i in range(len(na_sum)):\n",
381 |     "    if na_sum[i] > 2513:\n",
382 | " drop_col.append(na_col[i])\n",
383 | "print(\"We can drop \",len(drop_col),\" Columns\")"
384 | ]
385 | },
386 | {
387 | "cell_type": "code",
388 | "execution_count": 9,
389 | "metadata": {
390 | "collapsed": false
391 | },
392 | "outputs": [
393 | {
394 | "data": {
395 | "text/html": [
396 | "\n",
397 | "
\n",
398 | " \n",
399 | " \n",
400 | " | \n",
401 | " iid | \n",
402 | " id | \n",
403 | " gender | \n",
404 | " idg | \n",
405 | " condtn | \n",
406 | " wave | \n",
407 | " round | \n",
408 | " position | \n",
409 | " positin1 | \n",
410 | " order | \n",
411 | " ... | \n",
412 | " sinc1_2 | \n",
413 | " intel1_2 | \n",
414 | " fun1_2 | \n",
415 | " amb1_2 | \n",
416 | " shar1_2 | \n",
417 | " attr3_2 | \n",
418 | " sinc3_2 | \n",
419 | " intel3_2 | \n",
420 | " fun3_2 | \n",
421 | " amb3_2 | \n",
422 | "
\n",
423 | " \n",
424 | " \n",
425 | " \n",
426 | " 0 | \n",
427 | " 1 | \n",
428 | " 1.0 | \n",
429 | " 0 | \n",
430 | " 1 | \n",
431 | " 1 | \n",
432 | " 1 | \n",
433 | " 10 | \n",
434 | " 7 | \n",
435 | " NaN | \n",
436 | " 4 | \n",
437 | " ... | \n",
438 | " 16.67 | \n",
439 | " 13.89 | \n",
440 | " 22.22 | \n",
441 | " 11.11 | \n",
442 | " 16.67 | \n",
443 | " 6.0 | \n",
444 | " 7.0 | \n",
445 | " 8.0 | \n",
446 | " 7.0 | \n",
447 | " 6.0 | \n",
448 | "
\n",
449 | " \n",
450 | " 1 | \n",
451 | " 1 | \n",
452 | " 1.0 | \n",
453 | " 0 | \n",
454 | " 1 | \n",
455 | " 1 | \n",
456 | " 1 | \n",
457 | " 10 | \n",
458 | " 7 | \n",
459 | " NaN | \n",
460 | " 3 | \n",
461 | " ... | \n",
462 | " 16.67 | \n",
463 | " 13.89 | \n",
464 | " 22.22 | \n",
465 | " 11.11 | \n",
466 | " 16.67 | \n",
467 | " 6.0 | \n",
468 | " 7.0 | \n",
469 | " 8.0 | \n",
470 | " 7.0 | \n",
471 | " 6.0 | \n",
472 | "
\n",
473 | " \n",
474 | " 2 | \n",
475 | " 1 | \n",
476 | " 1.0 | \n",
477 | " 0 | \n",
478 | " 1 | \n",
479 | " 1 | \n",
480 | " 1 | \n",
481 | " 10 | \n",
482 | " 7 | \n",
483 | " NaN | \n",
484 | " 10 | \n",
485 | " ... | \n",
486 | " 16.67 | \n",
487 | " 13.89 | \n",
488 | " 22.22 | \n",
489 | " 11.11 | \n",
490 | " 16.67 | \n",
491 | " 6.0 | \n",
492 | " 7.0 | \n",
493 | " 8.0 | \n",
494 | " 7.0 | \n",
495 | " 6.0 | \n",
496 | "
\n",
497 | " \n",
498 | " 3 | \n",
499 | " 1 | \n",
500 | " 1.0 | \n",
501 | " 0 | \n",
502 | " 1 | \n",
503 | " 1 | \n",
504 | " 1 | \n",
505 | " 10 | \n",
506 | " 7 | \n",
507 | " NaN | \n",
508 | " 5 | \n",
509 | " ... | \n",
510 | " 16.67 | \n",
511 | " 13.89 | \n",
512 | " 22.22 | \n",
513 | " 11.11 | \n",
514 | " 16.67 | \n",
515 | " 6.0 | \n",
516 | " 7.0 | \n",
517 | " 8.0 | \n",
518 | " 7.0 | \n",
519 | " 6.0 | \n",
520 | "
\n",
521 | " \n",
522 | " 4 | \n",
523 | " 1 | \n",
524 | " 1.0 | \n",
525 | " 0 | \n",
526 | " 1 | \n",
527 | " 1 | \n",
528 | " 1 | \n",
529 | " 10 | \n",
530 | " 7 | \n",
531 | " NaN | \n",
532 | " 7 | \n",
533 | " ... | \n",
534 | " 16.67 | \n",
535 | " 13.89 | \n",
536 | " 22.22 | \n",
537 | " 11.11 | \n",
538 | " 16.67 | \n",
539 | " 6.0 | \n",
540 | " 7.0 | \n",
541 | " 8.0 | \n",
542 | " 7.0 | \n",
543 | " 6.0 | \n",
544 | "
\n",
545 | " \n",
546 | "
\n",
547 | "
5 rows × 111 columns
\n",
548 | "
"
549 | ],
550 | "text/plain": [
551 | " iid id gender idg condtn wave round position positin1 order \\\n",
552 | "0 1 1.0 0 1 1 1 10 7 NaN 4 \n",
553 | "1 1 1.0 0 1 1 1 10 7 NaN 3 \n",
554 | "2 1 1.0 0 1 1 1 10 7 NaN 10 \n",
555 | "3 1 1.0 0 1 1 1 10 7 NaN 5 \n",
556 | "4 1 1.0 0 1 1 1 10 7 NaN 7 \n",
557 | "\n",
558 | " ... sinc1_2 intel1_2 fun1_2 amb1_2 shar1_2 attr3_2 sinc3_2 \\\n",
559 | "0 ... 16.67 13.89 22.22 11.11 16.67 6.0 7.0 \n",
560 | "1 ... 16.67 13.89 22.22 11.11 16.67 6.0 7.0 \n",
561 | "2 ... 16.67 13.89 22.22 11.11 16.67 6.0 7.0 \n",
562 | "3 ... 16.67 13.89 22.22 11.11 16.67 6.0 7.0 \n",
563 | "4 ... 16.67 13.89 22.22 11.11 16.67 6.0 7.0 \n",
564 | "\n",
565 | " intel3_2 fun3_2 amb3_2 \n",
566 | "0 8.0 7.0 6.0 \n",
567 | "1 8.0 7.0 6.0 \n",
568 | "2 8.0 7.0 6.0 \n",
569 | "3 8.0 7.0 6.0 \n",
570 | "4 8.0 7.0 6.0 \n",
571 | "\n",
572 | "[5 rows x 111 columns]"
573 | ]
574 | },
575 | "execution_count": 9,
576 | "metadata": {},
577 | "output_type": "execute_result"
578 | }
579 | ],
580 | "source": [
581 | "df = df.drop(drop_col,axis=1)\n",
582 | "df.head()"
583 | ]
584 | },
585 | {
586 | "cell_type": "markdown",
587 | "metadata": {},
588 | "source": [
589 | "### Imputing null values with mean"
590 | ]
591 | },
592 | {
593 | "cell_type": "code",
594 | "execution_count": 10,
595 | "metadata": {
596 | "collapsed": false
597 | },
598 | "outputs": [],
599 | "source": [
600 | "df = df.fillna(df.mean())"
601 | ]
602 | },
603 | {
604 | "cell_type": "code",
605 | "execution_count": 11,
606 | "metadata": {
607 | "collapsed": false
608 | },
609 | "outputs": [
610 | {
611 | "data": {
612 | "text/plain": [
613 | "True"
614 | ]
615 | },
616 | "execution_count": 11,
617 | "metadata": {},
618 | "output_type": "execute_result"
619 | }
620 | ],
621 | "source": [
622 | "#check if any NaN values\n",
623 | "df.isnull().values.any()"
624 | ]
625 | },
626 | {
627 | "cell_type": "markdown",
628 | "metadata": {},
629 | "source": [
630 | "#### This means there are still columns which have null values. Let's further check."
631 | ]
632 | },
633 | {
634 | "cell_type": "code",
635 | "execution_count": 12,
636 | "metadata": {
637 | "collapsed": false
638 | },
639 | "outputs": [
640 | {
641 | "data": {
642 | "text/plain": [
643 | "['field', 'race', 'imprace', 'income']"
644 | ]
645 | },
646 | "execution_count": 12,
647 | "metadata": {},
648 | "output_type": "execute_result"
649 | }
650 | ],
651 | "source": [
652 | "na_sum = list(df.isnull().sum())\n",
653 |     "na_col = list(df.isnull().sum().index)\n",
654 | "nan_col =[]\n",
655 | "for i in range(len(na_sum)):\n",
656 | " if na_sum[i] > 0:\n",
657 | " nan_col.append(na_col[i])\n",
658 | "nan_col"
659 | ]
660 | },
661 | {
662 | "cell_type": "code",
663 | "execution_count": 13,
664 | "metadata": {
665 | "collapsed": false
666 | },
667 | "outputs": [
668 | {
669 | "data": {
670 | "text/html": [
671 | "\n",
672 | "
\n",
673 | " \n",
674 | " \n",
675 | " | \n",
676 | " field | \n",
677 | " from | \n",
678 | " zipcode | \n",
679 | " career | \n",
680 | "
\n",
681 | " \n",
682 | " \n",
683 | " \n",
684 | " 0 | \n",
685 | " Law | \n",
686 | " Chicago | \n",
687 | " 60,521 | \n",
688 | " lawyer | \n",
689 | "
\n",
690 | " \n",
691 | " 1 | \n",
692 | " Law | \n",
693 | " Chicago | \n",
694 | " 60,521 | \n",
695 | " lawyer | \n",
696 | "
\n",
697 | " \n",
698 | " 2 | \n",
699 | " Law | \n",
700 | " Chicago | \n",
701 | " 60,521 | \n",
702 | " lawyer | \n",
703 | "
\n",
704 | " \n",
705 | " 3 | \n",
706 | " Law | \n",
707 | " Chicago | \n",
708 | " 60,521 | \n",
709 | " lawyer | \n",
710 | "
\n",
711 | " \n",
712 | " 4 | \n",
713 | " Law | \n",
714 | " Chicago | \n",
715 | " 60,521 | \n",
716 | " lawyer | \n",
717 | "
\n",
718 | " \n",
719 | "
\n",
720 | "
"
721 | ],
722 | "text/plain": [
723 | " field from zipcode career\n",
724 | "0 Law Chicago 60,521 lawyer\n",
725 | "1 Law Chicago 60,521 lawyer\n",
726 | "2 Law Chicago 60,521 lawyer\n",
727 | "3 Law Chicago 60,521 lawyer\n",
728 | "4 Law Chicago 60,521 lawyer"
729 | ]
730 | },
731 | "execution_count": 13,
732 | "metadata": {},
733 | "output_type": "execute_result"
734 | }
735 | ],
736 | "source": [
737 | "df[['field', 'from', 'zipcode', 'career']].head()"
738 | ]
739 | },
740 | {
741 | "cell_type": "markdown",
742 | "metadata": {},
743 | "source": [
744 | "#### These columns are values with object type. It is hard to predict null values for these variables. So, let's just drop all these variables. "
745 | ]
746 | },
747 | {
748 | "cell_type": "code",
749 | "execution_count": 14,
750 | "metadata": {
751 | "collapsed": true
752 | },
753 | "outputs": [],
754 | "source": [
755 | "df = df.drop(['from','zipcode','field','career'], axis=1)"
756 | ]
757 | },
758 | {
759 | "cell_type": "code",
760 | "execution_count": 15,
761 | "metadata": {
762 | "collapsed": false
763 | },
764 | "outputs": [
765 | {
766 | "data": {
767 | "text/plain": [
768 | "False"
769 | ]
770 | },
771 | "execution_count": 15,
772 | "metadata": {},
773 | "output_type": "execute_result"
774 | }
775 | ],
776 | "source": [
777 | "df.isnull().values.any()"
778 | ]
779 | },
780 | {
781 | "cell_type": "markdown",
782 | "metadata": {},
783 | "source": [
784 |     "#### We have successfully cleaned all null variables in the dataset. "
785 | ]
786 | },
787 | {
788 | "cell_type": "markdown",
789 | "metadata": {},
790 | "source": [
791 | "## 1.2 Transformation"
792 | ]
793 | },
794 | {
795 | "cell_type": "markdown",
796 | "metadata": {},
797 | "source": [
798 | "### Normalize data"
799 | ]
800 | },
801 | {
802 | "cell_type": "code",
803 | "execution_count": 16,
804 | "metadata": {
805 | "collapsed": true
806 | },
807 | "outputs": [],
808 | "source": [
809 | "from sklearn.preprocessing import StandardScaler"
810 | ]
811 | },
812 | {
813 | "cell_type": "code",
814 | "execution_count": 17,
815 | "metadata": {
816 | "collapsed": false
817 | },
818 | "outputs": [],
819 | "source": [
820 | "X = StandardScaler().fit_transform(df)"
821 | ]
822 | },
823 | {
824 | "cell_type": "code",
825 | "execution_count": 18,
826 | "metadata": {
827 | "collapsed": false
828 | },
829 | "outputs": [
830 | {
831 | "data": {
832 | "text/plain": [
833 | "(8378, 107)"
834 | ]
835 | },
836 | "execution_count": 18,
837 | "metadata": {},
838 | "output_type": "execute_result"
839 | }
840 | ],
841 | "source": [
842 | "X.shape"
843 | ]
844 | },
845 | {
846 | "cell_type": "markdown",
847 | "metadata": {},
848 | "source": [
849 | "## 1.3 Reduction"
850 | ]
851 | },
852 | {
853 | "cell_type": "markdown",
854 | "metadata": {},
855 | "source": [
856 | "### PCA\n",
857 | " - Find out what is fairly good value for n_components according to the Explained Variance Ratio\n",
858 | " - Reduce dimensions by the n_components"
859 | ]
860 | },
861 | {
862 | "cell_type": "code",
863 | "execution_count": 19,
864 | "metadata": {
865 | "collapsed": false
866 | },
867 | "outputs": [
868 | {
869 | "data": {
870 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYwAAAEKCAYAAAAB0GKPAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3Xt4XXWd7/H3NzuXnXuae9qmbUrTlnLH2hZF5SIKeOno\noIADCs4Mwwhej+PozJyH0Tnn6DPjeI56PFQUUEaUAQStWG6DFxBo6QVoaUtLm1KaXtNbLs09+Z4/\n1gps0qRZLd3Zyc7n9Tzr2Wuv9Vt7f3+95Ju1fjdzd0REREaSkeoARERkfFDCEBGRSJQwREQkEiUM\nERGJRAlDREQiUcIQEZFIlDBERCQSJQwREYlECUNERCLJTHUAJ1N5ebnPmDEj1WGIiIwbq1ev3u/u\nFVHKplXCmDFjBqtWrUp1GCIi44aZbY9aVo+kREQkEiUMERGJRAlDREQiUcIQEZFIkpowzOxSM9tk\nZlvM7KtDnDcz+154fq2ZnZtwrsTM7jezl81so5mdl8xYRUTk2JKWMMwsBvwAuAyYB1xtZvMGFbsM\nqA+3G4BbE859F3jE3ecCZwEbkxWriIiMLJl3GAuALe7e4O7dwD3A4kFlFgN3eWA5UGJmNWZWDLwb\nuB3A3bvd/XASYxURkREkM2FMAXYkvG8Mj0UpUwc0AXea2fNm9mMzy09GkP39zv/93Sv8cXNTMj5e\nRCRtjNVG70zgXOBWdz8HOAIc1QYCYGY3mNkqM1vV1HT8P/QzMowfPtnA71/e95YCFhFJd8lMGDuB\n2oT3U8NjUco0Ao3uviI8fj9BAjmKu9/m7vPdfX5FRaTR7UepKoqzt6XzhK4VEZkokpkwVgL1ZlZn\nZtnAVcDSQWWWAp8Me0stAprdfbe77wF2mNmcsNzFwIZkBVpVlKOEISIygqTNJeXuvWZ2M/AoEAPu\ncPf1ZnZjeH4JsAy4HNgCtAPXJ3zEZ4G7w2TTMOjcSVVVGGfFtoPJ+ngRkbSQ1MkH3X0ZQVJIPLYk\nYd+Bm4a59gVgfjLjG1BZFGdfayfujpmNxleKiIw7Y7XRe1RVFubQ0+ccau9JdSgiImOWEgZBozeg\ndgwRkWNQwiBo9AbY19qV4khERMYuJQx0hyEiEoUSBlBRGN5hKGGIiAxLCQOIZ8Uoyctib4seSYmI\nDEcJI1RZqMF7IiLHooQRqiqKs1eN3iIiw1LCCFUWxmnSHYaIyLCUMEJVRTnsa+2iv99THYqIyJik\nhBGqKorT2+8cbO9OdSgiImOSEkZoYPCeGr5FRIamhBGqKAwG7+1T11oRkSEpYYR0hyEicmxKGKHX\nR3ura62IyJCUMEI5mTFK87N1hyEiMgwljATBaG/dYYiIDEUJI0FVuPKeiIgcTQkjgeaTEhEZnhJG\ngqqiOE2tXfRptLeIyFGUMBJUFeXQ73CgTe0YIiKDKWEkqAxX3lPXWhGRoylhJNBSrSIiw1PCSDAw\n2nvX4Y4URyIiMvYkNWGY2aVmtsnMtpjZV4c4b2b2vfD8WjM7N+Hcq2a2zsxeMLNVyYxzQHVRnPKC\nHNa8dng0vk5EZFzJTNYHm1kM+AFwCdAIrDSzpe6+IaHYZUB9uC0Ebg1fB1zo7vuTFeNgZsbCulKW\nNxzA3TGz0fpqEZExL5l3GAuALe7e4O7dwD3A4kFlFgN3eWA5UGJmNUmMaUSLZpayu7mTHQf1WEpE\nJFEyE8YUYEfC+8bwWNQyDvyXma02sxuSFuUgC2eWAbB824HR+koRkXFhLDd6n+/uZxM8trrJzN49\nVCEzu8HMVpnZqqamprf8pfWVBZTmZ7O8QQlDRCRRMhPGTqA24f3U8FikMu4+8LoPeJDgEddR3P02\nd5/v7vMrKirectBmxoIZpaxoOPiWP0tEJJ0kM2GsBOrNrM7MsoGrgKWDyiwFPhn2lloENLv7bjPL\nN7NCADPLB94HvJTEWN9k0cxSdh7uYM
fB9tH6ShGRMS9pvaTcvdfMbgYeBWLAHe6+3sxuDM8vAZYB\nlwNbgHbg+vDyKuDBsJdSJvBzd38kWbEONtCOsWLbQWpL80bra0VExrSkJQwAd19GkBQSjy1J2Hfg\npiGuawDOSmZsxzKnqpCSvCxWNBzgirdNTVUYIiJjylhu9E6ZjIywHWOb2jFERAYoYQxj4cwyXjvY\nrmlCRERCIyYMM5tqZg+aWZOZ7TOzX5pZ2j+nmT99EgBrGzVNiIgIRLvDuJOgN1MNMBn4TXgsrc0o\nywfQiG8RkVCUhFHh7ne6e2+4/QR46wMexrjivCyK4pm8pq61IiJAtIRxwMyuMbNYuF0DTIhh0NPK\n8pQwRERCURLGp4GPA3uA3cAVvDFeIq1NK83T4D0RkdCI4zDcfTvw4VGIZcypLc3jvzbso6/fiWVo\nqnMRmdiGTRhm9hV3/1cz+z7BzLFv4u6fS2pkY8C00jy6+/rZ29LJ5JLcVIcjIpJSx7rD2Bi+jspq\nd2PRtHBakNcOtithiMiEN2zCcPffhLvt7n5f4jkz+1hSoxojBhLGjoPtLArnlxIRmaiiNHp/LeKx\ntDO5JJcMQw3fIiIcuw3jMoKZZKeY2fcSThUBvckObCzIimVQU5yrrrUiIhy7DWMXQfvFh4HVCcdb\ngS8mM6ixZFqpxmKIiMCx2zBeBF40s5+7e88oxjSmTCvN44mX96U6DBGRlIuyHsYMM/smMA+IDxx0\n95lJi2oMmVaWx/62Ltq7e8nLTuryISIiY1rUyQdvJWi3uBC4C/hZMoMaS2pf7ymlSQhFZGKLkjBy\n3f0JwNx9u7v/M/CB5IY1diSOxRARmciiPGPpMrMM4JVwje6dQEFywxo7EsdiiIhMZFHuMD4P5AGf\nA94GXAN8KplBjSWT8rIoyNE05yIix7zDMLMYcKW7fxloY4LMUpvIzKjVrLUiIse+w3D3PuD8UYpl\nzJpWqsF7IiJR2jCeN7OlwH3AkYGD7v5A0qIaY2on5fGHTU24O2aa5lxEJqYoCSNOsMLeRQnHHJgw\nCWNaWR5dvf3sa+2iqig+8gUiImkoygJKJ9xuYWaXAt8FYsCP3f1bg85beP5yoB24zt3XJJyPEUxP\nstPdP3iicbxVp9YUAfD8a4e59PTqVIUhIpJSUXpJnZDwh/0PgMsIRolfbWbzBhW7DKgPtxsIBggm\n+jxvrMuRMmfXlpCfHePpLftTHYqISMokLWEAC4At7t7g7t3APcDiQWUWA3d5YDlQYmY1AGY2lWCA\n4I+TGGMkWbEMFs4s409KGCIygSUzYUwBdiS8bwyPRS3zf4CvAP3JCvB4vHNWOdv2H6HxkHpLicjE\nNGLCMLMqM7vdzB4O388zs79MZlBm9kFgn7uvjlD2BjNbZWarmpqakhbTu+rLAXhmy4GkfYeIyFgW\n5Q7jJ8CjwOTw/WbgCxGu2wnUJryfGh6LUuadwIfN7FWCR1kXmdmQEx66+23uPt/d51dUVEQI68TU\nVxZQUZjDU3osJSITVJSEUe7u9xI+GnL3XqAvwnUrgXozqzOzbOAqYOmgMkuBT1pgEdDs7rvd/Wvu\nPtXdZ4TX/c7dr4lYp6QwM86fVc4zW/bT3++pDEVEJCWiJIwjZlZGMPaCgR/sI10UJpabCe5ONgL3\nuvt6M7vRzG4Miy0DGoAtwI+Azxx/FUbP+bPKOXCkm417WlIdiojIqIsycO9LBHcCp5jZ00AFcEWU\nD3f3ZQRJIfHYkoR9B24a4TP+APwhyvcl2ztnBe0YT2/Zz2mTi1McjYjI6BrxDiMcSPce4B3A3wCn\nufvaZAc2FlUXx5lVWcCf1PAtIhNQlF5SNwEF7r7e3V8CCsxsTD86SqbzZ5Xz3LYDdPVGacYREUkf\nUdow/trdDw+8cfdDwF8nL6Sx7bxTyujs6Wdt44jNOCIiaSVKwohZwhSt4ZQf2ckLaWxbMKMUgOVb\n9V
hKRCaWKAnjEeA/zexiM7sY+EV4bEKalJ/N3OpCVmw7mOpQRERGVZReUn9P0Nj9t+H7xxkD8zul\n0sK6Uu5d1UhPXz9ZsWTOriIiMnZE6SXV7+63uvsV4fbDcCW+CWvRzDI6evrUjiEiE0qUXlLvNLPH\nzWyzmTWY2TYzaxiN4MaqBXVhO0aD2jFEZOKI8jzlduA7BGt7vx2YH75OWGUFOdRXFqgdQ0QmlCht\nGM3u/nDSIxlnFs4s5cE1O+nt6ydT7RgiMgFE+Un3ezP7NzM7z8zOHdiSHtkYt7CujCPdfby0S/NK\nicjEEOUOY2H4Oj/hmAMXnfxwxo+FM4N2jBUNBzi7tiTF0YiIJN+ICcPdLxyNQMabysI4MyvyWd5w\ngL95zympDkdEJOmi3GFgZh8ATgPiA8fc/RvJCmq8WDCjlIdf2oO7kzAYXkQkLUXpVrsEuBL4LGDA\nx4DpSY5rXJhbXUhzRw/7WrtSHYqISNJFafR+h7t/Ejjk7l8HzgNmJzes8WF2VSEAm/e2pjgSEZHk\ni5IwOsLXdjObDPQANckLafyYXR0kjE17lDBEJP1FacN4yMxKgH8D1hD0kJrQc0kNKC/IoTQ/m1f2\ntqU6FBGRpIvSS+pfwt1fmtlDQNzdNYlSqL6ygM37dIchIulv2IRhZhe5++/M7KNDnMPdH0huaOPD\nnOpCHlizUz2lRCTtHesO4z3A74APDXHOASUMoL6qkLauXnY1dzKlJDfV4YiIJM2wCcPdbzGzDOBh\nd793FGMaV2ZXFgBBTyklDBFJZ8fsJeXu/cBXRimWcen1rrXqKSUiaS5Kt9r/MrMvm1mtmZUObEmP\nbJyYlJ9NRWEOm9VTSkTSXJSEcSVwE/AksDrcVkX5cDO71Mw2mdkWM/vqEOfNzL4Xnl87MAuumcXN\n7Dkze9HM1pvZ16NXafTNrirgFfWUEpE0F6Vbbd2JfLCZxYAfAJcAjcBKM1vq7hsSil0G1IfbQuDW\n8LULuMjd28wsC/iTmT3s7stPJJZkm11VyD3P7aC/38nIUE8pEUlPUScfPB2Yx5snH7xrhMsWAFvc\nvSH8jHuAxUBiwlgM3OXuDiw3sxIzq3H33cDAM56scPMosabC7KpCOnr6aDzUwbSyvFSHIyKSFFEm\nH7wF+H64XQj8K/DhCJ89BdiR8L4xPBapjJnFzOwFYB/wuLuviPCdKTG76o2eUiIi6SpKG8YVwMXA\nHne/HjgLKE5qVIC797n72cBUYEF4l3MUM7vBzFaZ2aqmpqZkhzWk+rCn1CYlDBFJY5EmHwy71/aa\nWRHBb/y1Ea7bOajc1PDYcZVx98PA74FLh/oSd7/N3ee7+/yKiooIYZ18RfEsaorjvKKEISJpLErC\nWBVOPvgjgh5Sa4BnI1y3Eqg3szozywauApYOKrMU+GTYW2oR0Ozuu82sIvxOzCyXoOH85WhVSo15\nNUU8v+MwQXOMiEj6idJL6jPh7hIzewQocve1Ea7rNbObgUeBGHCHu683sxvD80uAZcDlwBagHbg+\nvLwG+GnY0yoDuNfdHzq+qo2uC+dW8sTL+3hlX9vrg/lERNLJiAnDzJYC9wC/dvdXj+fD3X0ZQVJI\nPLYkYd8JxngMvm4tcM7xfFeqXTKvin/61Us8vmGvEoaIpKUoj6T+HTgf2GBm95vZFWYWH+miiaaq\nKM5ZtSU8tn5PqkMREUmKEROGu/8xfCw1E/gh8HGChm8Z5H3zqnixsZk9zZ2pDkVE5KSLcocx0PD8\n58CNwNuBnyYzqPHqffOqAHh8494URyIicvJFGbh3L7ARuAj4v8Ap7v7ZZAc2Hs2qLKCuPF+PpUQk\nLUWZGuR24Gp370t2MOOdmXHJvCrufHobLZ09FMWzUh2SiMhJE6UN41Eli+jeN6+Knj7nD5tSM+pc\nRCRZIrVhSHTnTJtEeUEOdy/frkF8IpJWlDBOsliG8aVLZrNi20F+
/txrqQ5HROSkGbYNY2Axo+G4\n+5qTH056uHpBLb9dt4tvLnuZC+ZUaq1vEUkLx7rD+Pdw+wGwAriNYD6pFeExGYaZ8a2Pnkm/O197\nYJ0eTYlIWhg2Ybj7he5+IbAbODecEfZtBFN2DJ51VgapLc3j7y+dy5Obm/j1C7tSHY6IyFsWpQ1j\njruvG3jj7i8BpyYvpPRx7aLpzCzP5z9X7hi5sIjIGBclYaw1sx+b2QXh9iNgxNlqBTIyjA+cWcOK\nbQfY39aV6nBERN6SKAnjemA98Plw28Ab05DLCC4/o4Z+h0de0uhvERnfoqyH0WlmS4Bl7r5pFGJK\nK3OrC5lZns+ydbu5ZtH0VIcjInLCoswl9WHgBeCR8P3Z4RoZEoGZcfkZNSxv0GMpERnfojySugVY\nABwGcPcXgLpkBpVuLjujmn6Hx9ZrFlsRGb+iJIwed28edEwDC47DvJoiZpTlsWzd7lSHIiJywqIk\njPVm9gkgZmb1ZvZ94Jkkx5VWBh5LPdtwgINHulMdjojICYmSMD4LnAZ0Ab8AWoAvJDOodPSBM2vo\n63eW/HFrqkMRETkhUXpJtQP/GG5ygk6bXMwnFk7jticbOLWmkI+cMzXVIYmIHJcRE4aZzQa+DMxI\nLO/uFyUvrPT09Q+fRkNTG3//y3VML8vn3GmTUh2SiEhkUR5J3Qc8D/wT8HcJmxynrFgGt/7F26gu\ninPDXavVzVZExpUoCaPX3W919+fcffXAlvTI0tSk/GyWXPM29rd1cd+qxlSHIyISWZSE8Rsz+4yZ\n1ZhZ6cAW5cPN7FIz22RmW8zsq0OcNzP7Xnh+7cAaHGZWa2a/N7MNZrbezD5/nPUa0+ZNLuJt0yfx\n4PONmvpcRMaNKAnjUwSPoJ4BVofbqpEuMrMYwboZlwHzgKvNbN6gYpcB9eF2A3BreLwX+G/uPg9Y\nBNw0xLXj2kfOmcLmvW2s39WS6lBERCIZMWG4e90Q28wIn70A2OLuDe7eDdwDLB5UZjFwlweWAyVm\nVuPuuwdW9HP3VmAjMOW4ajbGffDMGrJjGTywRkuLiMj4MGzCMLOLwtePDrVF+OwpQOJCEI0c/UN/\nxDJmNoNg0aYVEb5z3CjJy+bCuRUsfXEXvX39qQ5HRGREx7rDeE/4+qEhtg8mOS4AzKwA+CXwBXcf\n8tmNmd1gZqvMbFVTU9NohHXSfOScqexv6+KpLftTHYqIyIiGHYfh7reErye69sVOoDbh/VSOXtp1\n2DJmlkWQLO529weOEedtBOuNM3/+/HHVgnzh3AqKc7N4YM1OLpxTmepwRESOacSBewBm9gGC6UHi\nA8fc/RsjXLYSqDezOoIkcBXwiUFllgI3m9k9wEKg2d13m5kBtwMb3f07kWoyDuVkxvjQWTXct6qR\nptYuKgpzUh2SiMiwoqyHsQS4kmBOKQM+Boy4EpC79wI3A48SNFrf6+7rzexGM7sxLLYMaAC2AD8C\nPhMefydwLXCRmb0QbpcfV83GieveMQMHvnTvC/T3j6sbJBGZYGykcQBmttbdz0x4LQAedvd3jU6I\n0c2fP99XrRqxx++Y8/MVr/EPD67j794/h5sunJXqcERkAjGz1e4+P0rZKOMwOsLXdjObDPQANSca\nnBzt6gW1fOisyfz7Y5t4btvBVIcjIjKkKAnjITMrAf4NWAO8SjDNuZwkZsb/+sjpTC/L53O/eJ7m\njp5UhyQicpQoA/f+xd0Pu/svCdou5rr7f09+aBNLYTyL7111Dk1tXfzP325IdTgiIkcZtpfUsQbn\nmRnH6uoqJ+aMqcX8zbtn8v/+sJUPnDmZ98yuSHVIIiKvO1a32g8d45wDShhJ8LmL63lsw16+9su1\nPPrFd1MYz0p1SCIiwLEH7p3ogD15C+JZMf71ijO54tZn+Mzda7hobiW1k/JYMLOUIiUPEUmhKCvu\nlQG3AOcT3Fn8CfiGux9IcmwT
1rnTJvHl98/h+09s4alXgmlD5lYX8tBnzyczFqWfgojIyRflp889\nQBPw58AV4f5/JjMogc9cMIsN33g/q/7pvXzzo2fw8p5WfrZ8e6rDEpEJLErCqAl7Sm0Lt/8BVCU7\nMAk6F5QX5HDV22s5f1Y533l8MwePdKc6LBGZoKIkjMfM7Cozywi3jxNM9yGjxMy45UPzONLdx78/\ntinV4YjIBBUlYfw18HOgK9zuAf7GzFrNTMvFjZL6qkKuXTSdnz/3Gusam1MdjohMQFEG7hW6e4a7\nZ4VbRnis0N2LRiNICXzxvbMpzcvmz299hr+/fy1b9rWlOiQRmUCizFb7l4Pex8zsluSFJMMpzsvi\nVze9kyvfXsuvXtjJJf/7j/zwj1tTHZaITBBRHkldbGbLzKzGzE4HlgOFSY5LhlFbmse//NnpPP3V\ni7j0tGq++fDLPLR2V6rDEpEJYMRxGO7+CTO7ElgHHAE+4e5PJz0yOabyghz+95Vn09S6gi/d+yKT\nS3I5d9qkVIclImksyiOpeuDzBMulbgeuNbO8ZAcmI4tnxfjhtW+juijODXet4u4V22loamOkNU5E\nRE5ElCVafwPc5O5PhEunfolg+dXTkhqZRFJWkMOd17+d6+58jn988CUAaorjXL1gGtcums6k/OwU\nRygi6SLKintF7t4y6Nhsd9+c1MhOwHhdce9kcHe27T/Csw0HeHT9Xp7c3ERuVoyrFtTylffPJTc7\nluoQRWQMOikr7pnZVwDcvcXMPjbo9HUnHp4kg5kxs6KAv1g4nbs+vYBHv/BuPnBmDT955lU+/sNn\n2dPcmeoQRWScO1YbxlUJ+18bdO7SJMQiJ9Gc6kK+/bGz+NG182loamPxD/7E6u2H1L4hIifsWG0Y\nNsz+UO9ljHrvvCru/9t38Fc/XcWf3/oMk/KyOH1KMQvrSll89hRqS9V/QUSiOVbC8GH2h3ovY9ip\nNUX85rPn89t1u3mpsZm1O5v59mOb+fZjm1kwo5T3n17NwrpSTq0pIpah3wVEZGjDNnqbWR/BuAsD\ncoH2gVNA3N3H3Go+E7nR+3jtPNzBr57fyYPP73x9ipHCnEzeM6eCD501mQvmVJCTqYZykXR3PI3e\nI/aSGk+UME7MrsMdrHz1IM9uPcDjG/Zy4Eg3hfFM3lVfznmnlHP+rHLqyvNTHaaIJMGYSRhmdinw\nXSAG/NjdvzXovIXnLye4g7nO3deE5+4APgjsc/fTo3yfEsZb19vXzzNbD/Dbtbt56pUmdoW9q957\nahX/cPlcZlYUpDhCETmZxkTCMLMYsBm4BGgkGOx3tbtvSChzOfBZgoSxEPiuuy8Mz70baAPuUsJI\nDXdn+4F2Hlq7iyV/bKCzp4+rF0zj1JoiSvKyqC6Oc9bUErV7iIxjx5Mwooz0PlELgC3u3hAGdQ+w\nGNiQUGYxQUJwYLmZlZhZjbvvdvcnzWxGEuOTEZgZM8rzufmieq58+zS+8/gm7l6xnf6E3zEqCnO4\n/PRqLphTSWVRDhUFOZTmZ2vtcZE0lMyEMQXYkfC+keAuYqQyU4DdSYxLTkBFYQ7f/OiZ3PKh0zjc\n3sPhjm42723j4XW7uWflDn767BvrjZtBWX425QU5LKgr5a/On8m0MnXfFRnvkpkwRoWZ3QDcADBt\n2rQUR5P+4lkxqotjVBfHmVtdxIfPmkxbVy8bd7ewv7WLprau8LWbPc0d/OK51/jZ8u188MzJXPn2\nWubPmKTeVyLjVDITxk6gNuH91PDY8ZY5Jne/DbgNgjaM4w9T3qqCnEzePqN0yHN7mju54+lt3L18\nO0tf3EU8K4NFM8uYV1PEjLJ8ZpTnc+bUYuJZSiIiY10yE8ZKoN7M6giSwFXAJwaVWQrcHLZvLASa\n3V2Po9JIdXGcf7j8VD5/cT3LGw7w5OYmnt56gD+9sp/esDEkJzODBXWlLJpZRm1pHpWFOdSW5j
Gl\nJDfF0YtIoqQlDHfvNbObgUcJutXe4e7rzezG8PwSYBlBD6ktBN1qrx+43sx+AVwAlJtZI3CLu9+e\nrHglufJzMrn41CouPrUKCLrv7jrcyea9rTyz9QBPvdLEvz266U3X1Jbm8s5TyllQV8rc6iJOqczX\n4yyRFNLAPRkzmjt62NfSyd6WLrbsa+XprQdY3nCA1s5eAGIZxqk1hbyrvoJ31Zdzdm0JednjvhlO\nJKXGxDiMVFDCSD+9ff007D/Cy3ta2bSnhZWvHmLN9kOvP86qKMxhemke1cVxygtyKC/IZm51EWdP\nK6G8ICfF0YuMfWNlHIbIW5YZy2B2VSGzqwrhrMkAtHb2sLzhIJv2tLD9QDvbD7azYVcLTW1dr9+N\nQPBIa3ZlIadUFnBKRT5zqouYXVWguxKRE6T/OTLuFMazuGReFZfMqzrqXHt3L+t3tfD8a4d4cUcz\nW5vaeGrLfrp7+4FgjMjsykI+cu4UPnrOFCqL4qMdvsi4pUdSkvb6+p0dB9vDx1qtPPVKE6u2HyKW\nYSyYEUzrPqe6gJkVBUydlEtVYZwMTXciE4TaMERG0NDUxn2rG3l6y35e2dtGR0/f6+eyYxnUlMSZ\nUpLLlJJcTqksYE51IadWF1FVlEMwZ6ZIelDCEDkO/f3OjkPtbNt/hB2HOmg82E7j4Q52He6g8VAH\nTa1dr5etKsph/vRSzplWwtRJeVQV5VBZFGdSXha5WTElExl31OgtchwyMozpZflMLxt6zY/D7d1s\n2tPKxt0trHntMKu3H+K3644eX5qdmUFFQQ6zKguoryygpiSXmEEslsHUklwW1JWSn6P/cjJ+6Q5D\n5AQcaOtiT0sne1s62dfSxaH2Hg63d7O7uZMt+9rY2tRGV9jQPiArZpwzbRJnTClmRlke08vyqSvP\nZ0pJrtpMJGV0hyGSZGUFOZQV5HDa5OIhz/f1O22dvfS509vXz+a9bfxpy36e2bqfu1dsp7PnjWQS\nz8qgrryAU6sLObWmiHmTi5hXU8Sk/OzRqo5IJEoYIkkQyzCK895Y9r6yKM759eVAsDDVvtYutu0/\nwrb9R9i6r40tTW08s/UADzz/xtybNcVxZlcVUlaQzaS8bGqK45wxpZjTpxTr0ZakhP7ViYwyM6Oq\nKE5VUZxFM8vedO7gkW427Gphw+5m1u9qYWtTG1v2tXGovZv27qAnV4ZBVVGc3OwYedkxiuJZTMrP\npjQvm9nVhSyqK2VWZYEa4OWkU8IQGUNK87M5v7789buRRPvbuljX2MwLOw6z83AHHd19tHf30trZ\ny8ZdLexDF/n/AAAOE0lEQVRv66IlHOlemp/NrIoCJpfEqSnJJT87RnZmBvGsGIXxTIriWVQU5jC3\nuojsTK2OKNEoYYiME+UFOVw4t5IL51YOed7dee1gOyu2HWTltoNsP9jOqu2H2LN29+tzbw0Wz8rg\n7NoSzppaQmVRnMrCHKqK4tQUx6kujpOlpXYlgRKGSJowe6N78Mfnv7EumbvT2+909fbT2dNHa2cv\nLR097DzcwcpXD7Ly1YPc+fSrdPf1D/o8mJSXTUleFpPysplTXcjCulIW1pVpAOMEpW61IoK7c7i9\nh32tQXfh3Yc72NXcyYG2Lg6393DgSBfrd7bQ2hU88sqwYI2T4twsppXmMbMin+ml+RTEM19vV6kp\niTO5JJeieNYI3y6ppG61InJczIxJ+dlMyg/uJIbS29fPxt2trN5+kANHumnt7OVQezevHmjn1y/s\netNMwYlK87M5fUoxZ04pZkZ5PsW5WRTFM6kpzmXKpFxiGoMybihhiEgkmbEMzphazBlTjx574u60\ndPbS3t1Le3cfzR097D7cyc7D7WzZ18a6nS3c+set9A1qS8nOzKCuLJ/ywmwKcjIpjGcxuTjO1NI8\npk4K7k7ysmMU52ZRmp+tx2AppoQhIm+ZmVGcm0VxbsLjp2
lvLtPZ08e+li5aOnto7uhh56EOtja1\nsbXpCIfau9nf2k5rZw97WjoZqo1+Ul4Ws6sKmVmRT1E8i4KcTKqK4pw+pZj6qgI10I8CJQwRGRXx\nrBjTyvJGLNfT18+uwx3sPNRBW1dwx3LwSDev7Aump398w15aO3vfNPVKTmYG08vyKMvPoawgm+Lc\nLArimRTmZFKQk0l+ePdSWZRDTXGcysK4HoWdACUMERlTsmIZx5wMckBPXz+NhzpY23iYtY3NNB5q\n50BbN+t3tdDS0UNrV+/rC2cNFsswaorjTJ2US01xLvk5MXKzYuRmZ5KfHSMvJ5OqwhxOn1JMTXFc\nj8JCShgiMi5lxTKoKw8mcFx89pQhy3T39nOkq5e2rl6aO3rY19rJrsOd7G4O7mAaD3Xw3LaDdPb0\n0RFugzuOluVnc0plQTA6vzCH6eX5zKkqZE5V4Zumf5kIlDBEJG1lZ2aQnRn0/gpGpgw9WeSA/n6n\ns7ePtq5eGg918NLOZtY1NrP9YDvrGg/zeEvnmyaOXFhXyrXnTef9p1VPiDYUJQwRkVBGhpGXnUle\ndiaVhXHOnTbpTefdnd3NnWze28raxmbuW72Dm3/+PGX52dSFXYYn5WcHPb0mBT29akvzmFySHt2H\nNXBPROQE9fU7T25u4tcv7GRfazDI8eCRbva2dr7p0VZWzDilooDzZ5XzrtkVLKwrJZ4VS13gCcbM\nEq1mdinwXSAG/NjdvzXovIXnLwfagevcfU2Ua4eihCEiY0FPXz97mjvZcbCd7Qfb2X6gnXU7D7Py\n1UN09/YzvSyP//j0wki9xpJtTIz0NrMY8APgEqARWGlmS919Q0Kxy4D6cFsI3AosjHitiMiYlBXL\noLY0j9rSPN6RcLyju48/bm7iqw+s5Yolz3DXXy5gbnVRyuI8Xslsw1gAbHH3BgAzuwdYDCT+0F8M\n3OXBbc5yMysxsxpgRoRrRUTGldzsGJeeXs3MinyuvX0FH1/yLDdfNIuczBgZGUbMjMwMIyvTqCiI\nMzmcj2usPL5KZsKYAuxIeN9IcBcxUpkpEa8VERmXZlcVcv+N7+BTdz7H/1r28ojlszMzKMzJJC8n\nRoYZRjC6PniFsvwc7r3xvKTHPe57SZnZDcANANOmTRuhtIjI2FBbmsfjX3wPLR099LvT505/P/T2\n99Pd28++1i52He5gd3MnLZ09tHUGo977w3bnfg96bTlQOEpL9ibzW3YCtQnvp4bHopTJinAtAO5+\nG3AbBI3eby1kEZHRE8sIZgkeysyKglGOZmTJHGmyEqg3szozywauApYOKrMU+KQFFgHN7r474rUi\nIjKKknaH4e69ZnYz8ChB19g73H29md0Ynl8CLCPoUruFoFvt9ce6NlmxiojIyDRwT0RkAjuecRjp\nP/mJiIicFEoYIiISiRKGiIhEooQhIiKRKGGIiEgkadVLysyagO0neHk5sP8khjMWTYQ6wsSo50So\nI0yMeqa6jtPdvSJKwbRKGG+Fma2K2rVsvJoIdYSJUc+JUEeYGPUcT3XUIykREYlECUNERCJRwnjD\nbakOYBRMhDrCxKjnRKgjTIx6jps6qg1DREQi0R2GiIhEMuEThpldamabzGyLmX011fGcLGZWa2a/\nN7MNZrbezD4fHi81s8fN7JXwdVKqY32rzCxmZs+b2UPh+7SqY7h08f1m9rKZbTSz89KtjgBm9sXw\n3+pLZvYLM4uP93qa2R1mts/MXko4NmydzOxr4c+iTWb2/tREPbwJnTDMLAb8ALgMmAdcbWbzUhvV\nSdML/Dd3nwcsAm4K6/ZV4Al3rweeCN+Pd58HNia8T7c6fhd4xN3nAmcR1DWt6mhmU4DPAfPd/XSC\nZQ2uYvzX8yfApYOODVmn8P/nVcBp4TX/L/wZNWZM6IQBLAC2uHuDu3cD9wCLUxzTSeHuu919Tbjf\nSvBDZgpB/X4aFvsp8G
epifDkMLOpwAeAHyccTps6mlkx8G7gdgB373b3w6RRHRNkArlmlgnkAbsY\n5/V09yeBg4MOD1enxcA97t7l7tsI1glaMCqBRjTRE8YUYEfC+8bwWFoxsxnAOcAKoCpc1RBgD1CV\norBOlv8DfAXoTziWTnWsA5qAO8PHbj82s3zSq464+07g28BrwG6C1TcfI83qGRquTmP+59FETxhp\nz8wKgF8CX3D3lsRzHnSRG7fd5Mzsg8A+d189XJnxXkeC37rPBW5193OAIwx6LJMGdSR8jr+YIEFO\nBvLN7JrEMulQz8HGW50mesLYCdQmvJ8aHksLZpZFkCzudvcHwsN7zawmPF8D7EtVfCfBO4EPm9mr\nBI8TLzKzn5FedWwEGt19Rfj+foIEkk51BHgvsM3dm9y9B3gAeAfpV08Yvk5j/ufRRE8YK4F6M6sz\ns2yCBqelKY7ppDAzI3juvdHdv5NwainwqXD/U8CvRzu2k8Xdv+buU919BsHf3e/c/RrSq457gB1m\nNic8dDGwgTSqY+g1YJGZ5YX/di8maHdLt3rC8HVaClxlZjlmVgfUA8+lIL5hTfiBe2Z2OcFz8Bhw\nh7v/zxSHdFKY2fnAU8A63ni+/w8E7Rj3AtMIZvb9uLsPbpQbd8zsAuDL7v5BMysjjepoZmcTNOpn\nAw3A9QS/7KVNHQHM7OvAlQQ9/J4H/gooYBzX08x+AVxAMCPtXuAW4FcMUycz+0fg0wR/Bl9w94dT\nEPawJnzCEBGRaCb6IykREYlICUNERCJRwhARkUiUMEREJBIlDBERiUQJQ8YcM+szsxfCWUvvM7O8\nYcotM7OSE/j8yWZ2/1uI71UzKz/R68cLM7vOzCanOg4ZO5QwZCzqcPezw1lLu4EbE09aIMPdLw8n\n4jsu7r7L3a84WcGmsesIpukQAZQwZOx7CphlZjPCNQLuAl4Cagd+0w/PbTSzH4XrKTxmZrkAZjbL\nzP7LzF40szVmdkpY/qXw/HVm9msz+0O4PsEtA19sZr8ys9XhZ94wUqAWrK2yJvyuJ8JjpeHnrDWz\n5WZ2Znj8n83sp2b2lJltN7OPmtm/mtk6M3sknNZl4G5m4PhzZjYrPD7DzH4Xfu4TZjYtPP4TM/ue\nmT1jZg1mdkVCfH9nZivDa76e8DlH/dmF180H7g7v9nLN7FsWrK+y1sy+fRL+bmW8cXdt2sbUBrSF\nr5kE0yb8LTCDYMT6ooRyrxKMoJ1BMDL27PD4vcA14f4K4CPhfpxg2uwZwEvhsesIZkctA3IJktH8\n8Fxp+DpwvCzxewfFXEEw02jdoGu/D9wS7l8EvBDu/zPwJyCLYI2LduCy8NyDwJ8lfNc/hvufBB4K\n938DfCrc/zTwq3D/J8B9BL8MziOYvh/gfQRrR1t47iGCadOP9Wf3h4Q/izJgE28M9i1J9b8TbaO/\n6Q5DxqJcM3sBWEUwx9Dt4fHt7r58mGu2ufsL4f5qYIaZFQJT3P1BAHfvdPf2Ia593N0PuHsHwaR3\n54fHP2dmLwLLCSaFqz9GzIuAJz1YxwB/Y/qK84H/CI/9Digzs6Lw3MMeTLS3jmBqmkfC4+sIfpAP\n+EXC63nh/nnAz8P9/0iIGYLk0e/uG3hj6uz3hdvzwBpgbkJ9jvqzG6J+zUAncLuZfZQgwckEk5nq\nAESG0OHuZyceCOaj48gxrulK2O8juCuIavD8OB7OTfVe4Dx3bzezPxDcoZxMXQDu3m9mPe4+EEc/\nb/6/6cPsH/NzQ5bw+k13/2FiQQvWShnxz87de81sAcGkgFcANxPcMckEojsMSVserDTYaGZ/BhDO\nAjpUj6tLwraGXILVz54GioFDYbKYS3AHcSzLgXeHs4xiZqXh8aeAvwiPXQDs90HrkkRwZcLrs+H+\nMwQz9BJ+/lMjfMajwKctWB8FM5tiZpUjXNMKFIblC4Bid18GfJHgMZpMMLrDkHR3LfBD
M/sG0AN8\njDevzgfBFNK/JFh/4GfuvsrM1gE3mtlGgmf3wz0KA8Ddm8KG8QfMLINgjYNLCNoq7jCztQSPcT41\n/KcMa1J4fRdwdXjsswSr8P0dwYp8148Q32NmdirwbHi31gZcQ3BHMZyfAEvMrINg3ftfm1mc4G7l\nSydQDxnnNFutTGhmdh1Bw+7NqY5lKBYsDjXf3fenOhYRPZISEZFIdIchIiKR6A5DREQiUcIQEZFI\nlDBERCQSJQwREYlECUNERCJRwhARkUj+P+Cuq/sp7mpkAAAAAElFTkSuQmCC\n",
871 | "text/plain": [
872 | ""
873 | ]
874 | },
875 | "metadata": {},
876 | "output_type": "display_data"
877 | }
878 | ],
879 | "source": [
880 | "#PCA In Sklearn\n",
881 | "from sklearn.decomposition import PCA\n",
882 | "\n",
883 | "pca_full = PCA(n_components = None)\n",
884 | "pca_full.fit(X)\n",
885 | "\n",
886 | "plt.plot(range(0,107), pca_full.explained_variance_ratio_)\n",
887 | "plt.ylabel('Explained variance ratio')\n",
888 | "plt.xlabel('Principal components')\n",
889 | "#plt.xticks(np.arange(0,100,5))\n",
890 | "plt.show()"
891 | ]
892 | },
893 | {
894 | "cell_type": "markdown",
895 | "metadata": {},
896 | "source": [
897 | "#### From the figure above, we will choose 80 as the number of principal components."
898 | ]
899 | },
900 | {
901 | "cell_type": "code",
902 | "execution_count": 20,
903 | "metadata": {
904 | "collapsed": false
905 | },
906 | "outputs": [
907 | {
908 | "name": "stdout",
909 | "output_type": "stream",
910 | "text": [
911 | "[[-0.11279008 0.43894183 -3.99012024 ..., 1.41998245 0.4333418\n",
912 | " 0.50221114]\n",
913 | " [-0.46084816 0.17902204 -3.58830904 ..., 1.60322019 0.17307902\n",
914 | " 0.3332049 ]\n",
915 | " [-2.61677928 2.52343016 -5.04277486 ..., 1.53863346 0.27665 0.10085208]\n",
916 | " ..., \n",
917 | " [ 4.29349456 3.1139787 8.86817852 ..., 1.59718786 -0.59968411\n",
918 | " -0.65606429]\n",
919 | " [ 3.06349133 4.87790257 7.91930136 ..., 0.8040673 -1.19636896\n",
920 | " 0.58418913]\n",
921 | " [ 1.87201368 5.70640416 7.50898992 ..., 1.19867262 -0.93258426\n",
922 | " 0.58532392]]\n",
923 | "\n",
924 | "[ 0.0599287 0.05130629 0.04778579 0.04120846 0.03794963 0.03442235\n",
925 | " 0.02928028 0.02648273 0.02516247 0.02253369 0.02148449 0.01909422\n",
926 | " 0.01792891 0.01660708 0.01649287 0.01499748 0.01459388 0.0135756\n",
927 | " 0.01335547 0.01295986 0.01223028 0.01205561 0.01173874 0.0115274\n",
928 | " 0.01122597 0.01099925 0.01050502 0.01044227 0.01014913 0.0100701\n",
929 | " 0.00998956 0.00958084 0.00934085 0.009218 0.00898172 0.00889459\n",
930 | " 0.00866534 0.00851304 0.008274 0.00815345 0.00807838 0.00788843\n",
931 | " 0.0077532 0.00759841 0.00726425 0.00711507 0.00700392 0.0068995\n",
932 | " 0.00674679 0.00654725 0.00645781 0.00639411 0.00625 0.00623941\n",
933 | " 0.00600632 0.00588609 0.00575109 0.00558769 0.00542205 0.00535553\n",
934 | " 0.00517615 0.00514075 0.00507192 0.00485718 0.00475494 0.00472658\n",
935 | " 0.00467574 0.00446718 0.00438275 0.00428521 0.00421698 0.00400091\n",
936 | " 0.00394904 0.0038194 0.00378123 0.00367514 0.00348699 0.0034291\n",
937 | " 0.00330351 0.00324087]\n"
938 | ]
939 | }
940 | ],
941 | "source": [
942 | "x_scaled = StandardScaler().fit_transform(X)\n",
943 | "pca = PCA(n_components = 80)\n",
944 | "x_pca = pca.fit_transform(x_scaled)\n",
945 | "print(x_pca, end = '\\n\\n')\n",
946 | "print(pca.explained_variance_ratio_)"
947 | ]
948 | },
949 | {
950 | "cell_type": "code",
951 | "execution_count": 21,
952 | "metadata": {
953 | "collapsed": false
954 | },
955 | "outputs": [
956 | {
957 | "data": {
958 | "text/plain": [
959 | "0.95439228841885837"
960 | ]
961 | },
962 | "execution_count": 21,
963 | "metadata": {},
964 | "output_type": "execute_result"
965 | }
966 | ],
967 | "source": [
968 | "sum(pca.explained_variance_ratio_)"
969 | ]
970 | },
971 | {
972 | "cell_type": "code",
973 | "execution_count": 22,
974 | "metadata": {
975 | "collapsed": false
976 | },
977 | "outputs": [
978 | {
979 | "data": {
980 | "text/plain": [
981 | "(8378, 80)"
982 | ]
983 | },
984 | "execution_count": 22,
985 | "metadata": {},
986 | "output_type": "execute_result"
987 | }
988 | ],
989 | "source": [
990 | "x_pca.shape"
991 | ]
992 | },
993 | {
994 | "cell_type": "markdown",
995 | "metadata": {},
996 | "source": [
997 | "# 2. Model Training\n",
998 | "We'll train four models and compare their prediction results:\n",
999 | " - 2.1 Manually made neural network\n",
1000 | " - 2.2 Tensorflow\n",
1001 | " - 2.3 SVM\n",
1002 | " - 2.4 Logistic regression"
1003 | ]
1004 | },
1005 | {
1006 | "cell_type": "markdown",
1007 | "metadata": {},
1008 | "source": [
1009 | "### Train and Test split"
1010 | ]
1011 | },
1012 | {
1013 | "cell_type": "code",
1014 | "execution_count": 23,
1015 | "metadata": {
1016 | "collapsed": true
1017 | },
1018 | "outputs": [],
1019 | "source": [
1020 | "from sklearn.model_selection import train_test_split\n",
1021 | "\n",
1022 | "X_train, X_test, y_train, y_test = train_test_split(x_pca, df_labels, test_size=0.2, random_state=0)"
1023 | ]
1024 | },
1025 | {
1026 | "cell_type": "code",
1027 | "execution_count": 24,
1028 | "metadata": {
1029 | "collapsed": false,
1030 | "scrolled": true
1031 | },
1032 | "outputs": [
1033 | {
1034 | "data": {
1035 | "text/plain": [
1036 | "(6702, 80)"
1037 | ]
1038 | },
1039 | "execution_count": 24,
1040 | "metadata": {},
1041 | "output_type": "execute_result"
1042 | }
1043 | ],
1044 | "source": [
1045 | "X_train.shape"
1046 | ]
1047 | },
1048 | {
1049 | "cell_type": "code",
1050 | "execution_count": 25,
1051 | "metadata": {
1052 | "collapsed": false
1053 | },
1054 | "outputs": [
1055 | {
1056 | "data": {
1057 | "text/plain": [
1058 | "array([[-4.68464119, -2.8248629 , -1.51037084, ..., -0.08057486,\n",
1059 | " -0.71625464, -0.10307144],\n",
1060 | " [-0.74018427, 1.56521961, -0.06997587, ..., 0.01645834,\n",
1061 | " 0.45644289, -0.24637226],\n",
1062 | " [ 1.74612994, -3.12552681, 1.71795705, ..., 0.16907101,\n",
1063 | " 0.4087692 , -0.38171825],\n",
1064 | " ..., \n",
1065 | " [ 0.75099882, -2.49960586, -1.51160927, ..., -0.68449073,\n",
1066 | " 0.04991678, -1.01516311],\n",
1067 | " [-3.93462896, 2.8487166 , -1.62335803, ..., 0.61735951,\n",
1068 | " 0.13858547, 0.21935022],\n",
1069 | " [-1.56477143, -1.87060714, -0.83035874, ..., -0.58688186,\n",
1070 | " -0.59803885, -0.11533395]])"
1071 | ]
1072 | },
1073 | "execution_count": 25,
1074 | "metadata": {},
1075 | "output_type": "execute_result"
1076 | }
1077 | ],
1078 | "source": [
1079 | "X_train"
1080 | ]
1081 | },
1082 | {
1083 | "cell_type": "markdown",
1084 | "metadata": {},
1085 | "source": [
1086 | "## 2.1 Manual Neural Network\n",
1087 | " - 2.1.1 Build the Neural Network\n",
1088 | " - 2.1.2 Set the hyperparameters, train the NN and evaluate\n",
1089 | " - 2.1.3 Apply the SGD method to improve the accuracy"
1090 | ]
1091 | },
1092 | {
1093 | "cell_type": "markdown",
1094 | "metadata": {},
1095 | "source": [
1096 | "### 2.1.1 Build the neural network"
1097 | ]
1098 | },
1099 | {
1100 | "cell_type": "code",
1101 | "execution_count": 96,
1102 | "metadata": {
1103 | "collapsed": false
1104 | },
1105 | "outputs": [],
1106 | "source": [
1107 | "class MyNeuralNetwork(object):\n",
1108 | " def __init__(self, input_nodes, hidden_nodes, output_nodes, learning_rate):\n",
1109 | " # Set number of nodes in input, hidden and output layers.\n",
1110 | " self.input_nodes = input_nodes\n",
1111 | " self.hidden_nodes = hidden_nodes\n",
1112 | " self.output_nodes = output_nodes\n",
1113 | "\n",
1114 | " # Initialize weights\n",
1115 | " self.weights_0_1 = np.zeros((self.hidden_nodes,self.input_nodes))\n",
1116 | "\n",
1117 | " self.weights_1_2 = np.random.normal(0.0, self.output_nodes**-0.5, \n",
1118 | " (self.output_nodes, self.hidden_nodes))\n",
1119 | " self.lr = learning_rate\n",
1120 | " \n",
1121 | " #### Set this to your implemented sigmoid function ####\n",
1122 | " # Activation function is the sigmoid function\n",
1123 | " self.sigmoid_activation = lambda x : 1 / (1 + np.exp(-x))\n",
1124 | " self.sigmoid_output_2_derivative = lambda x: x * (1 - x)\n",
1125 | " \n",
1126 | " def train(self, inputs_array, targets_array):\n",
1127 | " # Convert inputs list to 2d array\n",
1128 | " inputs = inputs_array.T\n",
1129 | " targets = np.array(targets_array, ndmin=2)\n",
1130 | " #targets = targets_array\n",
1131 | " m = inputs_array.shape[0] # number of records\n",
1132 | " \n",
1133 | " #### Implement the forward pass here ####\n",
1134 | " ### Forward pass ###\n",
1135 | " # TODO: Hidden layer\n",
1136 | " layer_1_inputs = np.dot(self.weights_0_1, inputs) # signals into hidden layer\n",
1137 | " layer_1 = layer_1_inputs # signals from hidden layer\n",
1138 | " \n",
1139 | " # TODO: Output layer\n",
1140 | " layer_2_inputs = np.dot(self.weights_1_2,layer_1) # signals into final output layer\n",
1141 | " layer_2 = self.sigmoid_activation(layer_2_inputs) # signals from final output layer\n",
1142 | " \n",
1143 | " #### Implement the backward pass here ####\n",
1144 | " ### Backward pass ###\n",
1145 | " \n",
1146 | " # TODO: Output error \n",
1147 | " layer_2_errors = targets - layer_2 # Output layer error is the difference between desired target and actual output.\n",
1148 | " layer_2_delta = layer_2_errors * self.sigmoid_output_2_derivative(layer_2)\n",
1149 | " \n",
1150 | " # TODO: Backpropagated error\n",
1151 | " layer_1_errors = np.dot(self.weights_1_2.T,layer_2_delta) # errors propagated to the hidden layer 2x128\n",
1152 | " layer_1_delta = layer_1_errors # hidden layer gradients y = x -> 1\n",
1153 | " \n",
1154 | " # TODO: Update the weights\n",
1155 | " self.weights_1_2 += self.lr*np.dot(layer_2_delta,layer_1.T)/m # update hidden-to-output weights with gradient descent step\n",
1156 | " self.weights_0_1 += self.lr*np.dot(layer_1_delta,inputs.T)/m # update input-to-hidden weights with gradient descent step\n",
1157 | " \n",
1158 | " \n",
1159 | " def run(self, inputs_list):\n",
1160 | " # Run a forward pass through the network\n",
1161 | " inputs = np.array(inputs_list, ndmin=2).T\n",
1162 | " \n",
1163 | " #### Implement the forward pass here ####\n",
1164 | " # TODO: Hidden layer\n",
1165 | " hidden_inputs = np.dot(self.weights_0_1, inputs) # signals into hidden layer\n",
1166 | " hidden_outputs = hidden_inputs # signals from hidden layer\n",
1167 | " \n",
1168 | " # TODO: Output layer\n",
1169 | " final_inputs = np.dot(self.weights_1_2,hidden_outputs) # signals into final output layer\n",
1170 | " final_outputs = self.sigmoid_activation(final_inputs) # signals from final output layer \n",
1171 | " \n",
1172 | " return final_outputs"
1173 | ]
1174 | },
1175 | {
1176 | "cell_type": "markdown",
1177 | "metadata": {},
1178 | "source": [
1179 | "### 2.1.2 Train the model and evaluation"
1180 | ]
1181 | },
1182 | {
1183 | "cell_type": "code",
1184 | "execution_count": 97,
1185 | "metadata": {
1186 | "collapsed": false
1187 | },
1188 | "outputs": [
1189 | {
1190 | "name": "stdout",
1191 | "output_type": "stream",
1192 | "text": [
1193 | "0.658711217184\n"
1194 | ]
1195 | }
1196 | ],
1197 | "source": [
1198 | "from sklearn import metrics\n",
1199 | "### Set the hyperparameters here ###\n",
1200 | "epochs = 100 #100\n",
1201 | "learning_rate = 0.01 #0.1\n",
1202 | "hidden_nodes = 10 \n",
1203 | "output_nodes = 1\n",
1204 | "\n",
1205 | "N_i = X_train.shape[1]\n",
1206 | "network = MyNeuralNetwork(N_i, hidden_nodes, output_nodes, learning_rate)\n",
1207 | "\n",
1208 | "for e in range(epochs):\n",
1209 | " network.train(X_train, y_train)\n",
1210 | " \n",
1211 | "y_pred = network.run(X_test)\n",
1212 | "y_pred = np.where(y_pred >= 0.5, 1, 0) # if probability >= 0.5, it is 1, else 0\n",
1213 | "\n",
1214 | "print(metrics.accuracy_score(y_test,y_pred[0]))"
1215 | ]
1216 | },
1217 | {
1218 | "cell_type": "markdown",
1219 | "metadata": {},
1220 | "source": [
1221 | "### 2.1.3 SGD"
1222 | ]
1223 | },
1224 | {
1225 | "cell_type": "code",
1226 | "execution_count": 98,
1227 | "metadata": {
1228 | "collapsed": false
1229 | },
1230 | "outputs": [],
1231 | "source": [
1232 | "#N_i = X_train.shape[1]\n",
1233 | "network = MyNeuralNetwork(N_i, hidden_nodes, output_nodes, learning_rate)\n",
1234 | "\n",
1235 | "random_row_idx = np.zeros(128)\n",
1236 | "for e in range(epochs):\n",
1237 | " random_row_idx = np.random.choice(X_train.shape[0],size=128)\n",
1238 | " X_batch = X_train[random_row_idx,:]\n",
1239 | " y_batch = y_train[random_row_idx]\n",
1240 | " network.train(X_batch, y_batch)"
1241 | ]
1242 | },
1243 | {
1244 | "cell_type": "code",
1245 | "execution_count": 99,
1246 | "metadata": {
1247 | "collapsed": false
1248 | },
1249 | "outputs": [
1250 | {
1251 | "name": "stdout",
1252 | "output_type": "stream",
1253 | "text": [
1254 | "0.839498806683\n"
1255 | ]
1256 | },
1257 | {
1258 | "name": "stderr",
1259 | "output_type": "stream",
1260 | "text": [
1261 | "C:\\Anaconda3\\envs\\DLNDF\\lib\\site-packages\\ipykernel\\__main__.py:2: RuntimeWarning: invalid value encountered in greater_equal\n",
1262 | " from ipykernel import kernelapp as app\n"
1263 | ]
1264 | }
1265 | ],
1266 | "source": [
1267 | "y_pred = network.run(X_test)\n",
1268 | "y_pred = np.where(y_pred >= 0.5, 1, 0) # if probability >= 0.5, it is 1, else 0\n",
1269 | "print(metrics.accuracy_score(y_test,y_pred[0]))"
1270 | ]
1271 | },
1272 | {
1273 | "cell_type": "markdown",
1274 | "metadata": {
1275 | "collapsed": false
1276 | },
1277 | "source": [
1278 | "#### Wow, SGD improves the accuracy dramatically !!!!"
1279 | ]
1280 | },
1281 | {
1282 | "cell_type": "markdown",
1283 | "metadata": {},
1284 | "source": [
1285 | "## 2.2 Tensorflow"
1286 | ]
1287 | },
1288 | {
1289 | "cell_type": "code",
1290 | "execution_count": 31,
1291 | "metadata": {
1292 | "collapsed": true
1293 | },
1294 | "outputs": [],
1295 | "source": [
1296 | "import tensorflow as tf"
1297 | ]
1298 | },
1299 | {
1300 | "cell_type": "code",
1301 | "execution_count": 32,
1302 | "metadata": {
1303 | "collapsed": false,
1304 | "scrolled": true
1305 | },
1306 | "outputs": [
1307 | {
1308 | "name": "stdout",
1309 | "output_type": "stream",
1310 | "text": [
1311 | "WARNING:tensorflow:float64 is not supported by many models, consider casting to float32.\n",
1312 | "WARNING:tensorflow:Using temporary folder as model directory: C:\\Users\\minga\\AppData\\Local\\Temp\\tmpi4fp2htr\n",
1313 | "INFO:tensorflow:Using default config.\n",
1314 | "INFO:tensorflow:Using config: {'keep_checkpoint_every_n_hours': 10000, 'keep_checkpoint_max': 5, '_evaluation_master': '', 'save_summary_steps': 100, '_task_id': 0, 'save_checkpoints_secs': 600, 'tf_config': gpu_options {\n",
1315 | " per_process_gpu_memory_fraction: 1\n",
1316 | "}\n",
1317 | ", '_is_chief': True, '_environment': 'local', 'save_checkpoints_steps': None, '_num_ps_replicas': 0, '_cluster_spec': , '_master': '', '_task_type': None, 'tf_random_seed': None}\n",
1318 | "WARNING:tensorflow:From C:\\Anaconda3\\envs\\DLNDF\\lib\\site-packages\\tensorflow\\contrib\\learn\\python\\learn\\estimators\\dnn.py:315 in fit.: calling BaseEstimator.fit (from tensorflow.contrib.learn.python.learn.estimators.estimator) with x is deprecated and will be removed after 2016-12-01.\n",
1319 | "Instructions for updating:\n",
1320 | "Estimator is decoupled from Scikit Learn interface by moving into\n",
1321 | "separate class SKCompat. Arguments x, y and batch_size are only\n",
1322 | "available in the SKCompat class, Estimator will only accept input_fn.\n",
1323 | "Example conversion:\n",
1324 | " est = Estimator(...) -> est = SKCompat(Estimator(...))\n",
1325 | "WARNING:tensorflow:From C:\\Anaconda3\\envs\\DLNDF\\lib\\site-packages\\tensorflow\\contrib\\learn\\python\\learn\\estimators\\dnn.py:315 in fit.: calling BaseEstimator.fit (from tensorflow.contrib.learn.python.learn.estimators.estimator) with y is deprecated and will be removed after 2016-12-01.\n",
1326 | "Instructions for updating:\n",
1327 | "Estimator is decoupled from Scikit Learn interface by moving into\n",
1328 | "separate class SKCompat. Arguments x, y and batch_size are only\n",
1329 | "available in the SKCompat class, Estimator will only accept input_fn.\n",
1330 | "Example conversion:\n",
1331 | " est = Estimator(...) -> est = SKCompat(Estimator(...))\n",
1332 | "WARNING:tensorflow:From C:\\Anaconda3\\envs\\DLNDF\\lib\\site-packages\\tensorflow\\contrib\\learn\\python\\learn\\estimators\\dnn.py:315 in fit.: calling BaseEstimator.fit (from tensorflow.contrib.learn.python.learn.estimators.estimator) with batch_size is deprecated and will be removed after 2016-12-01.\n",
1333 | "Instructions for updating:\n",
1334 | "Estimator is decoupled from Scikit Learn interface by moving into\n",
1335 | "separate class SKCompat. Arguments x, y and batch_size are only\n",
1336 | "available in the SKCompat class, Estimator will only accept input_fn.\n",
1337 | "Example conversion:\n",
1338 | " est = Estimator(...) -> est = SKCompat(Estimator(...))\n",
1339 | "WARNING:tensorflow:float64 is not supported by many models, consider casting to float32.\n",
1340 | "INFO:tensorflow:Summary name dnn/hiddenlayer_0:fraction_of_zero_values is illegal; using dnn/hiddenlayer_0_fraction_of_zero_values instead.\n",
1341 | "INFO:tensorflow:Summary name dnn/hiddenlayer_0:activation is illegal; using dnn/hiddenlayer_0_activation instead.\n",
1342 | "INFO:tensorflow:Summary name dnn/logits:fraction_of_zero_values is illegal; using dnn/logits_fraction_of_zero_values instead.\n",
1343 | "INFO:tensorflow:Summary name dnn/logits:activation is illegal; using dnn/logits_activation instead.\n",
1344 | "INFO:tensorflow:Create CheckpointSaverHook.\n",
1345 | "INFO:tensorflow:loss = 0.534811, step = 1\n",
1346 | "INFO:tensorflow:Saving checkpoints for 1 into C:\\Users\\minga\\AppData\\Local\\Temp\\tmpi4fp2htr\\model.ckpt.\n",
1347 | "WARNING:tensorflow:*******************************************************\n",
1348 | "WARNING:tensorflow:TensorFlow's V1 checkpoint format has been deprecated.\n",
1349 | "WARNING:tensorflow:Consider switching to the more efficient V2 format:\n",
1350 | "WARNING:tensorflow: `tf.train.Saver(write_version=tf.train.SaverDef.V2)`\n",
1351 | "WARNING:tensorflow:now on by default.\n",
1352 | "WARNING:tensorflow:*******************************************************\n",
1353 | "INFO:tensorflow:loss = 0.105281, step = 101\n",
1354 | "INFO:tensorflow:global_step/sec: 10.1418\n",
1355 | "INFO:tensorflow:loss = 0.033156, step = 201\n",
1356 | "INFO:tensorflow:global_step/sec: 10.5714\n",
1357 | "INFO:tensorflow:Saving checkpoints for 300 into C:\\Users\\minga\\AppData\\Local\\Temp\\tmpi4fp2htr\\model.ckpt.\n",
1358 | "WARNING:tensorflow:*******************************************************\n",
1359 | "WARNING:tensorflow:TensorFlow's V1 checkpoint format has been deprecated.\n",
1360 | "WARNING:tensorflow:Consider switching to the more efficient V2 format:\n",
1361 | "WARNING:tensorflow: `tf.train.Saver(write_version=tf.train.SaverDef.V2)`\n",
1362 | "WARNING:tensorflow:now on by default.\n",
1363 | "WARNING:tensorflow:*******************************************************\n",
1364 | "INFO:tensorflow:Loss for final step: 0.0162134.\n",
1365 | "WARNING:tensorflow:From C:\\Anaconda3\\envs\\DLNDF\\lib\\site-packages\\tensorflow\\contrib\\learn\\python\\learn\\estimators\\dnn.py:348 in predict.: calling BaseEstimator.predict (from tensorflow.contrib.learn.python.learn.estimators.estimator) with x is deprecated and will be removed after 2016-12-01.\n",
1366 | "Instructions for updating:\n",
1367 | "Estimator is decoupled from Scikit Learn interface by moving into\n",
1368 | "separate class SKCompat. Arguments x, y and batch_size are only\n",
1369 | "available in the SKCompat class, Estimator will only accept input_fn.\n",
1370 | "Example conversion:\n",
1371 | " est = Estimator(...) -> est = SKCompat(Estimator(...))\n",
1372 | "WARNING:tensorflow:From C:\\Anaconda3\\envs\\DLNDF\\lib\\site-packages\\tensorflow\\contrib\\learn\\python\\learn\\estimators\\dnn.py:348 in predict.: calling BaseEstimator.predict (from tensorflow.contrib.learn.python.learn.estimators.estimator) with batch_size is deprecated and will be removed after 2016-12-01.\n",
1373 | "Instructions for updating:\n",
1374 | "Estimator is decoupled from Scikit Learn interface by moving into\n",
1375 | "separate class SKCompat. Arguments x, y and batch_size are only\n",
1376 | "available in the SKCompat class, Estimator will only accept input_fn.\n",
1377 | "Example conversion:\n",
1378 | " est = Estimator(...) -> est = SKCompat(Estimator(...))\n",
1379 | "WARNING:tensorflow:From C:\\Anaconda3\\envs\\DLNDF\\lib\\site-packages\\tensorflow\\contrib\\learn\\python\\learn\\estimators\\dnn.py:348 in predict.: calling BaseEstimator.predict (from tensorflow.contrib.learn.python.learn.estimators.estimator) with as_iterable is deprecated and will be removed after 2016-12-01.\n",
1380 | "Instructions for updating:\n",
1381 | "Estimator is decoupled from Scikit Learn interface by moving into\n",
1382 | "separate class SKCompat. Arguments x, y and batch_size are only\n",
1383 | "available in the SKCompat class, Estimator will only accept input_fn.\n",
1384 | "Example conversion:\n",
1385 | " est = Estimator(...) -> est = SKCompat(Estimator(...))\n",
1386 | "WARNING:tensorflow:float64 is not supported by many models, consider casting to float32.\n",
1387 | "INFO:tensorflow:Summary name dnn/hiddenlayer_0:fraction_of_zero_values is illegal; using dnn/hiddenlayer_0_fraction_of_zero_values instead.\n",
1388 | "INFO:tensorflow:Summary name dnn/hiddenlayer_0:activation is illegal; using dnn/hiddenlayer_0_activation instead.\n",
1389 | "INFO:tensorflow:Summary name dnn/logits:fraction_of_zero_values is illegal; using dnn/logits_fraction_of_zero_values instead.\n",
1390 | "INFO:tensorflow:Summary name dnn/logits:activation is illegal; using dnn/logits_activation instead.\n",
1391 | "INFO:tensorflow:Loading model from checkpoint: C:\\Users\\minga\\AppData\\Local\\Temp\\tmpi4fp2htr\\model.ckpt-300-?????-of-00001.\n"
1392 | ]
1393 | }
1394 | ],
1395 | "source": [
1396 | "# Build one layer DNN with 40 units respectively.\n",
1397 | "feature_columns = tf.contrib.learn.infer_real_valued_columns_from_input(X_train)\n",
1398 | "classifier = tf.contrib.learn.DNNClassifier(feature_columns=feature_columns, hidden_units=[40], n_classes=2)\n",
1399 | "\n",
1400 | "# Fit and predict.\n",
1401 | "classifier.fit(X_train, y_train, steps=300)\n",
1402 | "predictions = list(classifier.predict(X_test, as_iterable=True))\n",
1403 | "score = metrics.accuracy_score(y_test, predictions)"
1404 | ]
1405 | },
1406 | {
1407 | "cell_type": "code",
1408 | "execution_count": 33,
1409 | "metadata": {
1410 | "collapsed": false
1411 | },
1412 | "outputs": [
1413 | {
1414 | "name": "stdout",
1415 | "output_type": "stream",
1416 | "text": [
1417 | "TF Accuracy: 0.999403341289\n"
1418 | ]
1419 | }
1420 | ],
1421 | "source": [
1422 | "print('TF Accuracy: ', score)"
1423 | ]
1424 | },
1425 | {
1426 | "cell_type": "markdown",
1427 | "metadata": {},
1428 | "source": [
1429 | "## 2.3 SVM"
1430 | ]
1431 | },
1432 | {
1433 | "cell_type": "code",
1434 | "execution_count": 34,
1435 | "metadata": {
1436 | "collapsed": true
1437 | },
1438 | "outputs": [],
1439 | "source": [
1440 | "from sklearn.svm import SVC\n",
1441 | "svc = SVC()\n",
1442 | "svc.fit(X_train, y_train)\n",
1443 | "y_pred = svc.predict(X_test)"
1444 | ]
1445 | },
1446 | {
1447 | "cell_type": "code",
1448 | "execution_count": 35,
1449 | "metadata": {
1450 | "collapsed": false
1451 | },
1452 | "outputs": [
1453 | {
1454 | "data": {
1455 | "text/plain": [
1456 | "array([0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,\n",
1457 | " 0, 0, 0, 0, 0, 0, 0], dtype=int64)"
1458 | ]
1459 | },
1460 | "execution_count": 35,
1461 | "metadata": {},
1462 | "output_type": "execute_result"
1463 | }
1464 | ],
1465 | "source": [
1466 | "y_pred[:30]"
1467 | ]
1468 | },
1469 | {
1470 | "cell_type": "code",
1471 | "execution_count": 36,
1472 | "metadata": {
1473 | "collapsed": false
1474 | },
1475 | "outputs": [
1476 | {
1477 | "name": "stdout",
1478 | "output_type": "stream",
1479 | "text": [
1480 | "SVM Accuracy: 0.994630071599\n"
1481 | ]
1482 | }
1483 | ],
1484 | "source": [
1485 | "print('SVM Accuracy: ', metrics.accuracy_score(y_test,y_pred))"
1486 | ]
1487 | },
1488 | {
1489 | "cell_type": "markdown",
1490 | "metadata": {},
1491 | "source": [
1492 | "## 2.4 Logistic regression"
1493 | ]
1494 | },
1495 | {
1496 | "cell_type": "code",
1497 | "execution_count": 37,
1498 | "metadata": {
1499 | "collapsed": false
1500 | },
1501 | "outputs": [],
1502 | "source": [
1503 | "from sklearn.linear_model import LogisticRegression\n",
1504 | "\n",
1505 | "logreg = LogisticRegression(C=1e5)\n",
1506 | "logreg.fit(X_train, y_train)\n",
1507 | "y_pred = logreg.predict(X_test)"
1508 | ]
1509 | },
1510 | {
1511 | "cell_type": "code",
1512 | "execution_count": 38,
1513 | "metadata": {
1514 | "collapsed": false
1515 | },
1516 | "outputs": [
1517 | {
1518 | "data": {
1519 | "text/plain": [
1520 | "array([0, 0, 0, ..., 0, 0, 1], dtype=int64)"
1521 | ]
1522 | },
1523 | "execution_count": 38,
1524 | "metadata": {},
1525 | "output_type": "execute_result"
1526 | }
1527 | ],
1528 | "source": [
1529 | "y_pred"
1530 | ]
1531 | },
1532 | {
1533 | "cell_type": "code",
1534 | "execution_count": 39,
1535 | "metadata": {
1536 | "collapsed": false
1537 | },
1538 | "outputs": [
1539 | {
1540 | "name": "stdout",
1541 | "output_type": "stream",
1542 | "text": [
1543 | "Log Regression Accuracy: 1.0\n"
1544 | ]
1545 | }
1546 | ],
1547 | "source": [
1548 | "print('Log Regression Accuracy: ', metrics.accuracy_score(y_test,y_pred))"
1549 | ]
1550 | },
1551 | {
1552 | "cell_type": "markdown",
1553 | "metadata": {},
1554 | "source": [
1555 | "# 3. Summary\n",
1556 | " - Manual NN: 0.839498806683. \n",
1557 | " - TF Accuracy: 0.999403341289\n",
1558 | " - SVM Accuracy: 0.994630071599\n",
1559 | " - Log Regression Accuracy: 1.0"
1560 | ]
1561 | }
1562 | ],
1563 | "metadata": {
1564 | "kernelspec": {
1565 | "display_name": "Python 3",
1566 | "language": "python",
1567 | "name": "python3"
1568 | },
1569 | "language_info": {
1570 | "codemirror_mode": {
1571 | "name": "ipython",
1572 | "version": 3
1573 | },
1574 | "file_extension": ".py",
1575 | "mimetype": "text/x-python",
1576 | "name": "python",
1577 | "nbconvert_exporter": "python",
1578 | "pygments_lexer": "ipython3",
1579 | "version": "3.5.2"
1580 | }
1581 | },
1582 | "nbformat": 4,
1583 | "nbformat_minor": 2
1584 | }
1585 |
--------------------------------------------------------------------------------
/wk5-speed-dating/Speed Dating Data.csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mdalai/Deep-Learning-projects/f690b3d8901e2ee7d872765815306ed09ba83a5a/wk5-speed-dating/Speed Dating Data.csv
--------------------------------------------------------------------------------
/wk5-speed-dating/Speed dating prediction.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Speed dating prediction\n",
8 | " - [Kaggle Speed dating experiment](https://www.kaggle.com/annavictoria/speed-dating-experiment)\n",
9 | " - Learning fun [Siraj's DL #5](https://www.youtube.com/watch?v=koiTTim4M-s)"
10 | ]
11 | },
12 | {
13 | "cell_type": "code",
14 | "execution_count": 1,
15 | "metadata": {
16 | "collapsed": true
17 | },
18 | "outputs": [],
19 | "source": [
20 | "%matplotlib inline\n",
21 | "import pandas as pd\n",
22 | "import numpy as np\n",
23 | "import matplotlib.pyplot as plt"
24 | ]
25 | },
26 | {
27 | "cell_type": "code",
28 | "execution_count": 2,
29 | "metadata": {
30 | "collapsed": false
31 | },
32 | "outputs": [
33 | {
34 | "data": {
35 | "text/html": [
36 | "\n",
37 | "
\n",
38 | " \n",
39 | " \n",
40 | " | \n",
41 | " iid | \n",
42 | " id | \n",
43 | " gender | \n",
44 | " idg | \n",
45 | " condtn | \n",
46 | " wave | \n",
47 | " round | \n",
48 | " position | \n",
49 | " positin1 | \n",
50 | " order | \n",
51 | " ... | \n",
52 | " attr3_3 | \n",
53 | " sinc3_3 | \n",
54 | " intel3_3 | \n",
55 | " fun3_3 | \n",
56 | " amb3_3 | \n",
57 | " attr5_3 | \n",
58 | " sinc5_3 | \n",
59 | " intel5_3 | \n",
60 | " fun5_3 | \n",
61 | " amb5_3 | \n",
62 | "
\n",
63 | " \n",
64 | " \n",
65 | " \n",
66 | " 0 | \n",
67 | " 1 | \n",
68 | " 1.0 | \n",
69 | " 0 | \n",
70 | " 1 | \n",
71 | " 1 | \n",
72 | " 1 | \n",
73 | " 10 | \n",
74 | " 7 | \n",
75 | " NaN | \n",
76 | " 4 | \n",
77 | " ... | \n",
78 | " 5.0 | \n",
79 | " 7.0 | \n",
80 | " 7.0 | \n",
81 | " 7.0 | \n",
82 | " 7.0 | \n",
83 | " NaN | \n",
84 | " NaN | \n",
85 | " NaN | \n",
86 | " NaN | \n",
87 | " NaN | \n",
88 | "
\n",
89 | " \n",
90 | " 1 | \n",
91 | " 1 | \n",
92 | " 1.0 | \n",
93 | " 0 | \n",
94 | " 1 | \n",
95 | " 1 | \n",
96 | " 1 | \n",
97 | " 10 | \n",
98 | " 7 | \n",
99 | " NaN | \n",
100 | " 3 | \n",
101 | " ... | \n",
102 | " 5.0 | \n",
103 | " 7.0 | \n",
104 | " 7.0 | \n",
105 | " 7.0 | \n",
106 | " 7.0 | \n",
107 | " NaN | \n",
108 | " NaN | \n",
109 | " NaN | \n",
110 | " NaN | \n",
111 | " NaN | \n",
112 | "
\n",
113 | " \n",
114 | " 2 | \n",
115 | " 1 | \n",
116 | " 1.0 | \n",
117 | " 0 | \n",
118 | " 1 | \n",
119 | " 1 | \n",
120 | " 1 | \n",
121 | " 10 | \n",
122 | " 7 | \n",
123 | " NaN | \n",
124 | " 10 | \n",
125 | " ... | \n",
126 | " 5.0 | \n",
127 | " 7.0 | \n",
128 | " 7.0 | \n",
129 | " 7.0 | \n",
130 | " 7.0 | \n",
131 | " NaN | \n",
132 | " NaN | \n",
133 | " NaN | \n",
134 | " NaN | \n",
135 | " NaN | \n",
136 | "
\n",
137 | " \n",
138 | " 3 | \n",
139 | " 1 | \n",
140 | " 1.0 | \n",
141 | " 0 | \n",
142 | " 1 | \n",
143 | " 1 | \n",
144 | " 1 | \n",
145 | " 10 | \n",
146 | " 7 | \n",
147 | " NaN | \n",
148 | " 5 | \n",
149 | " ... | \n",
150 | " 5.0 | \n",
151 | " 7.0 | \n",
152 | " 7.0 | \n",
153 | " 7.0 | \n",
154 | " 7.0 | \n",
155 | " NaN | \n",
156 | " NaN | \n",
157 | " NaN | \n",
158 | " NaN | \n",
159 | " NaN | \n",
160 | "
\n",
161 | " \n",
162 | " 4 | \n",
163 | " 1 | \n",
164 | " 1.0 | \n",
165 | " 0 | \n",
166 | " 1 | \n",
167 | " 1 | \n",
168 | " 1 | \n",
169 | " 10 | \n",
170 | " 7 | \n",
171 | " NaN | \n",
172 | " 7 | \n",
173 | " ... | \n",
174 | " 5.0 | \n",
175 | " 7.0 | \n",
176 | " 7.0 | \n",
177 | " 7.0 | \n",
178 | " 7.0 | \n",
179 | " NaN | \n",
180 | " NaN | \n",
181 | " NaN | \n",
182 | " NaN | \n",
183 | " NaN | \n",
184 | "
\n",
185 | " \n",
186 | "
\n",
187 | "
5 rows × 195 columns
\n",
188 | "
"
189 | ],
190 | "text/plain": [
191 | " iid id gender idg condtn wave round position positin1 order \\\n",
192 | "0 1 1.0 0 1 1 1 10 7 NaN 4 \n",
193 | "1 1 1.0 0 1 1 1 10 7 NaN 3 \n",
194 | "2 1 1.0 0 1 1 1 10 7 NaN 10 \n",
195 | "3 1 1.0 0 1 1 1 10 7 NaN 5 \n",
196 | "4 1 1.0 0 1 1 1 10 7 NaN 7 \n",
197 | "\n",
198 | " ... attr3_3 sinc3_3 intel3_3 fun3_3 amb3_3 attr5_3 sinc5_3 \\\n",
199 | "0 ... 5.0 7.0 7.0 7.0 7.0 NaN NaN \n",
200 | "1 ... 5.0 7.0 7.0 7.0 7.0 NaN NaN \n",
201 | "2 ... 5.0 7.0 7.0 7.0 7.0 NaN NaN \n",
202 | "3 ... 5.0 7.0 7.0 7.0 7.0 NaN NaN \n",
203 | "4 ... 5.0 7.0 7.0 7.0 7.0 NaN NaN \n",
204 | "\n",
205 | " intel5_3 fun5_3 amb5_3 \n",
206 | "0 NaN NaN NaN \n",
207 | "1 NaN NaN NaN \n",
208 | "2 NaN NaN NaN \n",
209 | "3 NaN NaN NaN \n",
210 | "4 NaN NaN NaN \n",
211 | "\n",
212 | "[5 rows x 195 columns]"
213 | ]
214 | },
215 | "execution_count": 2,
216 | "metadata": {},
217 | "output_type": "execute_result"
218 | }
219 | ],
220 | "source": [
221 | "df =pd.read_csv('Speed Dating Data.csv', encoding=\"ISO-8859-1\")\n",
222 | "df.head()"
223 | ]
224 | },
225 | {
226 | "cell_type": "code",
227 | "execution_count": 3,
228 | "metadata": {
229 | "collapsed": false
230 | },
231 | "outputs": [
232 | {
233 | "data": {
234 | "text/plain": [
235 | "0 0\n",
236 | "1 0\n",
237 | "2 1\n",
238 | "3 1\n",
239 | "4 1\n",
240 | "Name: match, dtype: int64"
241 | ]
242 | },
243 | "execution_count": 3,
244 | "metadata": {},
245 | "output_type": "execute_result"
246 | }
247 | ],
248 | "source": [
249 | "df['match'].head()"
250 | ]
251 | },
252 | {
253 | "cell_type": "code",
254 | "execution_count": 4,
255 | "metadata": {
256 | "collapsed": false
257 | },
258 | "outputs": [
259 | {
260 | "data": {
261 | "text/plain": [
262 | "(8378, 195)"
263 | ]
264 | },
265 | "execution_count": 4,
266 | "metadata": {},
267 | "output_type": "execute_result"
268 | }
269 | ],
270 | "source": [
271 | "df.shape"
272 | ]
273 | },
274 | {
275 | "cell_type": "code",
276 | "execution_count": 5,
277 | "metadata": {
278 | "collapsed": false
279 | },
280 | "outputs": [
281 | {
282 | "name": "stdout",
283 | "output_type": "stream",
284 | "text": [
285 | "\n",
286 | "RangeIndex: 8378 entries, 0 to 8377\n",
287 | "Columns: 195 entries, iid to amb5_3\n",
288 | "dtypes: float64(174), int64(13), object(8)\n",
289 | "memory usage: 12.5+ MB\n"
290 | ]
291 | }
292 | ],
293 | "source": [
294 | "df.info()"
295 | ]
296 | },
297 | {
298 | "cell_type": "markdown",
299 | "metadata": {},
300 | "source": [
301 | "### First of all, let's just separate features and labels"
302 | ]
303 | },
304 | {
305 | "cell_type": "code",
306 | "execution_count": 6,
307 | "metadata": {
308 | "collapsed": false
309 | },
310 | "outputs": [],
311 | "source": [
312 | "df, df_labels = df.drop(['match'], axis=1), df['match']"
313 | ]
314 | },
315 | {
316 | "cell_type": "markdown",
317 | "metadata": {},
318 | "source": [
319 | "# 1. Preprocessing Data\n",
320 | " - 1.1 Cleaning\n",
321 | " - 1.2 Transformation\n",
322 | " - 1.3 Reduction by PCA"
323 | ]
324 | },
325 | {
326 | "cell_type": "markdown",
327 | "metadata": {},
328 | "source": [
329 | "## 1.1 Cleaning"
330 | ]
331 | },
332 | {
333 | "cell_type": "markdown",
334 | "metadata": {},
335 | "source": [
336 | "### Cleaning null features\n",
337 | "If more than 30% (2513 rows) of a feature's values are null, we just drop the whole column. "
338 | ]
339 | },
340 | {
341 | "cell_type": "code",
342 | "execution_count": 7,
343 | "metadata": {
344 | "collapsed": false
345 | },
346 | "outputs": [
347 | {
348 | "name": "stdout",
349 | "output_type": "stream",
350 | "text": [
351 | "194\n",
352 | "194\n"
353 | ]
354 | }
355 | ],
356 | "source": [
357 | "na_sum = list(df.isnull().sum())\n",
358 | "print(len(na_sum))\n",
359 | "#na_col = list(df.isnull().sum().index)\n",
360 | "#print(len(na_col))"
361 | ]
362 | },
363 | {
364 | "cell_type": "code",
365 | "execution_count": 8,
366 | "metadata": {
367 | "collapsed": false
368 | },
369 | "outputs": [
370 | {
371 | "name": "stdout",
372 | "output_type": "stream",
373 | "text": [
374 | "We can drop 83 Columns\n"
375 | ]
376 | }
377 | ],
378 | "source": [
379 | "drop_col =[]\n",
380 | "for i in range(len(na_sum)):\n",
381 | " if na_sum[i] > 2523:\n",
382 | " drop_col.append(na_col[i])\n",
383 | "print(\"We can drop \",len(drop_col),\" Columns\")"
384 | ]
385 | },
386 | {
387 | "cell_type": "code",
388 | "execution_count": 9,
389 | "metadata": {
390 | "collapsed": false
391 | },
392 | "outputs": [
393 | {
394 | "data": {
395 | "text/html": [
396 | "\n",
397 | "
\n",
398 | " \n",
399 | " \n",
400 | " | \n",
401 | " iid | \n",
402 | " id | \n",
403 | " gender | \n",
404 | " idg | \n",
405 | " condtn | \n",
406 | " wave | \n",
407 | " round | \n",
408 | " position | \n",
409 | " positin1 | \n",
410 | " order | \n",
411 | " ... | \n",
412 | " sinc1_2 | \n",
413 | " intel1_2 | \n",
414 | " fun1_2 | \n",
415 | " amb1_2 | \n",
416 | " shar1_2 | \n",
417 | " attr3_2 | \n",
418 | " sinc3_2 | \n",
419 | " intel3_2 | \n",
420 | " fun3_2 | \n",
421 | " amb3_2 | \n",
422 | "
\n",
423 | " \n",
424 | " \n",
425 | " \n",
426 | " 0 | \n",
427 | " 1 | \n",
428 | " 1.0 | \n",
429 | " 0 | \n",
430 | " 1 | \n",
431 | " 1 | \n",
432 | " 1 | \n",
433 | " 10 | \n",
434 | " 7 | \n",
435 | " NaN | \n",
436 | " 4 | \n",
437 | " ... | \n",
438 | " 16.67 | \n",
439 | " 13.89 | \n",
440 | " 22.22 | \n",
441 | " 11.11 | \n",
442 | " 16.67 | \n",
443 | " 6.0 | \n",
444 | " 7.0 | \n",
445 | " 8.0 | \n",
446 | " 7.0 | \n",
447 | " 6.0 | \n",
448 | "
\n",
449 | " \n",
450 | " 1 | \n",
451 | " 1 | \n",
452 | " 1.0 | \n",
453 | " 0 | \n",
454 | " 1 | \n",
455 | " 1 | \n",
456 | " 1 | \n",
457 | " 10 | \n",
458 | " 7 | \n",
459 | " NaN | \n",
460 | " 3 | \n",
461 | " ... | \n",
462 | " 16.67 | \n",
463 | " 13.89 | \n",
464 | " 22.22 | \n",
465 | " 11.11 | \n",
466 | " 16.67 | \n",
467 | " 6.0 | \n",
468 | " 7.0 | \n",
469 | " 8.0 | \n",
470 | " 7.0 | \n",
471 | " 6.0 | \n",
472 | "
\n",
473 | " \n",
474 | " 2 | \n",
475 | " 1 | \n",
476 | " 1.0 | \n",
477 | " 0 | \n",
478 | " 1 | \n",
479 | " 1 | \n",
480 | " 1 | \n",
481 | " 10 | \n",
482 | " 7 | \n",
483 | " NaN | \n",
484 | " 10 | \n",
485 | " ... | \n",
486 | " 16.67 | \n",
487 | " 13.89 | \n",
488 | " 22.22 | \n",
489 | " 11.11 | \n",
490 | " 16.67 | \n",
491 | " 6.0 | \n",
492 | " 7.0 | \n",
493 | " 8.0 | \n",
494 | " 7.0 | \n",
495 | " 6.0 | \n",
496 | "
\n",
497 | " \n",
498 | " 3 | \n",
499 | " 1 | \n",
500 | " 1.0 | \n",
501 | " 0 | \n",
502 | " 1 | \n",
503 | " 1 | \n",
504 | " 1 | \n",
505 | " 10 | \n",
506 | " 7 | \n",
507 | " NaN | \n",
508 | " 5 | \n",
509 | " ... | \n",
510 | " 16.67 | \n",
511 | " 13.89 | \n",
512 | " 22.22 | \n",
513 | " 11.11 | \n",
514 | " 16.67 | \n",
515 | " 6.0 | \n",
516 | " 7.0 | \n",
517 | " 8.0 | \n",
518 | " 7.0 | \n",
519 | " 6.0 | \n",
520 | "
\n",
521 | " \n",
522 | " 4 | \n",
523 | " 1 | \n",
524 | " 1.0 | \n",
525 | " 0 | \n",
526 | " 1 | \n",
527 | " 1 | \n",
528 | " 1 | \n",
529 | " 10 | \n",
530 | " 7 | \n",
531 | " NaN | \n",
532 | " 7 | \n",
533 | " ... | \n",
534 | " 16.67 | \n",
535 | " 13.89 | \n",
536 | " 22.22 | \n",
537 | " 11.11 | \n",
538 | " 16.67 | \n",
539 | " 6.0 | \n",
540 | " 7.0 | \n",
541 | " 8.0 | \n",
542 | " 7.0 | \n",
543 | " 6.0 | \n",
544 | "
\n",
545 | " \n",
546 | "
\n",
547 | "
5 rows × 111 columns
\n",
548 | "
"
549 | ],
550 | "text/plain": [
551 | " iid id gender idg condtn wave round position positin1 order \\\n",
552 | "0 1 1.0 0 1 1 1 10 7 NaN 4 \n",
553 | "1 1 1.0 0 1 1 1 10 7 NaN 3 \n",
554 | "2 1 1.0 0 1 1 1 10 7 NaN 10 \n",
555 | "3 1 1.0 0 1 1 1 10 7 NaN 5 \n",
556 | "4 1 1.0 0 1 1 1 10 7 NaN 7 \n",
557 | "\n",
558 | " ... sinc1_2 intel1_2 fun1_2 amb1_2 shar1_2 attr3_2 sinc3_2 \\\n",
559 | "0 ... 16.67 13.89 22.22 11.11 16.67 6.0 7.0 \n",
560 | "1 ... 16.67 13.89 22.22 11.11 16.67 6.0 7.0 \n",
561 | "2 ... 16.67 13.89 22.22 11.11 16.67 6.0 7.0 \n",
562 | "3 ... 16.67 13.89 22.22 11.11 16.67 6.0 7.0 \n",
563 | "4 ... 16.67 13.89 22.22 11.11 16.67 6.0 7.0 \n",
564 | "\n",
565 | " intel3_2 fun3_2 amb3_2 \n",
566 | "0 8.0 7.0 6.0 \n",
567 | "1 8.0 7.0 6.0 \n",
568 | "2 8.0 7.0 6.0 \n",
569 | "3 8.0 7.0 6.0 \n",
570 | "4 8.0 7.0 6.0 \n",
571 | "\n",
572 | "[5 rows x 111 columns]"
573 | ]
574 | },
575 | "execution_count": 9,
576 | "metadata": {},
577 | "output_type": "execute_result"
578 | }
579 | ],
580 | "source": [
581 | "df = df.drop(drop_col,axis=1)\n",
582 | "df.head()"
583 | ]
584 | },
585 | {
586 | "cell_type": "markdown",
587 | "metadata": {},
588 | "source": [
589 | "### Imputing null values with mean"
590 | ]
591 | },
592 | {
593 | "cell_type": "code",
594 | "execution_count": 10,
595 | "metadata": {
596 | "collapsed": false
597 | },
598 | "outputs": [],
599 | "source": [
600 | "df = df.fillna(df.mean())"
601 | ]
602 | },
603 | {
604 | "cell_type": "code",
605 | "execution_count": 11,
606 | "metadata": {
607 | "collapsed": false
608 | },
609 | "outputs": [
610 | {
611 | "data": {
612 | "text/plain": [
613 | "True"
614 | ]
615 | },
616 | "execution_count": 11,
617 | "metadata": {},
618 | "output_type": "execute_result"
619 | }
620 | ],
621 | "source": [
622 | "#check if any NaN values\n",
623 | "df.isnull().values.any()"
624 | ]
625 | },
626 | {
627 | "cell_type": "markdown",
628 | "metadata": {},
629 | "source": [
630 | "#### This means there are still columns that contain null values. Let's check further."
631 | ]
632 | },
633 | {
634 | "cell_type": "code",
635 | "execution_count": 12,
636 | "metadata": {
637 | "collapsed": false
638 | },
639 | "outputs": [
640 | {
641 | "data": {
642 | "text/plain": [
643 | "['field', 'race', 'imprace', 'income']"
644 | ]
645 | },
646 | "execution_count": 12,
647 | "metadata": {},
648 | "output_type": "execute_result"
649 | }
650 | ],
651 | "source": [
652 | "na_sum = list(df.isnull().sum())\n",
653 | "#na_col = list(df.isnull().sum().index)\n",
654 | "nan_col =[]\n",
655 | "for i in range(len(na_sum)):\n",
656 | " if na_sum[i] > 0:\n",
657 | " nan_col.append(na_col[i])\n",
658 | "nan_col"
659 | ]
660 | },
661 | {
662 | "cell_type": "code",
663 | "execution_count": 13,
664 | "metadata": {
665 | "collapsed": false
666 | },
667 | "outputs": [
668 | {
669 | "data": {
670 | "text/html": [
671 | "\n",
672 | "
\n",
673 | " \n",
674 | " \n",
675 | " | \n",
676 | " field | \n",
677 | " from | \n",
678 | " zipcode | \n",
679 | " career | \n",
680 | "
\n",
681 | " \n",
682 | " \n",
683 | " \n",
684 | " 0 | \n",
685 | " Law | \n",
686 | " Chicago | \n",
687 | " 60,521 | \n",
688 | " lawyer | \n",
689 | "
\n",
690 | " \n",
691 | " 1 | \n",
692 | " Law | \n",
693 | " Chicago | \n",
694 | " 60,521 | \n",
695 | " lawyer | \n",
696 | "
\n",
697 | " \n",
698 | " 2 | \n",
699 | " Law | \n",
700 | " Chicago | \n",
701 | " 60,521 | \n",
702 | " lawyer | \n",
703 | "
\n",
704 | " \n",
705 | " 3 | \n",
706 | " Law | \n",
707 | " Chicago | \n",
708 | " 60,521 | \n",
709 | " lawyer | \n",
710 | "
\n",
711 | " \n",
712 | " 4 | \n",
713 | " Law | \n",
714 | " Chicago | \n",
715 | " 60,521 | \n",
716 | " lawyer | \n",
717 | "
\n",
718 | " \n",
719 | "
\n",
720 | "
"
721 | ],
722 | "text/plain": [
723 | " field from zipcode career\n",
724 | "0 Law Chicago 60,521 lawyer\n",
725 | "1 Law Chicago 60,521 lawyer\n",
726 | "2 Law Chicago 60,521 lawyer\n",
727 | "3 Law Chicago 60,521 lawyer\n",
728 | "4 Law Chicago 60,521 lawyer"
729 | ]
730 | },
731 | "execution_count": 13,
732 | "metadata": {},
733 | "output_type": "execute_result"
734 | }
735 | ],
736 | "source": [
737 | "df[['field', 'from', 'zipcode', 'career']].head()"
738 | ]
739 | },
740 | {
741 | "cell_type": "markdown",
742 | "metadata": {},
743 | "source": [
744 | "#### These columns contain object-type (string) values. It is hard to impute null values for these variables, so let's just drop them. "
745 | ]
746 | },
747 | {
748 | "cell_type": "code",
749 | "execution_count": 14,
750 | "metadata": {
751 | "collapsed": true
752 | },
753 | "outputs": [],
754 | "source": [
755 | "df = df.drop(['from','zipcode','field','career'], axis=1)"
756 | ]
757 | },
758 | {
759 | "cell_type": "code",
760 | "execution_count": 15,
761 | "metadata": {
762 | "collapsed": false
763 | },
764 | "outputs": [
765 | {
766 | "data": {
767 | "text/plain": [
768 | "False"
769 | ]
770 | },
771 | "execution_count": 15,
772 | "metadata": {},
773 | "output_type": "execute_result"
774 | }
775 | ],
776 | "source": [
777 | "df.isnull().values.any()"
778 | ]
779 | },
780 | {
781 | "cell_type": "markdown",
782 | "metadata": {},
783 | "source": [
784 | "#### We have successfully cleaned all null values in the dataset. "
785 | ]
786 | },
787 | {
788 | "cell_type": "markdown",
789 | "metadata": {},
790 | "source": [
791 | "## 1.2 Transformation"
792 | ]
793 | },
794 | {
795 | "cell_type": "markdown",
796 | "metadata": {},
797 | "source": [
798 | "### Normalize data"
799 | ]
800 | },
801 | {
802 | "cell_type": "code",
803 | "execution_count": 16,
804 | "metadata": {
805 | "collapsed": true
806 | },
807 | "outputs": [],
808 | "source": [
809 | "from sklearn.preprocessing import StandardScaler"
810 | ]
811 | },
812 | {
813 | "cell_type": "code",
814 | "execution_count": 17,
815 | "metadata": {
816 | "collapsed": false
817 | },
818 | "outputs": [],
819 | "source": [
820 | "X = StandardScaler().fit_transform(df)"
821 | ]
822 | },
823 | {
824 | "cell_type": "code",
825 | "execution_count": 18,
826 | "metadata": {
827 | "collapsed": false
828 | },
829 | "outputs": [
830 | {
831 | "data": {
832 | "text/plain": [
833 | "(8378, 107)"
834 | ]
835 | },
836 | "execution_count": 18,
837 | "metadata": {},
838 | "output_type": "execute_result"
839 | }
840 | ],
841 | "source": [
842 | "X.shape"
843 | ]
844 | },
845 | {
846 | "cell_type": "markdown",
847 | "metadata": {},
848 | "source": [
849 | "## 1.3 Reduction"
850 | ]
851 | },
852 | {
853 | "cell_type": "markdown",
854 | "metadata": {},
855 | "source": [
856 | "### PCA\n",
857 | " - Find out what is fairly good value for n_components according to the Explained Variance Ratio\n",
858 | " - Reduce dimensions by the n_components"
859 | ]
860 | },
861 | {
862 | "cell_type": "code",
863 | "execution_count": 19,
864 | "metadata": {
865 | "collapsed": false
866 | },
867 | "outputs": [
868 | {
869 | "data": {
870 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYwAAAEKCAYAAAAB0GKPAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3Xt4XXWd7/H3NzuXnXuae9qmbUrTlnLH2hZF5SIKeOno\noIADCs4Mwwhej+PozJyH0Tnn6DPjeI56PFQUUEaUAQStWG6DFxBo6QVoaUtLm1KaXtNbLs09+Z4/\n1gps0qRZLd3Zyc7n9Tzr2Wuv9Vt7f3+95Ju1fjdzd0REREaSkeoARERkfFDCEBGRSJQwREQkEiUM\nERGJRAlDREQiUcIQEZFIlDBERCQSJQwREYlECUNERCLJTHUAJ1N5ebnPmDEj1WGIiIwbq1ev3u/u\nFVHKplXCmDFjBqtWrUp1GCIi44aZbY9aVo+kREQkEiUMERGJRAlDREQiUcIQEZFIkpowzOxSM9tk\nZlvM7KtDnDcz+154fq2ZnZtwrsTM7jezl81so5mdl8xYRUTk2JKWMMwsBvwAuAyYB1xtZvMGFbsM\nqA+3G4BbE859F3jE3ecCZwEbkxWriIiMLJl3GAuALe7e4O7dwD3A4kFlFgN3eWA5UGJmNWZWDLwb\nuB3A3bvd/XASYxURkREkM2FMAXYkvG8Mj0UpUwc0AXea2fNm9mMzy09GkP39zv/93Sv8cXNTMj5e\nRCRtjNVG70zgXOBWdz8HOAIc1QYCYGY3mNkqM1vV1HT8P/QzMowfPtnA71/e95YCFhFJd8lMGDuB\n2oT3U8NjUco0Ao3uviI8fj9BAjmKu9/m7vPdfX5FRaTR7UepKoqzt6XzhK4VEZkokpkwVgL1ZlZn\nZtnAVcDSQWWWAp8Me0stAprdfbe77wF2mNmcsNzFwIZkBVpVlKOEISIygqTNJeXuvWZ2M/AoEAPu\ncPf1ZnZjeH4JsAy4HNgCtAPXJ3zEZ4G7w2TTMOjcSVVVGGfFtoPJ+ngRkbSQ1MkH3X0ZQVJIPLYk\nYd+Bm4a59gVgfjLjG1BZFGdfayfujpmNxleKiIw7Y7XRe1RVFubQ0+ccau9JdSgiImOWEgZBozeg\ndgwRkWNQwiBo9AbY19qV4khERMYuJQx0hyEiEoUSBlBRGN5hKGGIiAxLCQOIZ8Uoyctib4seSYmI\nDEcJI1RZqMF7IiLHooQRqiqKs1eN3iIiw1LCCFUWxmnSHYaIyLCUMEJVRTnsa+2iv99THYqIyJik\nhBGqKorT2+8cbO9OdSgiImOSEkZoYPCeGr5FRIamhBGqKAwG7+1T11oRkSEpYYR0hyEicmxKGKHX\nR3ura62IyJCUMEI5mTFK87N1hyEiMgwljATBaG/dYYiIDEUJI0FVuPKeiIgcTQkjgeaTEhEZnhJG\ngqqiOE2tXfRptLeIyFGUMBJUFeXQ73CgTe0YIiKDKWEkqAxX3lPXWhGRoylhJNBSrSIiw1PCSDAw\n2nvX4Y4URyIiMvYkNWGY2aVmtsnMtpjZV4c4b2b2vfD8WjM7N+Hcq2a2zsxeMLNVyYxzQHVRnPKC\nHNa8dng0vk5EZFzJTNYHm1kM+AFwCdAIrDSzpe6+IaHYZUB9uC0Ebg1fB1zo7vuTFeNgZsbCulKW\nNxzA3TGz0fpqEZExL5l3GAuALe7e4O7dwD3A4kFlFgN3eWA5UGJmNUmMaUSLZpayu7mTHQf1WEpE\nJFEyE8YUYEfC+8bwWNQyDvyXma02sxuSFuUgC2eWAbB824HR+koRkXFhLDd6n+/uZxM8trrJzN49\nVCEzu8HMVpnZqqamprf8pfWVBZTmZ7O8QQlDRCRRMhPGTqA24f3U8FikMu4+8LoPeJDgEddR3P02\nd5/v7vMrKirectBmxoIZpaxoOPiWP0tEJJ0kM2GsBOrNrM7MsoGrgKWDyiwFPhn2lloENLv7bjPL\nN7NCADPLB94HvJTEWN9k0cxSdh7uYM
fB9tH6ShGRMS9pvaTcvdfMbgYeBWLAHe6+3sxuDM8vAZYB\nlwNbgHbg+vDyKuDBsJdSJvBzd38kWbEONtCOsWLbQWpL80bra0VExrSkJQwAd19GkBQSjy1J2Hfg\npiGuawDOSmZsxzKnqpCSvCxWNBzgirdNTVUYIiJjylhu9E6ZjIywHWOb2jFERAYoYQxj4cwyXjvY\nrmlCRERCIyYMM5tqZg+aWZOZ7TOzX5pZ2j+nmT99EgBrGzVNiIgIRLvDuJOgN1MNMBn4TXgsrc0o\nywfQiG8RkVCUhFHh7ne6e2+4/QR46wMexrjivCyK4pm8pq61IiJAtIRxwMyuMbNYuF0DTIhh0NPK\n8pQwRERCURLGp4GPA3uA3cAVvDFeIq1NK83T4D0RkdCI4zDcfTvw4VGIZcypLc3jvzbso6/fiWVo\nqnMRmdiGTRhm9hV3/1cz+z7BzLFv4u6fS2pkY8C00jy6+/rZ29LJ5JLcVIcjIpJSx7rD2Bi+jspq\nd2PRtHBakNcOtithiMiEN2zCcPffhLvt7n5f4jkz+1hSoxojBhLGjoPtLArnlxIRmaiiNHp/LeKx\ntDO5JJcMQw3fIiIcuw3jMoKZZKeY2fcSThUBvckObCzIimVQU5yrrrUiIhy7DWMXQfvFh4HVCcdb\ngS8mM6ixZFqpxmKIiMCx2zBeBF40s5+7e88oxjSmTCvN44mX96U6DBGRlIuyHsYMM/smMA+IDxx0\n95lJi2oMmVaWx/62Ltq7e8nLTuryISIiY1rUyQdvJWi3uBC4C/hZMoMaS2pf7ymlSQhFZGKLkjBy\n3f0JwNx9u7v/M/CB5IY1diSOxRARmciiPGPpMrMM4JVwje6dQEFywxo7EsdiiIhMZFHuMD4P5AGf\nA94GXAN8KplBjSWT8rIoyNE05yIix7zDMLMYcKW7fxloY4LMUpvIzKjVrLUiIse+w3D3PuD8UYpl\nzJpWqsF7IiJR2jCeN7OlwH3AkYGD7v5A0qIaY2on5fGHTU24O2aa5lxEJqYoCSNOsMLeRQnHHJgw\nCWNaWR5dvf3sa+2iqig+8gUiImkoygJKJ9xuYWaXAt8FYsCP3f1bg85beP5yoB24zt3XJJyPEUxP\nstPdP3iicbxVp9YUAfD8a4e59PTqVIUhIpJSUXpJnZDwh/0PgMsIRolfbWbzBhW7DKgPtxsIBggm\n+jxvrMuRMmfXlpCfHePpLftTHYqISMokLWEAC4At7t7g7t3APcDiQWUWA3d5YDlQYmY1AGY2lWCA\n4I+TGGMkWbEMFs4s409KGCIygSUzYUwBdiS8bwyPRS3zf4CvAP3JCvB4vHNWOdv2H6HxkHpLicjE\nNGLCMLMqM7vdzB4O388zs79MZlBm9kFgn7uvjlD2BjNbZWarmpqakhbTu+rLAXhmy4GkfYeIyFgW\n5Q7jJ8CjwOTw/WbgCxGu2wnUJryfGh6LUuadwIfN7FWCR1kXmdmQEx66+23uPt/d51dUVEQI68TU\nVxZQUZjDU3osJSITVJSEUe7u9xI+GnL3XqAvwnUrgXozqzOzbOAqYOmgMkuBT1pgEdDs7rvd/Wvu\nPtXdZ4TX/c7dr4lYp6QwM86fVc4zW/bT3++pDEVEJCWiJIwjZlZGMPaCgR/sI10UJpabCe5ONgL3\nuvt6M7vRzG4Miy0DGoAtwI+Azxx/FUbP+bPKOXCkm417WlIdiojIqIsycO9LBHcCp5jZ00AFcEWU\nD3f3ZQRJIfHYkoR9B24a4TP+APwhyvcl2ztnBe0YT2/Zz2mTi1McjYjI6BrxDiMcSPce4B3A3wCn\nufvaZAc2FlUXx5lVWcCf1PAtIhNQlF5SNwEF7r7e3V8CCsxsTD86SqbzZ5Xz3LYDdPVGacYREUkf\nUdow/trdDw+8cfdDwF8nL6Sx7bxTyujs6Wdt44jNOCIiaSVKwohZwhSt4ZQf2ckLaWxbMKMUgOVb\n9V
hKRCaWKAnjEeA/zexiM7sY+EV4bEKalJ/N3OpCVmw7mOpQRERGVZReUn9P0Nj9t+H7xxkD8zul\n0sK6Uu5d1UhPXz9ZsWTOriIiMnZE6SXV7+63uvsV4fbDcCW+CWvRzDI6evrUjiEiE0qUXlLvNLPH\nzWyzmTWY2TYzaxiN4MaqBXVhO0aD2jFEZOKI8jzlduA7BGt7vx2YH75OWGUFOdRXFqgdQ0QmlCht\nGM3u/nDSIxlnFs4s5cE1O+nt6ydT7RgiMgFE+Un3ezP7NzM7z8zOHdiSHtkYt7CujCPdfby0S/NK\nicjEEOUOY2H4Oj/hmAMXnfxwxo+FM4N2jBUNBzi7tiTF0YiIJN+ICcPdLxyNQMabysI4MyvyWd5w\ngL95zympDkdEJOmi3GFgZh8ATgPiA8fc/RvJCmq8WDCjlIdf2oO7kzAYXkQkLUXpVrsEuBL4LGDA\nx4DpSY5rXJhbXUhzRw/7WrtSHYqISNJFafR+h7t/Ejjk7l8HzgNmJzes8WF2VSEAm/e2pjgSEZHk\ni5IwOsLXdjObDPQANckLafyYXR0kjE17lDBEJP1FacN4yMxKgH8D1hD0kJrQc0kNKC/IoTQ/m1f2\ntqU6FBGRpIvSS+pfwt1fmtlDQNzdNYlSqL6ygM37dIchIulv2IRhZhe5++/M7KNDnMPdH0huaOPD\nnOpCHlizUz2lRCTtHesO4z3A74APDXHOASUMoL6qkLauXnY1dzKlJDfV4YiIJM2wCcPdbzGzDOBh\nd793FGMaV2ZXFgBBTyklDBFJZ8fsJeXu/cBXRimWcen1rrXqKSUiaS5Kt9r/MrMvm1mtmZUObEmP\nbJyYlJ9NRWEOm9VTSkTSXJSEcSVwE/AksDrcVkX5cDO71Mw2mdkWM/vqEOfNzL4Xnl87MAuumcXN\n7Dkze9HM1pvZ16NXafTNrirgFfWUEpE0F6Vbbd2JfLCZxYAfAJcAjcBKM1vq7hsSil0G1IfbQuDW\n8LULuMjd28wsC/iTmT3s7stPJJZkm11VyD3P7aC/38nIUE8pEUlPUScfPB2Yx5snH7xrhMsWAFvc\nvSH8jHuAxUBiwlgM3OXuDiw3sxIzq3H33cDAM56scPMosabC7KpCOnr6aDzUwbSyvFSHIyKSFFEm\nH7wF+H64XQj8K/DhCJ89BdiR8L4xPBapjJnFzOwFYB/wuLuviPCdKTG76o2eUiIi6SpKG8YVwMXA\nHne/HjgLKE5qVIC797n72cBUYEF4l3MUM7vBzFaZ2aqmpqZkhzWk+rCn1CYlDBFJY5EmHwy71/aa\nWRHBb/y1Ea7bOajc1PDYcZVx98PA74FLh/oSd7/N3ee7+/yKiooIYZ18RfEsaorjvKKEISJpLErC\nWBVOPvgjgh5Sa4BnI1y3Eqg3szozywauApYOKrMU+GTYW2oR0Ozuu82sIvxOzCyXoOH85WhVSo15\nNUU8v+MwQXOMiEj6idJL6jPh7hIzewQocve1Ea7rNbObgUeBGHCHu683sxvD80uAZcDlwBagHbg+\nvLwG+GnY0yoDuNfdHzq+qo2uC+dW8sTL+3hlX9vrg/lERNLJiAnDzJYC9wC/dvdXj+fD3X0ZQVJI\nPLYkYd8JxngMvm4tcM7xfFeqXTKvin/61Us8vmGvEoaIpKUoj6T+HTgf2GBm95vZFWYWH+miiaaq\nKM5ZtSU8tn5PqkMREUmKEROGu/8xfCw1E/gh8HGChm8Z5H3zqnixsZk9zZ2pDkVE5KSLcocx0PD8\n58CNwNuBnyYzqPHqffOqAHh8494URyIicvJFGbh3L7ARuAj4v8Ap7v7ZZAc2Hs2qLKCuPF+PpUQk\nLUWZGuR24Gp370t2MOOdmXHJvCrufHobLZ09FMWzUh2SiMhJE6UN41Eli+jeN6+Knj7nD5tSM+pc\nRCRZIrVhSHTnTJtEeUEOdy/frkF8IpJWlDBOsliG8aVLZrNi20F+
/txrqQ5HROSkGbYNY2Axo+G4\n+5qTH056uHpBLb9dt4tvLnuZC+ZUaq1vEUkLx7rD+Pdw+wGwAriNYD6pFeExGYaZ8a2Pnkm/O197\nYJ0eTYlIWhg2Ybj7he5+IbAbODecEfZtBFN2DJ51VgapLc3j7y+dy5Obm/j1C7tSHY6IyFsWpQ1j\njruvG3jj7i8BpyYvpPRx7aLpzCzP5z9X7hi5sIjIGBclYaw1sx+b2QXh9iNgxNlqBTIyjA+cWcOK\nbQfY39aV6nBERN6SKAnjemA98Plw28Ab05DLCC4/o4Z+h0de0uhvERnfoqyH0WlmS4Bl7r5pFGJK\nK3OrC5lZns+ydbu5ZtH0VIcjInLCoswl9WHgBeCR8P3Z4RoZEoGZcfkZNSxv0GMpERnfojySugVY\nABwGcPcXgLpkBpVuLjujmn6Hx9ZrFlsRGb+iJIwed28edEwDC47DvJoiZpTlsWzd7lSHIiJywqIk\njPVm9gkgZmb1ZvZ94Jkkx5VWBh5LPdtwgINHulMdjojICYmSMD4LnAZ0Ab8AWoAvJDOodPSBM2vo\n63eW/HFrqkMRETkhUXpJtQP/GG5ygk6bXMwnFk7jticbOLWmkI+cMzXVIYmIHJcRE4aZzQa+DMxI\nLO/uFyUvrPT09Q+fRkNTG3//y3VML8vn3GmTUh2SiEhkUR5J3Qc8D/wT8HcJmxynrFgGt/7F26gu\ninPDXavVzVZExpUoCaPX3W919+fcffXAlvTI0tSk/GyWXPM29rd1cd+qxlSHIyISWZSE8Rsz+4yZ\n1ZhZ6cAW5cPN7FIz22RmW8zsq0OcNzP7Xnh+7cAaHGZWa2a/N7MNZrbezD5/nPUa0+ZNLuJt0yfx\n4PONmvpcRMaNKAnjUwSPoJ4BVofbqpEuMrMYwboZlwHzgKvNbN6gYpcB9eF2A3BreLwX+G/uPg9Y\nBNw0xLXj2kfOmcLmvW2s39WS6lBERCIZMWG4e90Q28wIn70A2OLuDe7eDdwDLB5UZjFwlweWAyVm\nVuPuuwdW9HP3VmAjMOW4ajbGffDMGrJjGTywRkuLiMj4MGzCMLOLwtePDrVF+OwpQOJCEI0c/UN/\nxDJmNoNg0aYVEb5z3CjJy+bCuRUsfXEXvX39qQ5HRGREx7rDeE/4+qEhtg8mOS4AzKwA+CXwBXcf\n8tmNmd1gZqvMbFVTU9NohHXSfOScqexv6+KpLftTHYqIyIiGHYfh7reErye69sVOoDbh/VSOXtp1\n2DJmlkWQLO529weOEedtBOuNM3/+/HHVgnzh3AqKc7N4YM1OLpxTmepwRESOacSBewBm9gGC6UHi\nA8fc/RsjXLYSqDezOoIkcBXwiUFllgI3m9k9wEKg2d13m5kBtwMb3f07kWoyDuVkxvjQWTXct6qR\nptYuKgpzUh2SiMiwoqyHsQS4kmBOKQM+Boy4EpC79wI3A48SNFrf6+7rzexGM7sxLLYMaAC2AD8C\nPhMefydwLXCRmb0QbpcfV83GieveMQMHvnTvC/T3j6sbJBGZYGykcQBmttbdz0x4LQAedvd3jU6I\n0c2fP99XrRqxx++Y8/MVr/EPD67j794/h5sunJXqcERkAjGz1e4+P0rZKOMwOsLXdjObDPQANSca\nnBzt6gW1fOisyfz7Y5t4btvBVIcjIjKkKAnjITMrAf4NWAO8SjDNuZwkZsb/+sjpTC/L53O/eJ7m\njp5UhyQicpQoA/f+xd0Pu/svCdou5rr7f09+aBNLYTyL7111Dk1tXfzP325IdTgiIkcZtpfUsQbn\nmRnH6uoqJ+aMqcX8zbtn8v/+sJUPnDmZ98yuSHVIIiKvO1a32g8d45wDShhJ8LmL63lsw16+9su1\nPPrFd1MYz0p1SCIiwLEH7p3ogD15C+JZMf71ijO54tZn+Mzda7hobiW1k/JYMLOUIiUPEUmhKCvu\nlQG3AOcT3Fn8CfiGux9IcmwT
1rnTJvHl98/h+09s4alXgmlD5lYX8tBnzyczFqWfgojIyRflp889\nQBPw58AV4f5/JjMogc9cMIsN33g/q/7pvXzzo2fw8p5WfrZ8e6rDEpEJLErCqAl7Sm0Lt/8BVCU7\nMAk6F5QX5HDV22s5f1Y533l8MwePdKc6LBGZoKIkjMfM7Cozywi3jxNM9yGjxMy45UPzONLdx78/\ntinV4YjIBBUlYfw18HOgK9zuAf7GzFrNTMvFjZL6qkKuXTSdnz/3Gusam1MdjohMQFEG7hW6e4a7\nZ4VbRnis0N2LRiNICXzxvbMpzcvmz299hr+/fy1b9rWlOiQRmUCizFb7l4Pex8zsluSFJMMpzsvi\nVze9kyvfXsuvXtjJJf/7j/zwj1tTHZaITBBRHkldbGbLzKzGzE4HlgOFSY5LhlFbmse//NnpPP3V\ni7j0tGq++fDLPLR2V6rDEpEJYMRxGO7+CTO7ElgHHAE+4e5PJz0yOabyghz+95Vn09S6gi/d+yKT\nS3I5d9qkVIclImksyiOpeuDzBMulbgeuNbO8ZAcmI4tnxfjhtW+juijODXet4u4V22loamOkNU5E\nRE5ElCVafwPc5O5PhEunfolg+dXTkhqZRFJWkMOd17+d6+58jn988CUAaorjXL1gGtcums6k/OwU\nRygi6SLKintF7t4y6Nhsd9+c1MhOwHhdce9kcHe27T/Csw0HeHT9Xp7c3ERuVoyrFtTylffPJTc7\nluoQRWQMOikr7pnZVwDcvcXMPjbo9HUnHp4kg5kxs6KAv1g4nbs+vYBHv/BuPnBmDT955lU+/sNn\n2dPcmeoQRWScO1YbxlUJ+18bdO7SJMQiJ9Gc6kK+/bGz+NG182loamPxD/7E6u2H1L4hIifsWG0Y\nNsz+UO9ljHrvvCru/9t38Fc/XcWf3/oMk/KyOH1KMQvrSll89hRqS9V/QUSiOVbC8GH2h3ovY9ip\nNUX85rPn89t1u3mpsZm1O5v59mOb+fZjm1kwo5T3n17NwrpSTq0pIpah3wVEZGjDNnqbWR/BuAsD\ncoH2gVNA3N3H3Go+E7nR+3jtPNzBr57fyYPP73x9ipHCnEzeM6eCD501mQvmVJCTqYZykXR3PI3e\nI/aSGk+UME7MrsMdrHz1IM9uPcDjG/Zy4Eg3hfFM3lVfznmnlHP+rHLqyvNTHaaIJMGYSRhmdinw\nXSAG/NjdvzXovIXnLye4g7nO3deE5+4APgjsc/fTo3yfEsZb19vXzzNbD/Dbtbt56pUmdoW9q957\nahX/cPlcZlYUpDhCETmZxkTCMLMYsBm4BGgkGOx3tbtvSChzOfBZgoSxEPiuuy8Mz70baAPuUsJI\nDXdn+4F2Hlq7iyV/bKCzp4+rF0zj1JoiSvKyqC6Oc9bUErV7iIxjx5Mwooz0PlELgC3u3hAGdQ+w\nGNiQUGYxQUJwYLmZlZhZjbvvdvcnzWxGEuOTEZgZM8rzufmieq58+zS+8/gm7l6xnf6E3zEqCnO4\n/PRqLphTSWVRDhUFOZTmZ2vtcZE0lMyEMQXYkfC+keAuYqQyU4DdSYxLTkBFYQ7f/OiZ3PKh0zjc\n3sPhjm42723j4XW7uWflDn767BvrjZtBWX425QU5LKgr5a/On8m0MnXfFRnvkpkwRoWZ3QDcADBt\n2rQUR5P+4lkxqotjVBfHmVtdxIfPmkxbVy8bd7ewv7WLprau8LWbPc0d/OK51/jZ8u188MzJXPn2\nWubPmKTeVyLjVDITxk6gNuH91PDY8ZY5Jne/DbgNgjaM4w9T3qqCnEzePqN0yHN7mju54+lt3L18\nO0tf3EU8K4NFM8uYV1PEjLJ8ZpTnc+bUYuJZSiIiY10yE8ZKoN7M6giSwFXAJwaVWQrcHLZvLASa\n3V2Po9JIdXGcf7j8VD5/cT3LGw7w5OYmnt56gD+9sp/esDEkJzODBXWlLJpZRm1pHpWFOdSW5j
Gl\nJDfF0YtIoqQlDHfvNbObgUcJutXe4e7rzezG8PwSYBlBD6ktBN1qrx+43sx+AVwAlJtZI3CLu9+e\nrHglufJzMrn41CouPrUKCLrv7jrcyea9rTyz9QBPvdLEvz266U3X1Jbm8s5TyllQV8rc6iJOqczX\n4yyRFNLAPRkzmjt62NfSyd6WLrbsa+XprQdY3nCA1s5eAGIZxqk1hbyrvoJ31Zdzdm0JednjvhlO\nJKXGxDiMVFDCSD+9ff007D/Cy3ta2bSnhZWvHmLN9kOvP86qKMxhemke1cVxygtyKC/IZm51EWdP\nK6G8ICfF0YuMfWNlHIbIW5YZy2B2VSGzqwrhrMkAtHb2sLzhIJv2tLD9QDvbD7azYVcLTW1dr9+N\nQPBIa3ZlIadUFnBKRT5zqouYXVWguxKRE6T/OTLuFMazuGReFZfMqzrqXHt3L+t3tfD8a4d4cUcz\nW5vaeGrLfrp7+4FgjMjsykI+cu4UPnrOFCqL4qMdvsi4pUdSkvb6+p0dB9vDx1qtPPVKE6u2HyKW\nYSyYEUzrPqe6gJkVBUydlEtVYZwMTXciE4TaMERG0NDUxn2rG3l6y35e2dtGR0/f6+eyYxnUlMSZ\nUpLLlJJcTqksYE51IadWF1FVlEMwZ6ZIelDCEDkO/f3OjkPtbNt/hB2HOmg82E7j4Q52He6g8VAH\nTa1dr5etKsph/vRSzplWwtRJeVQV5VBZFGdSXha5WTElExl31OgtchwyMozpZflMLxt6zY/D7d1s\n2tPKxt0trHntMKu3H+K3644eX5qdmUFFQQ6zKguoryygpiSXmEEslsHUklwW1JWSn6P/cjJ+6Q5D\n5AQcaOtiT0sne1s62dfSxaH2Hg63d7O7uZMt+9rY2tRGV9jQPiArZpwzbRJnTClmRlke08vyqSvP\nZ0pJrtpMJGV0hyGSZGUFOZQV5HDa5OIhz/f1O22dvfS509vXz+a9bfxpy36e2bqfu1dsp7PnjWQS\nz8qgrryAU6sLObWmiHmTi5hXU8Sk/OzRqo5IJEoYIkkQyzCK895Y9r6yKM759eVAsDDVvtYutu0/\nwrb9R9i6r40tTW08s/UADzz/xtybNcVxZlcVUlaQzaS8bGqK45wxpZjTpxTr0ZakhP7ViYwyM6Oq\nKE5VUZxFM8vedO7gkW427Gphw+5m1u9qYWtTG1v2tXGovZv27qAnV4ZBVVGc3OwYedkxiuJZTMrP\npjQvm9nVhSyqK2VWZYEa4OWkU8IQGUNK87M5v7789buRRPvbuljX2MwLOw6z83AHHd19tHf30trZ\ny8ZdLexDF/n/AAAOE0lEQVRv66IlHOlemp/NrIoCJpfEqSnJJT87RnZmBvGsGIXxTIriWVQU5jC3\nuojsTK2OKNEoYYiME+UFOVw4t5IL51YOed7dee1gOyu2HWTltoNsP9jOqu2H2LN29+tzbw0Wz8rg\n7NoSzppaQmVRnMrCHKqK4tQUx6kujpOlpXYlgRKGSJowe6N78Mfnv7EumbvT2+909fbT2dNHa2cv\nLR097DzcwcpXD7Ly1YPc+fSrdPf1D/o8mJSXTUleFpPysplTXcjCulIW1pVpAOMEpW61IoK7c7i9\nh32tQXfh3Yc72NXcyYG2Lg6393DgSBfrd7bQ2hU88sqwYI2T4twsppXmMbMin+ml+RTEM19vV6kp\niTO5JJeieNYI3y6ppG61InJczIxJ+dlMyg/uJIbS29fPxt2trN5+kANHumnt7OVQezevHmjn1y/s\netNMwYlK87M5fUoxZ04pZkZ5PsW5WRTFM6kpzmXKpFxiGoMybihhiEgkmbEMzphazBlTjx574u60\ndPbS3t1Le3cfzR097D7cyc7D7WzZ18a6nS3c+set9A1qS8nOzKCuLJ/ywmwKcjIpjGcxuTjO1NI8\npk4K7k7ysmMU52ZRmp+tx2AppoQhIm+ZmVGcm0VxbsLjp2
lvLtPZ08e+li5aOnto7uhh56EOtja1\nsbXpCIfau9nf2k5rZw97WjoZqo1+Ul4Ws6sKmVmRT1E8i4KcTKqK4pw+pZj6qgI10I8CJQwRGRXx\nrBjTyvJGLNfT18+uwx3sPNRBW1dwx3LwSDev7Aump398w15aO3vfNPVKTmYG08vyKMvPoawgm+Lc\nLArimRTmZFKQk0l+ePdSWZRDTXGcysK4HoWdACUMERlTsmIZx5wMckBPXz+NhzpY23iYtY3NNB5q\n50BbN+t3tdDS0UNrV+/rC2cNFsswaorjTJ2US01xLvk5MXKzYuRmZ5KfHSMvJ5OqwhxOn1JMTXFc\nj8JCShgiMi5lxTKoKw8mcFx89pQhy3T39nOkq5e2rl6aO3rY19rJrsOd7G4O7mAaD3Xw3LaDdPb0\n0RFugzuOluVnc0plQTA6vzCH6eX5zKkqZE5V4Zumf5kIlDBEJG1lZ2aQnRn0/gpGpgw9WeSA/n6n\ns7ePtq5eGg918NLOZtY1NrP9YDvrGg/zeEvnmyaOXFhXyrXnTef9p1VPiDYUJQwRkVBGhpGXnUle\ndiaVhXHOnTbpTefdnd3NnWze28raxmbuW72Dm3/+PGX52dSFXYYn5WcHPb0mBT29akvzmFySHt2H\nNXBPROQE9fU7T25u4tcv7GRfazDI8eCRbva2dr7p0VZWzDilooDzZ5XzrtkVLKwrJZ4VS13gCcbM\nEq1mdinwXSAG/NjdvzXovIXnLwfagevcfU2Ua4eihCEiY0FPXz97mjvZcbCd7Qfb2X6gnXU7D7Py\n1UN09/YzvSyP//j0wki9xpJtTIz0NrMY8APgEqARWGlmS919Q0Kxy4D6cFsI3AosjHitiMiYlBXL\noLY0j9rSPN6RcLyju48/bm7iqw+s5Yolz3DXXy5gbnVRyuI8Xslsw1gAbHH3BgAzuwdYDCT+0F8M\n3OXBbc5yMysxsxpgRoRrRUTGldzsGJeeXs3MinyuvX0FH1/yLDdfNIuczBgZGUbMjMwMIyvTqCiI\nMzmcj2usPL5KZsKYAuxIeN9IcBcxUpkpEa8VERmXZlcVcv+N7+BTdz7H/1r28ojlszMzKMzJJC8n\nRoYZRjC6PniFsvwc7r3xvKTHPe57SZnZDcANANOmTRuhtIjI2FBbmsfjX3wPLR099LvT505/P/T2\n99Pd28++1i52He5gd3MnLZ09tHUGo977w3bnfg96bTlQOEpL9ibzW3YCtQnvp4bHopTJinAtAO5+\nG3AbBI3eby1kEZHRE8sIZgkeysyKglGOZmTJHGmyEqg3szozywauApYOKrMU+KQFFgHN7r474rUi\nIjKKknaH4e69ZnYz8ChB19g73H29md0Ynl8CLCPoUruFoFvt9ce6NlmxiojIyDRwT0RkAjuecRjp\nP/mJiIicFEoYIiISiRKGiIhEooQhIiKRKGGIiEgkadVLysyagO0neHk5sP8khjMWTYQ6wsSo50So\nI0yMeqa6jtPdvSJKwbRKGG+Fma2K2rVsvJoIdYSJUc+JUEeYGPUcT3XUIykREYlECUNERCJRwnjD\nbakOYBRMhDrCxKjnRKgjTIx6jps6qg1DREQi0R2GiIhEMuEThpldamabzGyLmX011fGcLGZWa2a/\nN7MNZrbezD4fHi81s8fN7JXwdVKqY32rzCxmZs+b2UPh+7SqY7h08f1m9rKZbTSz89KtjgBm9sXw\n3+pLZvYLM4uP93qa2R1mts/MXko4NmydzOxr4c+iTWb2/tREPbwJnTDMLAb8ALgMmAdcbWbzUhvV\nSdML/Dd3nwcsAm4K6/ZV4Al3rweeCN+Pd58HNia8T7c6fhd4xN3nAmcR1DWt6mhmU4DPAfPd/XSC\nZQ2uYvzX8yfApYOODVmn8P/nVcBp4TX/L/wZNWZM6IQBLAC2uHuDu3cD9wCLUxzTSeHuu919Tbjf\nSvBDZgpB/X4aFvsp8G
epifDkMLOpwAeAHyccTps6mlkx8G7gdgB373b3w6RRHRNkArlmlgnkAbsY\n5/V09yeBg4MOD1enxcA97t7l7tsI1glaMCqBRjTRE8YUYEfC+8bwWFoxsxnAOcAKoCpc1RBgD1CV\norBOlv8DfAXoTziWTnWsA5qAO8PHbj82s3zSq464+07g28BrwG6C1TcfI83qGRquTmP+59FETxhp\nz8wKgF8CX3D3lsRzHnSRG7fd5Mzsg8A+d189XJnxXkeC37rPBW5193OAIwx6LJMGdSR8jr+YIEFO\nBvLN7JrEMulQz8HGW50mesLYCdQmvJ8aHksLZpZFkCzudvcHwsN7zawmPF8D7EtVfCfBO4EPm9mr\nBI8TLzKzn5FedWwEGt19Rfj+foIEkk51BHgvsM3dm9y9B3gAeAfpV08Yvk5j/ufRRE8YK4F6M6sz\ns2yCBqelKY7ppDAzI3juvdHdv5NwainwqXD/U8CvRzu2k8Xdv+buU919BsHf3e/c/RrSq457gB1m\nNic8dDGwgTSqY+g1YJGZ5YX/di8maHdLt3rC8HVaClxlZjlmVgfUA8+lIL5hTfiBe2Z2OcFz8Bhw\nh7v/zxSHdFKY2fnAU8A63ni+/w8E7Rj3AtMIZvb9uLsPbpQbd8zsAuDL7v5BMysjjepoZmcTNOpn\nAw3A9QS/7KVNHQHM7OvAlQQ9/J4H/gooYBzX08x+AVxAMCPtXuAW4FcMUycz+0fg0wR/Bl9w94dT\nEPawJnzCEBGRaCb6IykREYlICUNERCJRwhARkUiUMEREJBIlDBERiUQJQ8YcM+szsxfCWUvvM7O8\nYcotM7OSE/j8yWZ2/1uI71UzKz/R68cLM7vOzCanOg4ZO5QwZCzqcPezw1lLu4EbE09aIMPdLw8n\n4jsu7r7L3a84WcGmsesIpukQAZQwZOx7CphlZjPCNQLuAl4Cagd+0w/PbTSzH4XrKTxmZrkAZjbL\nzP7LzF40szVmdkpY/qXw/HVm9msz+0O4PsEtA19sZr8ys9XhZ94wUqAWrK2yJvyuJ8JjpeHnrDWz\n5WZ2Znj8n83sp2b2lJltN7OPmtm/mtk6M3sknNZl4G5m4PhzZjYrPD7DzH4Xfu4TZjYtPP4TM/ue\nmT1jZg1mdkVCfH9nZivDa76e8DlH/dmF180H7g7v9nLN7FsWrK+y1sy+fRL+bmW8cXdt2sbUBrSF\nr5kE0yb8LTCDYMT6ooRyrxKMoJ1BMDL27PD4vcA14f4K4CPhfpxg2uwZwEvhsesIZkctA3IJktH8\n8Fxp+DpwvCzxewfFXEEw02jdoGu/D9wS7l8EvBDu/zPwJyCLYI2LduCy8NyDwJ8lfNc/hvufBB4K\n938DfCrc/zTwq3D/J8B9BL8MziOYvh/gfQRrR1t47iGCadOP9Wf3h4Q/izJgE28M9i1J9b8TbaO/\n6Q5DxqJcM3sBWEUwx9Dt4fHt7r58mGu2ufsL4f5qYIaZFQJT3P1BAHfvdPf2Ia593N0PuHsHwaR3\n54fHP2dmLwLLCSaFqz9GzIuAJz1YxwB/Y/qK84H/CI/9Digzs6Lw3MMeTLS3jmBqmkfC4+sIfpAP\n+EXC63nh/nnAz8P9/0iIGYLk0e/uG3hj6uz3hdvzwBpgbkJ9jvqzG6J+zUAncLuZfZQgwckEk5nq\nAESG0OHuZyceCOaj48gxrulK2O8juCuIavD8OB7OTfVe4Dx3bzezPxDcoZxMXQDu3m9mPe4+EEc/\nb/6/6cPsH/NzQ5bw+k13/2FiQQvWShnxz87de81sAcGkgFcANxPcMckEojsMSVserDTYaGZ/BhDO\nAjpUj6tLwraGXILVz54GioFDYbKYS3AHcSzLgXeHs4xiZqXh8aeAvwiPXQDs90HrkkRwZcLrs+H+\nMwQz9BJ+/lMjfMajwKctWB8FM5tiZpUjXNMKFIblC4Bid18GfJHgMZpMMLrDkHR3LfBD
M/sG0AN8\njDevzgfBFNK/JFh/4GfuvsrM1gE3mtlGgmf3wz0KA8Ddm8KG8QfMLINgjYNLCNoq7jCztQSPcT41\n/KcMa1J4fRdwdXjsswSr8P0dwYp8148Q32NmdirwbHi31gZcQ3BHMZyfAEvMrINg3ftfm1mc4G7l\nSydQDxnnNFutTGhmdh1Bw+7NqY5lKBYsDjXf3fenOhYRPZISEZFIdIchIiKR6A5DREQiUcIQEZFI\nlDBERCQSJQwREYlECUNERCJRwhARkUj+P+Cuq/sp7mpkAAAAAElFTkSuQmCC\n",
871 | "text/plain": [
872 | ""
873 | ]
874 | },
875 | "metadata": {},
876 | "output_type": "display_data"
877 | }
878 | ],
879 | "source": [
880 | "#PCA In Sklearn\n",
881 | "from sklearn.decomposition import PCA\n",
882 | "\n",
883 | "pca_full = PCA(n_components = None)\n",
884 | "pca_full.fit(X)\n",
885 | "\n",
886 | "plt.plot(range(0,107), pca_full.explained_variance_ratio_)\n",
887 | "plt.ylabel('Explained variance ratio')\n",
888 | "plt.xlabel('Principal components')\n",
889 | "#plt.xticks(np.arange(0,100,5))\n",
890 | "plt.show()"
891 | ]
892 | },
893 | {
894 | "cell_type": "markdown",
895 | "metadata": {},
896 | "source": [
897 | "#### From the figure above, we will choose 80 as the number of principal components."
898 | ]
899 | },
900 | {
901 | "cell_type": "code",
902 | "execution_count": 20,
903 | "metadata": {
904 | "collapsed": false
905 | },
906 | "outputs": [
907 | {
908 | "name": "stdout",
909 | "output_type": "stream",
910 | "text": [
911 | "[[-0.11279008 0.43894183 -3.99012024 ..., 1.41998245 0.4333418\n",
912 | " 0.50221114]\n",
913 | " [-0.46084816 0.17902204 -3.58830904 ..., 1.60322019 0.17307902\n",
914 | " 0.3332049 ]\n",
915 | " [-2.61677928 2.52343016 -5.04277486 ..., 1.53863346 0.27665 0.10085208]\n",
916 | " ..., \n",
917 | " [ 4.29349456 3.1139787 8.86817852 ..., 1.59718786 -0.59968411\n",
918 | " -0.65606429]\n",
919 | " [ 3.06349133 4.87790257 7.91930136 ..., 0.8040673 -1.19636896\n",
920 | " 0.58418913]\n",
921 | " [ 1.87201368 5.70640416 7.50898992 ..., 1.19867262 -0.93258426\n",
922 | " 0.58532392]]\n",
923 | "\n",
924 | "[ 0.0599287 0.05130629 0.04778579 0.04120846 0.03794963 0.03442235\n",
925 | " 0.02928028 0.02648273 0.02516247 0.02253369 0.02148449 0.01909422\n",
926 | " 0.01792891 0.01660708 0.01649287 0.01499748 0.01459388 0.0135756\n",
927 | " 0.01335547 0.01295986 0.01223028 0.01205561 0.01173874 0.0115274\n",
928 | " 0.01122597 0.01099925 0.01050502 0.01044227 0.01014913 0.0100701\n",
929 | " 0.00998956 0.00958084 0.00934085 0.009218 0.00898172 0.00889459\n",
930 | " 0.00866534 0.00851304 0.008274 0.00815345 0.00807838 0.00788843\n",
931 | " 0.0077532 0.00759841 0.00726425 0.00711507 0.00700392 0.0068995\n",
932 | " 0.00674679 0.00654725 0.00645781 0.00639411 0.00625 0.00623941\n",
933 | " 0.00600632 0.00588609 0.00575109 0.00558769 0.00542205 0.00535553\n",
934 | " 0.00517615 0.00514075 0.00507192 0.00485718 0.00475494 0.00472658\n",
935 | " 0.00467574 0.00446718 0.00438275 0.00428521 0.00421698 0.00400091\n",
936 | " 0.00394904 0.0038194 0.00378123 0.00367514 0.00348699 0.0034291\n",
937 | " 0.00330351 0.00324087]\n"
938 | ]
939 | }
940 | ],
941 | "source": [
942 | "x_scaled = StandardScaler().fit_transform(X)\n",
943 | "pca = PCA(n_components = 80)\n",
944 | "x_pca = pca.fit_transform(x_scaled)\n",
945 | "print(x_pca, end = '\\n\\n')\n",
946 | "print(pca.explained_variance_ratio_)"
947 | ]
948 | },
949 | {
950 | "cell_type": "code",
951 | "execution_count": 21,
952 | "metadata": {
953 | "collapsed": false
954 | },
955 | "outputs": [
956 | {
957 | "data": {
958 | "text/plain": [
959 | "0.95439228841885837"
960 | ]
961 | },
962 | "execution_count": 21,
963 | "metadata": {},
964 | "output_type": "execute_result"
965 | }
966 | ],
967 | "source": [
968 | "sum(pca.explained_variance_ratio_)"
969 | ]
970 | },
971 | {
972 | "cell_type": "code",
973 | "execution_count": 22,
974 | "metadata": {
975 | "collapsed": false
976 | },
977 | "outputs": [
978 | {
979 | "data": {
980 | "text/plain": [
981 | "(8378, 80)"
982 | ]
983 | },
984 | "execution_count": 22,
985 | "metadata": {},
986 | "output_type": "execute_result"
987 | }
988 | ],
989 | "source": [
990 | "x_pca.shape"
991 | ]
992 | },
993 | {
994 | "cell_type": "markdown",
995 | "metadata": {},
996 | "source": [
997 | "# 2. Model Training\n",
998 | "We'll build four models and compare their prediction results:\n",
999 | " - 2.1 Manually made neural network\n",
1000 | " - 2.2 TensorFlow\n",
1001 | " - 2.3 SVM\n",
1002 | " - 2.4 Logistic regression"
1003 | ]
1004 | },
1005 | {
1006 | "cell_type": "markdown",
1007 | "metadata": {},
1008 | "source": [
1009 | "### Train and Test split"
1010 | ]
1011 | },
1012 | {
1013 | "cell_type": "code",
1014 | "execution_count": 23,
1015 | "metadata": {
1016 | "collapsed": true
1017 | },
1018 | "outputs": [],
1019 | "source": [
1020 | "from sklearn.model_selection import train_test_split\n",
1021 | "\n",
1022 | "X_train, X_test, y_train, y_test = train_test_split(x_pca, df_labels, test_size=0.2, random_state=0)"
1023 | ]
1024 | },
1025 | {
1026 | "cell_type": "code",
1027 | "execution_count": 24,
1028 | "metadata": {
1029 | "collapsed": false,
1030 | "scrolled": true
1031 | },
1032 | "outputs": [
1033 | {
1034 | "data": {
1035 | "text/plain": [
1036 | "(6702, 80)"
1037 | ]
1038 | },
1039 | "execution_count": 24,
1040 | "metadata": {},
1041 | "output_type": "execute_result"
1042 | }
1043 | ],
1044 | "source": [
1045 | "X_train.shape"
1046 | ]
1047 | },
1048 | {
1049 | "cell_type": "code",
1050 | "execution_count": 25,
1051 | "metadata": {
1052 | "collapsed": false
1053 | },
1054 | "outputs": [
1055 | {
1056 | "data": {
1057 | "text/plain": [
1058 | "array([[-4.68464119, -2.8248629 , -1.51037084, ..., -0.08057486,\n",
1059 | " -0.71625464, -0.10307144],\n",
1060 | " [-0.74018427, 1.56521961, -0.06997587, ..., 0.01645834,\n",
1061 | " 0.45644289, -0.24637226],\n",
1062 | " [ 1.74612994, -3.12552681, 1.71795705, ..., 0.16907101,\n",
1063 | " 0.4087692 , -0.38171825],\n",
1064 | " ..., \n",
1065 | " [ 0.75099882, -2.49960586, -1.51160927, ..., -0.68449073,\n",
1066 | " 0.04991678, -1.01516311],\n",
1067 | " [-3.93462896, 2.8487166 , -1.62335803, ..., 0.61735951,\n",
1068 | " 0.13858547, 0.21935022],\n",
1069 | " [-1.56477143, -1.87060714, -0.83035874, ..., -0.58688186,\n",
1070 | " -0.59803885, -0.11533395]])"
1071 | ]
1072 | },
1073 | "execution_count": 25,
1074 | "metadata": {},
1075 | "output_type": "execute_result"
1076 | }
1077 | ],
1078 | "source": [
1079 | "X_train"
1080 | ]
1081 | },
1082 | {
1083 | "cell_type": "markdown",
1084 | "metadata": {},
1085 | "source": [
1086 | "## 2.1 Manual Neural Network\n",
1087 | " - 2.1.1 Build the Neural Network\n",
1088 | " - 2.1.2 Set the hyperparameters, train the NN and evaluate\n",
1089 | " - 2.1.3 Adapt SGD method to improve the accuracy"
1090 | ]
1091 | },
1092 | {
1093 | "cell_type": "markdown",
1094 | "metadata": {},
1095 | "source": [
1096 | "### 2.1.1 Build the neural network"
1097 | ]
1098 | },
1099 | {
1100 | "cell_type": "code",
1101 | "execution_count": 96,
1102 | "metadata": {
1103 | "collapsed": false
1104 | },
1105 | "outputs": [],
1106 | "source": [
1107 | "class MyNeuralNetwork(object):\n",
1108 | " def __init__(self, input_nodes, hidden_nodes, output_nodes, learning_rate):\n",
1109 | " # Set number of nodes in input, hidden and output layers.\n",
1110 | " self.input_nodes = input_nodes\n",
1111 | " self.hidden_nodes = hidden_nodes\n",
1112 | " self.output_nodes = output_nodes\n",
1113 | "\n",
1114 | " # Initialize weights\n",
1115 | " self.weights_0_1 = np.zeros((self.hidden_nodes,self.input_nodes))\n",
1116 | "\n",
1117 | " self.weights_1_2 = np.random.normal(0.0, self.output_nodes**-0.5, \n",
1118 | " (self.output_nodes, self.hidden_nodes))\n",
1119 | " self.lr = learning_rate\n",
1120 | " \n",
1121 | " #### Set this to your implemented sigmoid function ####\n",
1122 | " # Activation function is the sigmoid function\n",
1123 | " self.sigmoid_activation = lambda x : 1 / (1 + np.exp(-x))\n",
1124 | " self.sigmoid_output_2_derivative = lambda x: x * (1 - x)\n",
1125 | " \n",
1126 | " def train(self, inputs_array, targets_array):\n",
1127 | " # Convert inputs list to 2d array\n",
1128 | " inputs = inputs_array.T\n",
1129 | " targets = np.array(targets_array, ndmin=2)\n",
1130 | " #targets = targets_array\n",
1131 | " m = inputs_array.shape[0] # number of records\n",
1132 | " \n",
1133 | " #### Implement the forward pass here ####\n",
1134 | " ### Forward pass ###\n",
1135 | " # TODO: Hidden layer\n",
1136 | " layer_1_inputs = np.dot(self.weights_0_1, inputs) # signals into hidden layer\n",
1137 | " layer_1 = layer_1_inputs # signals from hidden layer\n",
1138 | " \n",
1139 | " # TODO: Output layer\n",
1140 | " layer_2_inputs = np.dot(self.weights_1_2,layer_1) # signals into final output layer\n",
1141 | " layer_2 = self.sigmoid_activation(layer_2_inputs) # signals from final output layer\n",
1142 | " \n",
1143 | " #### Implement the backward pass here ####\n",
1144 | " ### Backward pass ###\n",
1145 | " \n",
1146 | " # TODO: Output error \n",
1147 | " layer_2_errors = targets - layer_2 # Output layer error is the difference between desired target and actual output.\n",
1148 | " layer_2_delta = layer_2_errors * self.sigmoid_output_2_derivative(layer_2)\n",
1149 | " \n",
1150 | " # TODO: Backpropagated error\n",
1151 | " layer_1_errors = np.dot(self.weights_1_2.T,layer_2_delta) # errors propagated to the hidden layer 2x128\n",
1152 | " layer_1_delta = layer_1_errors # hidden layer gradients y = x -> 1\n",
1153 | " \n",
1154 | " # TODO: Update the weights\n",
1155 | " self.weights_1_2 += self.lr*np.dot(layer_2_delta,layer_1.T)/m # update hidden-to-output weights with gradient descent step\n",
1156 | " self.weights_0_1 += self.lr*np.dot(layer_1_delta,inputs.T)/m # update input-to-hidden weights with gradient descent step\n",
1157 | " \n",
1158 | " \n",
1159 | " def run(self, inputs_list):\n",
1160 | " # Run a forward pass through the network\n",
1161 | " inputs = np.array(inputs_list, ndmin=2).T\n",
1162 | " \n",
1163 | " #### Implement the forward pass here ####\n",
1164 | " # TODO: Hidden layer\n",
1165 | " hidden_inputs = np.dot(self.weights_0_1, inputs) # signals into hidden layer\n",
1166 | " hidden_outputs = hidden_inputs # signals from hidden layer\n",
1167 | " \n",
1168 | " # TODO: Output layer\n",
1169 | " final_inputs = np.dot(self.weights_1_2,hidden_outputs) # signals into final output layer\n",
1170 | " final_outputs = self.sigmoid_activation(final_inputs) # signals from final output layer \n",
1171 | " \n",
1172 | " return final_outputs"
1173 | ]
1174 | },
1175 | {
1176 | "cell_type": "markdown",
1177 | "metadata": {},
1178 | "source": [
1179 | "### 2.1.2 Train the model and evaluation"
1180 | ]
1181 | },
1182 | {
1183 | "cell_type": "code",
1184 | "execution_count": 97,
1185 | "metadata": {
1186 | "collapsed": false
1187 | },
1188 | "outputs": [
1189 | {
1190 | "name": "stdout",
1191 | "output_type": "stream",
1192 | "text": [
1193 | "0.658711217184\n"
1194 | ]
1195 | }
1196 | ],
1197 | "source": [
1198 | "from sklearn import metrics\n",
1199 | "### Set the hyperparameters here ###\n",
1200 | "epochs = 100 #100\n",
1201 | "learning_rate = 0.01 #0.1\n",
1202 | "hidden_nodes = 10 \n",
1203 | "output_nodes = 1\n",
1204 | "\n",
1205 | "N_i = X_train.shape[1]\n",
1206 | "network = MyNeuralNetwork(N_i, hidden_nodes, output_nodes, learning_rate)\n",
1207 | "\n",
1208 | "for e in range(epochs):\n",
1209 | " network.train(X_train, y_train)\n",
1210 | " \n",
1211 | "y_pred = network.run(X_test)\n",
1212 | "y_pred = np.where(y_pred >= 0.5, 1, 0) # if probability >= 0.5, it is 1, else 0\n",
1213 | "\n",
1214 | "print(metrics.accuracy_score(y_test,y_pred[0]))"
1215 | ]
1216 | },
1217 | {
1218 | "cell_type": "markdown",
1219 | "metadata": {},
1220 | "source": [
1221 | "### 2.1.3 SGD"
1222 | ]
1223 | },
1224 | {
1225 | "cell_type": "code",
1226 | "execution_count": 98,
1227 | "metadata": {
1228 | "collapsed": false
1229 | },
1230 | "outputs": [],
1231 | "source": [
1232 | "#N_i = X_train.shape[1]\n",
1233 | "network = MyNeuralNetwork(N_i, hidden_nodes, output_nodes, learning_rate)\n",
1234 | "\n",
1235 | "random_row_idx = np.zeros(128)\n",
1236 | "for e in range(epochs):\n",
1237 | " random_row_idx = np.random.choice(X_train.shape[0],size=128)\n",
1238 | " X_batch = X_train[random_row_idx,:]\n",
1239 | " y_batch = y_train[random_row_idx]\n",
1240 | " network.train(X_batch, y_batch)"
1241 | ]
1242 | },
1243 | {
1244 | "cell_type": "code",
1245 | "execution_count": 99,
1246 | "metadata": {
1247 | "collapsed": false
1248 | },
1249 | "outputs": [
1250 | {
1251 | "name": "stdout",
1252 | "output_type": "stream",
1253 | "text": [
1254 | "0.839498806683\n"
1255 | ]
1256 | },
1257 | {
1258 | "name": "stderr",
1259 | "output_type": "stream",
1260 | "text": [
1261 | "C:\\Anaconda3\\envs\\DLNDF\\lib\\site-packages\\ipykernel\\__main__.py:2: RuntimeWarning: invalid value encountered in greater_equal\n",
1262 | " from ipykernel import kernelapp as app\n"
1263 | ]
1264 | }
1265 | ],
1266 | "source": [
1267 | "y_pred = network.run(X_test)\n",
1268 | "y_pred = np.where(y_pred >= 0.5, 1, 0) # if probability >= 0.5, it is 1, else 0\n",
1269 | "print(metrics.accuracy_score(y_test,y_pred[0]))"
1270 | ]
1271 | },
1272 | {
1273 | "cell_type": "markdown",
1274 | "metadata": {
1275 | "collapsed": false
1276 | },
1277 | "source": [
1278 | "#### Wow, mini-batch SGD improves the accuracy dramatically!"
1279 | ]
1280 | },
1281 | {
1282 | "cell_type": "markdown",
1283 | "metadata": {},
1284 | "source": [
1285 | "## 2.2 Tensorflow"
1286 | ]
1287 | },
1288 | {
1289 | "cell_type": "code",
1290 | "execution_count": 31,
1291 | "metadata": {
1292 | "collapsed": true
1293 | },
1294 | "outputs": [],
1295 | "source": [
1296 | "import tensorflow as tf"
1297 | ]
1298 | },
1299 | {
1300 | "cell_type": "code",
1301 | "execution_count": 32,
1302 | "metadata": {
1303 | "collapsed": false,
1304 | "scrolled": true
1305 | },
1306 | "outputs": [
1307 | {
1308 | "name": "stdout",
1309 | "output_type": "stream",
1310 | "text": [
1311 | "WARNING:tensorflow:float64 is not supported by many models, consider casting to float32.\n",
1312 | "WARNING:tensorflow:Using temporary folder as model directory: C:\\Users\\minga\\AppData\\Local\\Temp\\tmpi4fp2htr\n",
1313 | "INFO:tensorflow:Using default config.\n",
1314 | "INFO:tensorflow:Using config: {'keep_checkpoint_every_n_hours': 10000, 'keep_checkpoint_max': 5, '_evaluation_master': '', 'save_summary_steps': 100, '_task_id': 0, 'save_checkpoints_secs': 600, 'tf_config': gpu_options {\n",
1315 | " per_process_gpu_memory_fraction: 1\n",
1316 | "}\n",
1317 | ", '_is_chief': True, '_environment': 'local', 'save_checkpoints_steps': None, '_num_ps_replicas': 0, '_cluster_spec': , '_master': '', '_task_type': None, 'tf_random_seed': None}\n",
1318 | "WARNING:tensorflow:From C:\\Anaconda3\\envs\\DLNDF\\lib\\site-packages\\tensorflow\\contrib\\learn\\python\\learn\\estimators\\dnn.py:315 in fit.: calling BaseEstimator.fit (from tensorflow.contrib.learn.python.learn.estimators.estimator) with x is deprecated and will be removed after 2016-12-01.\n",
1319 | "Instructions for updating:\n",
1320 | "Estimator is decoupled from Scikit Learn interface by moving into\n",
1321 | "separate class SKCompat. Arguments x, y and batch_size are only\n",
1322 | "available in the SKCompat class, Estimator will only accept input_fn.\n",
1323 | "Example conversion:\n",
1324 | " est = Estimator(...) -> est = SKCompat(Estimator(...))\n",
1325 | "WARNING:tensorflow:From C:\\Anaconda3\\envs\\DLNDF\\lib\\site-packages\\tensorflow\\contrib\\learn\\python\\learn\\estimators\\dnn.py:315 in fit.: calling BaseEstimator.fit (from tensorflow.contrib.learn.python.learn.estimators.estimator) with y is deprecated and will be removed after 2016-12-01.\n",
1326 | "Instructions for updating:\n",
1327 | "Estimator is decoupled from Scikit Learn interface by moving into\n",
1328 | "separate class SKCompat. Arguments x, y and batch_size are only\n",
1329 | "available in the SKCompat class, Estimator will only accept input_fn.\n",
1330 | "Example conversion:\n",
1331 | " est = Estimator(...) -> est = SKCompat(Estimator(...))\n",
1332 | "WARNING:tensorflow:From C:\\Anaconda3\\envs\\DLNDF\\lib\\site-packages\\tensorflow\\contrib\\learn\\python\\learn\\estimators\\dnn.py:315 in fit.: calling BaseEstimator.fit (from tensorflow.contrib.learn.python.learn.estimators.estimator) with batch_size is deprecated and will be removed after 2016-12-01.\n",
1333 | "Instructions for updating:\n",
1334 | "Estimator is decoupled from Scikit Learn interface by moving into\n",
1335 | "separate class SKCompat. Arguments x, y and batch_size are only\n",
1336 | "available in the SKCompat class, Estimator will only accept input_fn.\n",
1337 | "Example conversion:\n",
1338 | " est = Estimator(...) -> est = SKCompat(Estimator(...))\n",
1339 | "WARNING:tensorflow:float64 is not supported by many models, consider casting to float32.\n",
1340 | "INFO:tensorflow:Summary name dnn/hiddenlayer_0:fraction_of_zero_values is illegal; using dnn/hiddenlayer_0_fraction_of_zero_values instead.\n",
1341 | "INFO:tensorflow:Summary name dnn/hiddenlayer_0:activation is illegal; using dnn/hiddenlayer_0_activation instead.\n",
1342 | "INFO:tensorflow:Summary name dnn/logits:fraction_of_zero_values is illegal; using dnn/logits_fraction_of_zero_values instead.\n",
1343 | "INFO:tensorflow:Summary name dnn/logits:activation is illegal; using dnn/logits_activation instead.\n",
1344 | "INFO:tensorflow:Create CheckpointSaverHook.\n",
1345 | "INFO:tensorflow:loss = 0.534811, step = 1\n",
1346 | "INFO:tensorflow:Saving checkpoints for 1 into C:\\Users\\minga\\AppData\\Local\\Temp\\tmpi4fp2htr\\model.ckpt.\n",
1347 | "WARNING:tensorflow:*******************************************************\n",
1348 | "WARNING:tensorflow:TensorFlow's V1 checkpoint format has been deprecated.\n",
1349 | "WARNING:tensorflow:Consider switching to the more efficient V2 format:\n",
1350 | "WARNING:tensorflow: `tf.train.Saver(write_version=tf.train.SaverDef.V2)`\n",
1351 | "WARNING:tensorflow:now on by default.\n",
1352 | "WARNING:tensorflow:*******************************************************\n",
1353 | "INFO:tensorflow:loss = 0.105281, step = 101\n",
1354 | "INFO:tensorflow:global_step/sec: 10.1418\n",
1355 | "INFO:tensorflow:loss = 0.033156, step = 201\n",
1356 | "INFO:tensorflow:global_step/sec: 10.5714\n",
1357 | "INFO:tensorflow:Saving checkpoints for 300 into C:\\Users\\minga\\AppData\\Local\\Temp\\tmpi4fp2htr\\model.ckpt.\n",
1358 | "WARNING:tensorflow:*******************************************************\n",
1359 | "WARNING:tensorflow:TensorFlow's V1 checkpoint format has been deprecated.\n",
1360 | "WARNING:tensorflow:Consider switching to the more efficient V2 format:\n",
1361 | "WARNING:tensorflow: `tf.train.Saver(write_version=tf.train.SaverDef.V2)`\n",
1362 | "WARNING:tensorflow:now on by default.\n",
1363 | "WARNING:tensorflow:*******************************************************\n",
1364 | "INFO:tensorflow:Loss for final step: 0.0162134.\n",
1365 | "WARNING:tensorflow:From C:\\Anaconda3\\envs\\DLNDF\\lib\\site-packages\\tensorflow\\contrib\\learn\\python\\learn\\estimators\\dnn.py:348 in predict.: calling BaseEstimator.predict (from tensorflow.contrib.learn.python.learn.estimators.estimator) with x is deprecated and will be removed after 2016-12-01.\n",
1366 | "Instructions for updating:\n",
1367 | "Estimator is decoupled from Scikit Learn interface by moving into\n",
1368 | "separate class SKCompat. Arguments x, y and batch_size are only\n",
1369 | "available in the SKCompat class, Estimator will only accept input_fn.\n",
1370 | "Example conversion:\n",
1371 | " est = Estimator(...) -> est = SKCompat(Estimator(...))\n",
1372 | "WARNING:tensorflow:From C:\\Anaconda3\\envs\\DLNDF\\lib\\site-packages\\tensorflow\\contrib\\learn\\python\\learn\\estimators\\dnn.py:348 in predict.: calling BaseEstimator.predict (from tensorflow.contrib.learn.python.learn.estimators.estimator) with batch_size is deprecated and will be removed after 2016-12-01.\n",
1373 | "Instructions for updating:\n",
1374 | "Estimator is decoupled from Scikit Learn interface by moving into\n",
1375 | "separate class SKCompat. Arguments x, y and batch_size are only\n",
1376 | "available in the SKCompat class, Estimator will only accept input_fn.\n",
1377 | "Example conversion:\n",
1378 | " est = Estimator(...) -> est = SKCompat(Estimator(...))\n",
1379 | "WARNING:tensorflow:From C:\\Anaconda3\\envs\\DLNDF\\lib\\site-packages\\tensorflow\\contrib\\learn\\python\\learn\\estimators\\dnn.py:348 in predict.: calling BaseEstimator.predict (from tensorflow.contrib.learn.python.learn.estimators.estimator) with as_iterable is deprecated and will be removed after 2016-12-01.\n",
1380 | "Instructions for updating:\n",
1381 | "Estimator is decoupled from Scikit Learn interface by moving into\n",
1382 | "separate class SKCompat. Arguments x, y and batch_size are only\n",
1383 | "available in the SKCompat class, Estimator will only accept input_fn.\n",
1384 | "Example conversion:\n",
1385 | " est = Estimator(...) -> est = SKCompat(Estimator(...))\n",
1386 | "WARNING:tensorflow:float64 is not supported by many models, consider casting to float32.\n",
1387 | "INFO:tensorflow:Summary name dnn/hiddenlayer_0:fraction_of_zero_values is illegal; using dnn/hiddenlayer_0_fraction_of_zero_values instead.\n",
1388 | "INFO:tensorflow:Summary name dnn/hiddenlayer_0:activation is illegal; using dnn/hiddenlayer_0_activation instead.\n",
1389 | "INFO:tensorflow:Summary name dnn/logits:fraction_of_zero_values is illegal; using dnn/logits_fraction_of_zero_values instead.\n",
1390 | "INFO:tensorflow:Summary name dnn/logits:activation is illegal; using dnn/logits_activation instead.\n",
1391 | "INFO:tensorflow:Loading model from checkpoint: C:\\Users\\minga\\AppData\\Local\\Temp\\tmpi4fp2htr\\model.ckpt-300-?????-of-00001.\n"
1392 | ]
1393 | }
1394 | ],
1395 | "source": [
1396 | "# Build one layer DNN with 40 units respectively.\n",
1397 | "feature_columns = tf.contrib.learn.infer_real_valued_columns_from_input(X_train)\n",
1398 | "classifier = tf.contrib.learn.DNNClassifier(feature_columns=feature_columns, hidden_units=[40], n_classes=2)\n",
1399 | "\n",
1400 | "# Fit and predict.\n",
1401 | "classifier.fit(X_train, y_train, steps=300)\n",
1402 | "predictions = list(classifier.predict(X_test, as_iterable=True))\n",
1403 | "score = metrics.accuracy_score(y_test, predictions)"
1404 | ]
1405 | },
1406 | {
1407 | "cell_type": "code",
1408 | "execution_count": 33,
1409 | "metadata": {
1410 | "collapsed": false
1411 | },
1412 | "outputs": [
1413 | {
1414 | "name": "stdout",
1415 | "output_type": "stream",
1416 | "text": [
1417 | "TF Accuracy: 0.999403341289\n"
1418 | ]
1419 | }
1420 | ],
1421 | "source": [
1422 | "print('TF Accuracy: ', score)"
1423 | ]
1424 | },
1425 | {
1426 | "cell_type": "markdown",
1427 | "metadata": {},
1428 | "source": [
1429 | "## 2.3 SVM"
1430 | ]
1431 | },
1432 | {
1433 | "cell_type": "code",
1434 | "execution_count": 34,
1435 | "metadata": {
1436 | "collapsed": true
1437 | },
1438 | "outputs": [],
1439 | "source": [
1440 | "from sklearn.svm import SVC\n",
1441 | "svc = SVC()\n",
1442 | "svc.fit(X_train, y_train)\n",
1443 | "y_pred = svc.predict(X_test)"
1444 | ]
1445 | },
1446 | {
1447 | "cell_type": "code",
1448 | "execution_count": 35,
1449 | "metadata": {
1450 | "collapsed": false
1451 | },
1452 | "outputs": [
1453 | {
1454 | "data": {
1455 | "text/plain": [
1456 | "array([0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,\n",
1457 | " 0, 0, 0, 0, 0, 0, 0], dtype=int64)"
1458 | ]
1459 | },
1460 | "execution_count": 35,
1461 | "metadata": {},
1462 | "output_type": "execute_result"
1463 | }
1464 | ],
1465 | "source": [
1466 | "y_pred[:30]"
1467 | ]
1468 | },
1469 | {
1470 | "cell_type": "code",
1471 | "execution_count": 36,
1472 | "metadata": {
1473 | "collapsed": false
1474 | },
1475 | "outputs": [
1476 | {
1477 | "name": "stdout",
1478 | "output_type": "stream",
1479 | "text": [
1480 | "SVM Accuracy: 0.994630071599\n"
1481 | ]
1482 | }
1483 | ],
1484 | "source": [
1485 | "print('SVM Accuracy: ', metrics.accuracy_score(y_test,y_pred))"
1486 | ]
1487 | },
1488 | {
1489 | "cell_type": "markdown",
1490 | "metadata": {},
1491 | "source": [
1492 | "## 2.4 Logistic regression"
1493 | ]
1494 | },
1495 | {
1496 | "cell_type": "code",
1497 | "execution_count": 37,
1498 | "metadata": {
1499 | "collapsed": false
1500 | },
1501 | "outputs": [],
1502 | "source": [
1503 | "from sklearn.linear_model import LogisticRegression\n",
1504 | "\n",
1505 | "logreg = LogisticRegression(C=1e5)\n",
1506 | "logreg.fit(X_train, y_train)\n",
1507 | "y_pred = logreg.predict(X_test)"
1508 | ]
1509 | },
1510 | {
1511 | "cell_type": "code",
1512 | "execution_count": 38,
1513 | "metadata": {
1514 | "collapsed": false
1515 | },
1516 | "outputs": [
1517 | {
1518 | "data": {
1519 | "text/plain": [
1520 | "array([0, 0, 0, ..., 0, 0, 1], dtype=int64)"
1521 | ]
1522 | },
1523 | "execution_count": 38,
1524 | "metadata": {},
1525 | "output_type": "execute_result"
1526 | }
1527 | ],
1528 | "source": [
1529 | "y_pred"
1530 | ]
1531 | },
1532 | {
1533 | "cell_type": "code",
1534 | "execution_count": 39,
1535 | "metadata": {
1536 | "collapsed": false
1537 | },
1538 | "outputs": [
1539 | {
1540 | "name": "stdout",
1541 | "output_type": "stream",
1542 | "text": [
1543 | "Log Regression Accuracy: 1.0\n"
1544 | ]
1545 | }
1546 | ],
1547 | "source": [
1548 | "print('Log Regression Accuracy: ', metrics.accuracy_score(y_test,y_pred))"
1549 | ]
1550 | },
1551 | {
1552 | "cell_type": "markdown",
1553 | "metadata": {},
1554 | "source": [
1555 | "# 3. Summary\n",
1556 | " - Manual NN: 0.839498806683. \n",
1557 | " - TF Accuracy: 0.999403341289\n",
1558 | " - SVM Accuracy: 0.994630071599\n",
1559 | " - Log Regression Accuracy: 1.0"
1560 | ]
1561 | }
1562 | ],
1563 | "metadata": {
1564 | "kernelspec": {
1565 | "display_name": "Python 3",
1566 | "language": "python",
1567 | "name": "python3"
1568 | },
1569 | "language_info": {
1570 | "codemirror_mode": {
1571 | "name": "ipython",
1572 | "version": 3
1573 | },
1574 | "file_extension": ".py",
1575 | "mimetype": "text/x-python",
1576 | "name": "python",
1577 | "nbconvert_exporter": "python",
1578 | "pygments_lexer": "ipython3",
1579 | "version": "3.5.2"
1580 | }
1581 | },
1582 | "nbformat": 4,
1583 | "nbformat_minor": 2
1584 | }
1585 |
--------------------------------------------------------------------------------
/wk8-generate-art/house.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mdalai/Deep-Learning-projects/f690b3d8901e2ee7d872765815306ed09ba83a5a/wk8-generate-art/house.jpg
--------------------------------------------------------------------------------
/wk8-generate-art/the_scream.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mdalai/Deep-Learning-projects/f690b3d8901e2ee7d872765815306ed09ba83a5a/wk8-generate-art/the_scream.jpg
--------------------------------------------------------------------------------
/wk8-generate-art/wave.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mdalai/Deep-Learning-projects/f690b3d8901e2ee7d872765815306ed09ba83a5a/wk8-generate-art/wave.jpg
--------------------------------------------------------------------------------