├── .idea
│   ├── .name
│   ├── vcs.xml
│   ├── modules.xml
│   ├── ML.iml
│   ├── misc.xml
│   └── workspace.xml
├── CMakeLists.txt
├── Utils.h
├── Utils.cpp
├── LinearRegression.h
├── LinearRegression.cpp
└── main.cpp
/.idea/.name:
--------------------------------------------------------------------------------
1 | ML
--------------------------------------------------------------------------------
/.idea/vcs.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
# Build script for the ML linear-regression demo executable.
cmake_minimum_required(VERSION 3.1)
project(ML)

# Request C++11 through CMake's standard-selection variables instead of
# appending a GCC/Clang-specific "-std=c++11" flag, so that CMake picks the
# right option for whichever compiler is in use.
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

# Every source and header that makes up the executable, kept in one list.
set(SOURCE_FILES
        main.cpp
        LinearRegression.cpp
        LinearRegression.h
        Utils.cpp
        Utils.h)

add_executable(ML ${SOURCE_FILES})
8 |
--------------------------------------------------------------------------------
/.idea/modules.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/.idea/ML.iml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/Utils.h:
--------------------------------------------------------------------------------
1 | #ifndef ML_UTILS_H
2 | #define ML_UTILS_H
3 |
4 | #include
5 |
/**
 * Stateless element-wise helpers for raw arrays of doubles.
 *
 * Each helper works on the first `len` elements of the array(s) it is given.
 * The helpers that return `double *` hand back a freshly `new[]`-allocated
 * array of `len` elements; releasing it with `delete[]` is up to the caller.
 */
class Utils {

public:
    /// Sum of the first `len` elements of `arr`.
    static double array_sum(double *arr, int len);

    /// New array whose i-th element is `arr[i]` raised to `power`.
    static double *array_pow(double *arr, int len, int power);

    /// New array whose i-th element is `arr1[i] * arr2[i]`.
    static double *array_multiplication(double *arr1, double *arr2, int len);

    /// New array whose i-th element is `arr1[i] - arr2[i]`.
    static double *array_diff(double *arr1, double *arr2, int len);

};
18 |
19 | #endif //ML_UTILS_H
20 |
--------------------------------------------------------------------------------
/.idea/misc.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
--------------------------------------------------------------------------------
/Utils.cpp:
--------------------------------------------------------------------------------
1 | #include
2 | #include "Utils.h"
3 |
4 | double Utils::array_sum(double arr[], int len) {
5 | double s = 0;
6 |
7 | for (int i = 0; i < len; ++i) {
8 | s += arr[i];
9 | }
10 |
11 | return s;
12 | }
13 |
14 | double *Utils::array_pow(double arr[], int len, int power) {
15 | double *arr2 = new double[len];
16 |
17 | for (int i = 0; i < len; ++i) {
18 | arr2[i] = pow(arr[i], power);
19 | }
20 |
21 | return arr2;
22 | }
23 |
24 | double *Utils::array_multiplication(double arr1[], double arr2[], int len) {
25 | double *arr = new double[len];
26 |
27 | for (int i = 0; i < len; ++i) {
28 | arr[i] = arr1[i] * arr2[i];
29 | }
30 |
31 | return arr;
32 | }
33 |
34 | double *Utils::array_diff(double arr1[], double arr2[], int len) {
35 | double *arr = new double[len];
36 |
37 | for (int i = 0; i < len; ++i) {
38 | arr[i] = arr1[i] - arr2[i];
39 | }
40 |
41 | return arr;
42 | }
43 |
--------------------------------------------------------------------------------
/LinearRegression.h:
--------------------------------------------------------------------------------
1 | #ifndef ML_LINEARREGRESSION_H
2 | #define ML_LINEARREGRESSION_H
3 |
/// Single-feature linear regression, y ~ theta[0] + theta[1] * x, fitted by
/// batch gradient descent.
class LinearRegression {

public:

    /// Input feature values, one per training example. The pointer is stored
    /// as given; the data is not copied.
    double *x;

    /// Target values, one per training example. The pointer is stored as
    /// given; the data is not copied.
    double *y;

    /// Number of training examples in x and y.
    int m;

    /// Model coefficients: theta[0] is the intercept and theta[1] the slope.
    /// Assigned by train().
    double *theta;

    /// Builds a model over the given data set of m (x, y) pairs.
    LinearRegression(double *x, double *y, int m);

    /// Fits theta to the data and prints the cost after every step followed
    /// by the final coefficients to standard output.
    ///
    /// @param alpha      The learning rate, e.g. 0.01.
    /// @param iterations The number of gradient descent steps to do.
    void train(double alpha, int iterations);

    /// Evaluates the fitted line at x and returns the predicted y.
    double predict(double x);

private:

    /// Cost J = 1 / (2m) * sum of squared prediction errors over the m examples.
    static double compute_cost(double *x, double *y, double *theta, int m);

    /// Hypothesis for one input: theta[0] + theta[1] * x.
    static double h(double x, double *theta);

    /// Returns a new[]-allocated array with the hypothesis evaluated for
    /// each of the m inputs.
    static double *calculate_predictions(double *x, double *theta, int m);

    /// Runs iters gradient descent steps with learning rate alpha, storing
    /// the cost after step i in J[i]. Returns the learned coefficients as a
    /// new[]-allocated array of two doubles.
    static double *gradient_descent(double *x, double *y, double alpha, int iters, double *J, int m);

};
61 |
62 |
63 | #endif //ML_LINEARREGRESSION_H
64 |
--------------------------------------------------------------------------------
/LinearRegression.cpp:
--------------------------------------------------------------------------------
1 | #include
2 | #include "LinearRegression.h"
3 | #include "Utils.h"
4 |
5 | using namespace std;
6 |
7 | LinearRegression::LinearRegression(double x[], double y[], int m) {
8 | this->x = x;
9 | this->y = y;
10 | this->m = m;
11 | }
12 |
13 | void LinearRegression::train(double alpha, int iterations) {
14 | double *J = new double[iterations];
15 | this->theta = gradient_descent(x, y, alpha, iterations, J, m);
16 |
17 | cout << "J = ";
18 | for (int i = 0; i < iterations; ++i) {
19 | cout << J[i] << ' ';
20 | }
21 | cout << endl << "Theta: " << theta[0] << " " << theta[1] << endl;
22 | }
23 |
24 | double LinearRegression::predict(double x) {
25 | return h(x, theta);
26 | }
27 |
28 | double LinearRegression::compute_cost(double x[], double y[], double theta[], int m) {
29 | double *predictions = calculate_predictions(x, theta, m);
30 | double *diff = Utils::array_diff(predictions, y, m);
31 | double *sq_errors = Utils::array_pow(diff, m, 2);
32 | return (1.0 / (2 * m)) * Utils::array_sum(sq_errors, m);
33 | }
34 |
35 | double LinearRegression::h(double x, double theta[]) {
36 | return theta[0] + theta[1] * x;
37 | }
38 |
39 | double *LinearRegression::calculate_predictions(double x[], double theta[], int m) {
40 | double *predictions = new double[m];
41 |
42 | // calculate h for each training example
43 | for (int i = 0; i < m; ++i) {
44 | predictions[i] = h(x[i], theta);
45 | }
46 |
47 | return predictions;
48 | }
49 |
50 | double *LinearRegression::gradient_descent(double x[], double y[], double alpha, int iters, double *J, int m) {
51 | double *theta = new double[2];
52 | theta[0] = 1;
53 | theta[1] = 1;
54 |
55 | for (int i = 0; i < iters; ++i) {
56 | double *predictions = calculate_predictions(x, theta, m);
57 | double *diff = Utils::array_diff(predictions, y, m);
58 |
59 | double *errors_x1 = diff;
60 | double *errors_x2 = Utils::array_multiplication(diff, x, m);
61 |
62 | theta[0] = theta[0] - alpha * (1.0 / m) * Utils::array_sum(errors_x1, m);
63 | theta[1] = theta[1] - alpha * (1.0 / m) * Utils::array_sum(errors_x2, m);
64 |
65 | J[i] = compute_cost(x, y, theta, m);
66 | }
67 |
68 | return theta;
69 | }
70 |
--------------------------------------------------------------------------------
/main.cpp:
--------------------------------------------------------------------------------
1 | #include
2 | #include "LinearRegression.h"
3 |
4 | using namespace std;
5 |
6 | void chirps_vs_temp() {
7 | double chirps[] = {20, 16, 20, 18, 17, 16, 15, 17, 15, 16, 15, 17, 16, 17, 14};
8 | double temperatures[] = {89, 72, 93, 84, 81, 75, 70, 82, 69, 83, 80, 83, 81, 84, 76};
9 |
10 | LinearRegression lr(chirps, temperatures, 15);
11 |
12 | cout << "Enter learning rate alpha (default: 0.01): ";
13 | double alpha;
14 | cin >> alpha;
15 |
16 | cout << "Enter number of iterations (default: 1500): ";
17 | int iterations;
18 | cin >> iterations;
19 |
20 | cout << "Training model..." << endl;
21 | lr.train(alpha, iterations);
22 |
23 | cout << "Model has been trained, enter number of chirps: ";
24 | double x;
25 | cin >> x;
26 |
27 | double temperature = lr.predict(x);
28 | cout << "Estimated temperature: " << temperature << " F" << endl;
29 | }
30 |
31 | void sand_vs_slope() {
32 | double diameter[] = {0.17, 0.19, 0.22, 0.235, 0.235, 0.3, 0.35, 0.42, 0.85};
33 | double slope[] = {0.63, 0.7, 0.82, 0.88, 1.15, 1.5, 4.4, 7.3, 11.3};
34 |
35 | LinearRegression lr(diameter, slope, 9);
36 |
37 | cout << "Enter learning rate alpha (default: 0.01): ";
38 | double alpha;
39 | cin >> alpha;
40 |
41 | cout << "Enter number of iterations (default: 1500): ";
42 | int iterations;
43 | cin >> iterations;
44 |
45 | cout << "Training model..." << endl;
46 | lr.train(alpha, iterations);
47 |
48 | cout << "Model has been trained, enter diameter: ";
49 | double x;
50 | cin >> x;
51 |
52 | double temperature = lr.predict(x);
53 | cout << "Estimated slope: " << temperature << endl;
54 | }
55 |
56 | void fires_vs_theft() {
57 | double fires[] = {6.2, 9.5, 10.5, 7.7, 8.6, 34.1, 11, 6.9, 7.3, 15.1, 29.1, 2.2, 5.7, 2, 2.5, 4, 5.4, 2.2, 7.2,
58 | 15.1, 16.5, 18.4, 36.2, 39.7, 18.5, 23.3, 12.2, 5.6, 21.8, 21.6, 9, 3.6, 5, 28.6, 17.4, 11.3, 3.4,
59 | 11.9, 10.5, 10.7, 10.8, 4.8};
60 | double theft[] = {29, 44, 36, 37, 53, 68, 75, 18, 31, 25, 34, 14, 11, 11, 22, 16, 27, 9, 29, 30, 40, 32, 41, 147,
61 | 22, 29, 46, 23, 4, 31, 39, 15, 32, 27, 32, 34, 17, 46, 42, 43, 34, 19};
62 |
63 | LinearRegression lr(fires, theft, 42);
64 |
65 | cout << "Enter learning rate alpha (default: 0.01): ";
66 | double alpha;
67 | cin >> alpha;
68 |
69 | cout << "Enter number of iterations (default: 1500): ";
70 | int iterations;
71 | cin >> iterations;
72 |
73 | cout << "Training model..." << endl;
74 | lr.train(alpha, iterations);
75 |
76 | cout << "Model has been trained, enter fires: ";
77 | double x;
78 | cin >> x;
79 |
80 | double temperature = lr.predict(x);
81 | cout << "Estimated theft: " << temperature << endl;
82 | }
83 |
84 | int main() {
85 | while (1) {
86 | cout << "Choose example: " << endl;
87 | cout << "0: Exit" << endl;
88 | cout << "1. Cricket chirps/sec vs temperature in degrees fahrenheit" << endl;
89 | cout << "2. Diameter (mm) of sand granules vs slope on natural beach (degrees)" << endl;
90 | cout << "3. Fires per 1000 houses vs thefts per 1000 population in Chicago" << endl;
91 |
92 | int opt;
93 | cin >> opt;
94 |
95 | if (!opt) {
96 | break;
97 | }
98 |
99 | cout << endl;
100 |
101 | switch (opt) {
102 | case 1:
103 | chirps_vs_temp();
104 | break;
105 |
106 | case 2:
107 | sand_vs_slope();
108 | break;
109 |
110 | case 3:
111 | fires_vs_theft();
112 | break;
113 |
114 | default:
115 | break;
116 | }
117 |
118 | cout << endl << endl;
119 | }
120 |
121 | return 0;
122 | }
123 |
--------------------------------------------------------------------------------
/.idea/workspace.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 |
74 |
75 |
76 |
77 |
78 |
79 |
80 |
81 |
82 |
83 |
84 |
85 |
86 |
87 |
88 |
89 |
90 |
91 |
92 |
93 |
94 |
95 |
96 |
97 |
98 |
99 |
100 |
101 |
102 |
103 |
104 |
105 |
106 |
107 |
108 |
109 |
110 |
111 |
112 |
113 |
114 |
115 |
116 |
117 |
118 |
119 |
120 |
121 |
122 |
123 |
124 |
125 |
126 |
127 |
128 |
129 |
130 |
131 |
132 |
133 |
134 |
135 |
136 |
137 |
138 |
139 |
140 |
141 |
142 |
143 |
144 |
145 |
146 |
147 |
148 |
149 |
150 |
151 |
152 |
153 |
154 |
155 |
156 |
157 |
158 |
159 |
160 |
161 |
162 |
163 |
164 |
165 |
166 |
167 |
168 |
169 |
170 |
171 |
172 |
173 |
174 |
175 |
176 |
177 |
178 |
179 |
180 |
181 |
182 |
183 |
184 |
185 |
186 |
187 |
188 |
189 |
190 |
191 |
192 |
193 |
194 |
195 |
196 |
197 |
198 |
199 |
200 |
201 |
202 |
203 |
204 |
205 |
206 |
207 |
208 |
209 |
210 |
211 |
212 |
213 |
214 |
215 |
216 |
217 |
218 |
219 |
220 |
221 |
222 |
223 |
224 |
225 |
226 |
227 |
228 |
229 |
230 |
231 |
--------------------------------------------------------------------------------