/*
 * Project "ML" - repository listing
 *
 *   .idea/
 *     .name
 *     vcs.xml
 *     modules.xml
 *     ML.iml
 *     misc.xml
 *     workspace.xml
 *   CMakeLists.txt
 *   Utils.h
 *   Utils.cpp
 *   LinearRegression.h
 *   LinearRegression.cpp
 *   main.cpp
 *
 * /.idea/.name:
 *   ML
 *
 * /.idea/vcs.xml, /.idea/modules.xml, /.idea/ML.iml:
 *   (the listing carried no content for these files)
 *
 * /CMakeLists.txt:
 *   cmake_minimum_required(VERSION 3.1)
 *   project(ML)
 *
 *   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
 *
 *   set(SOURCE_FILES main.cpp)
 *   add_executable(ML ${SOURCE_FILES} LinearRegression.cpp LinearRegression.h Utils.cpp Utils.h)
 */

// /Utils.h
#ifndef ML_UTILS_H
#define ML_UTILS_H

// NOTE(review): the header name of this #include was lost in the listing.
// <cmath> is assumed because the implementation calls pow() - confirm.
#include <cmath>

/**
 * Element-wise helpers for plain arrays of doubles.
 *
 * Every function that returns a pointer hands back an array of `len`
 * elements allocated with new[]; the caller owns it and must release it
 * with delete[].
 */
class Utils {

public:
    /**
     * Add up the first `len` elements of `arr`.
     *
     * @return The sum, or 0 when `len` is not positive.
     */
    static double array_sum(double arr[], int len);

    /**
     * Raise each of the first `len` elements of `arr` to `power`.
     *
     * @return A new array of `len` elements (caller must delete[]).
     */
    static double *array_pow(double arr[], int len, int power);

    /**
     * Multiply `arr1` and `arr2` element by element.
     *
     * @return A new array of `len` products (caller must delete[]).
     */
    static double *array_multiplication(double arr1[], double arr2[], int len);

    /**
     * Subtract `arr2` from `arr1` element by element.
     *
     * @return A new array of `len` differences (caller must delete[]).
     */
    static double *array_diff(double arr1[], double arr2[], int len);

};

#endif //ML_UTILS_H
/*
 * ML - single-feature linear regression trained with batch gradient descent.
 *
 * This text gathers, in order, the sources the repository listing carried
 * for Utils.cpp, LinearRegression.h, LinearRegression.cpp and main.cpp.
 * The /.idea/misc.xml and /.idea/workspace.xml sections of the listing held
 * no recoverable content and are not reproduced.
 */

#include <cmath>
#include <cstddef>
#include <iostream>
#include <limits>
#include <memory>
#include <stdexcept>
#include <vector>

// ---------------------------------------------------------------------------
// Utils.h (declaration repeated so this text is self-contained; the include
// guard makes the repetition harmless when the header is also present)
// ---------------------------------------------------------------------------
#ifndef ML_UTILS_H
#define ML_UTILS_H

/**
 * Element-wise helpers for plain arrays of doubles.  Functions returning a
 * pointer allocate the result with new[]; the caller must delete[] it.
 */
class Utils {

public:
    static double array_sum(double arr[], int len);

    static double *array_pow(double arr[], int len, int power);

    static double *array_multiplication(double arr1[], double arr2[], int len);

    static double *array_diff(double arr1[], double arr2[], int len);

};

#endif //ML_UTILS_H

// ---------------------------------------------------------------------------
// Utils.cpp
// ---------------------------------------------------------------------------

/**
 * Add up the first `len` elements of `arr`.
 */
double Utils::array_sum(double arr[], int len) {
    double total = 0.0;

    for (int i = 0; i < len; ++i) {
        total += arr[i];
    }

    return total;
}

/**
 * Raise each element of `arr` to `power`.
 *
 * @return A new array of `len` elements; the caller must delete[] it.
 */
double *Utils::array_pow(double arr[], int len, int power) {
    double *result = new double[len];

    for (int i = 0; i < len; ++i) {
        result[i] = std::pow(arr[i], power);
    }

    return result;
}

/**
 * Multiply `arr1` and `arr2` element by element.
 *
 * @return A new array of `len` elements; the caller must delete[] it.
 */
double *Utils::array_multiplication(double arr1[], double arr2[], int len) {
    double *result = new double[len];

    for (int i = 0; i < len; ++i) {
        result[i] = arr1[i] * arr2[i];
    }

    return result;
}

/**
 * Subtract `arr2` from `arr1` element by element.
 *
 * @return A new array of `len` elements; the caller must delete[] it.
 */
double *Utils::array_diff(double arr1[], double arr2[], int len) {
    double *result = new double[len];

    for (int i = 0; i < len; ++i) {
        result[i] = arr1[i] - arr2[i];
    }

    return result;
}

// ---------------------------------------------------------------------------
// LinearRegression.h
// ---------------------------------------------------------------------------
#ifndef ML_LINEARREGRESSION_H
#define ML_LINEARREGRESSION_H

/**
 * Fits the line y = theta[0] + theta[1] * x to a set of (x, y) samples.
 *
 * The sample arrays are borrowed, not copied: they must stay alive for as
 * long as train() may be called on the model.
 */
class LinearRegression {

public:

    // First feature (borrowed from the caller)
    double *x;

    // Target feature (borrowed from the caller)
    double *y;

    // Number of training examples
    int m;

    // The theta coefficients: theta[0] is the intercept, theta[1] the slope.
    // Null until train() has run; afterwards it points at storage held
    // inside this object.
    double *theta;

    /**
     * Create a new instance from the given data set.
     *
     * @param x Feature values, at least m elements.
     * @param y Target values, at least m elements.
     * @param m Number of training examples.
     */
    LinearRegression(double x[], double y[], int m);

    /**
     * Copy a model.  The sample arrays stay shared; the learned
     * coefficients are copied so each object refers to its own storage.
     */
    LinearRegression(const LinearRegression &other);

    LinearRegression &operator=(const LinearRegression &other);

    /**
     * Train the model with the supplied parameters and print the cost of
     * every step followed by the learned coefficients.
     *
     * @param alpha The learning rate, e.g. 0.01.
     * @param iterations The number of gradient descent steps to do; values
     *                   below zero are treated as zero.
     * @throws std::invalid_argument if there is no training data
     *                               (m <= 0 or a null sample array).
     */
    void train(double alpha, int iterations);

    /**
     * Try to predict y, given an x.
     *
     * @throws std::logic_error if the model has no coefficients yet,
     *                          i.e. train() has not been called.
     */
    double predict(double x);

private:

    // Backing storage for `theta` once the model has been trained.
    double coefficients_[2];

    /**
     * Compute the cost J: half the mean squared prediction error.
     * Expects m > 0.
     */
    static double compute_cost(double x[], double y[], double theta[], int m);

    /**
     * Compute the hypothesis theta[0] + theta[1] * x.
     */
    static double h(double x, double theta[]);

    /**
     * Calculate the target feature from the other ones.
     *
     * @return A new array of m predictions; the caller must delete[] it.
     */
    static double *calculate_predictions(double x[], double theta[], int m);

    /**
     * Performs gradient descent to learn theta by taking `iters` gradient
     * steps with learning rate alpha.  J[i] receives the cost after step i.
     *
     * @return A new two-element array; the caller must delete[] it.
     */
    static double *gradient_descent(double x[], double y[], double alpha, int iters, double *J, int m);

};


#endif //ML_LINEARREGRESSION_H

// ---------------------------------------------------------------------------
// LinearRegression.cpp
// ---------------------------------------------------------------------------

LinearRegression::LinearRegression(double x[], double y[], int m)
        : x(x), y(y), m(m), theta(nullptr), coefficients_() {
}

LinearRegression::LinearRegression(const LinearRegression &other)
        : x(other.x), y(other.y), m(other.m), theta(other.theta), coefficients_() {
    coefficients_[0] = other.coefficients_[0];
    coefficients_[1] = other.coefficients_[1];

    // A trained source points at its own storage; point at ours instead so
    // the copy does not dangle once the source is destroyed.
    if (other.theta == other.coefficients_) {
        theta = coefficients_;
    }
}

LinearRegression &LinearRegression::operator=(const LinearRegression &other) {
    x = other.x;
    y = other.y;
    m = other.m;
    coefficients_[0] = other.coefficients_[0];
    coefficients_[1] = other.coefficients_[1];
    theta = (other.theta == other.coefficients_) ? coefficients_ : other.theta;
    return *this;
}

void LinearRegression::train(double alpha, int iterations) {
    if (m <= 0 || x == nullptr || y == nullptr) {
        // Every update divides by m, so an empty set would yield NaN.
        throw std::invalid_argument("LinearRegression::train: no training examples");
    }

    const int steps = iterations > 0 ? iterations : 0;
    std::vector<double> J(static_cast<std::size_t>(steps));

    // gradient_descent hands back a new[] array; copy it into the object and
    // let the unique_ptr release it, so retraining never leaks.
    const std::unique_ptr<double[]> learned(gradient_descent(x, y, alpha, steps, J.data(), m));
    coefficients_[0] = learned[0];
    coefficients_[1] = learned[1];
    theta = coefficients_;

    std::cout << "J = ";
    for (std::size_t i = 0; i < J.size(); ++i) {
        std::cout << J[i] << ' ';
    }
    std::cout << '\n' << "Theta: " << theta[0] << " " << theta[1] << std::endl;
}

double LinearRegression::predict(double x) {
    if (theta == nullptr) {
        throw std::logic_error("LinearRegression::predict: model has not been trained");
    }

    return h(x, theta);
}

double LinearRegression::compute_cost(double x[], double y[], double theta[], int m) {
    const std::unique_ptr<double[]> predictions(calculate_predictions(x, theta, m));
    const std::unique_ptr<double[]> diff(Utils::array_diff(predictions.get(), y, m));

    // Squaring by multiplication avoids one pow() call per element.
    const std::unique_ptr<double[]> sq_errors(Utils::array_multiplication(diff.get(), diff.get(), m));

    return (1.0 / (2.0 * m)) * Utils::array_sum(sq_errors.get(), m);
}

double LinearRegression::h(double x, double theta[]) {
    return theta[0] + theta[1] * x;
}

double *LinearRegression::calculate_predictions(double x[], double theta[], int m) {
    double *predictions = new double[m];

    // calculate h for each training example
    for (int i = 0; i < m; ++i) {
        predictions[i] = h(x[i], theta);
    }

    return predictions;
}

double *LinearRegression::gradient_descent(double x[], double y[], double alpha, int iters, double *J, int m) {
    std::unique_ptr<double[]> theta(new double[2]);
    theta[0] = 1;
    theta[1] = 1;

    const double step = alpha * (1.0 / m);

    for (int i = 0; i < iters; ++i) {
        const std::unique_ptr<double[]> predictions(calculate_predictions(x, theta.get(), m));
        const std::unique_ptr<double[]> errors(Utils::array_diff(predictions.get(), y, m));
        const std::unique_ptr<double[]> errors_times_x(Utils::array_multiplication(errors.get(), x, m));

        // Both gradients come from the same pre-update errors, so the two
        // coefficients are updated simultaneously.
        const double gradient0 = Utils::array_sum(errors.get(), m);
        const double gradient1 = Utils::array_sum(errors_times_x.get(), m);

        theta[0] -= step * gradient0;
        theta[1] -= step * gradient1;

        J[i] = compute_cost(x, y, theta.get(), m);
    }

    return theta.release();
}

// ---------------------------------------------------------------------------
// main.cpp
// ---------------------------------------------------------------------------

namespace {

// Values the prompts advertise as defaults.
// NOTE(review): the features are not scaled, so 0.01 looks too large for the
// chirps and fires data sets (J may grow instead of shrinking) - confirm.
const double kDefaultAlpha = 0.01;
const int kDefaultIterations = 1500;

/** Discard spaces, tabs and carriage returns on the current input line. */
void skip_blanks() {
    for (;;) {
        const int next = std::cin.peek();
        if (next != ' ' && next != '\t' && next != '\r') {
            return;
        }
        std::cin.get();
    }
}

/**
 * Read one whitespace-delimited value from standard input.
 *
 * After a successful read the rest of the line is consumed when it holds
 * nothing but blanks, so that a following read_or_default() can tell an
 * empty answer from leftover input.  After a malformed token the stream is
 * reset and the rest of the line dropped, so later prompts keep working.
 *
 * @return true when a value was read into `out`.
 */
template <typename T>
bool read_token(T &out) {
    if (std::cin >> out) {
        skip_blanks();
        if (std::cin.peek() == '\n') {
            std::cin.get();
        }
        return true;
    }

    if (!std::cin.eof()) {
        std::cin.clear();
        std::cin.ignore(std::numeric_limits<std::streamsize>::max(), '\n');
    }
    return false;
}

/**
 * Read a value, falling back to `fallback` when the user just presses
 * Enter or types something that does not parse.
 */
template <typename T>
T read_or_default(T fallback) {
    skip_blanks();
    if (std::cin.peek() == '\n') {
        std::cin.get();
        return fallback;
    }

    T value = T();
    return read_token(value) ? value : fallback;
}

/**
 * Shared body of the three examples: ask for the training parameters,
 * train on (x, y), then predict for one value entered by the user.
 * Taking both arrays by reference to N elements makes a length mismatch
 * between features and targets a compile error.
 *
 * @param query_prompt  Text shown before reading the value to predict for.
 * @param result_label  Text printed in front of the prediction.
 * @param result_suffix Text printed after the prediction (may be empty).
 */
template <std::size_t N>
void run_demo(double (&x)[N], double (&y)[N],
              const char *query_prompt, const char *result_label, const char *result_suffix) {
    LinearRegression lr(x, y, static_cast<int>(N));

    std::cout << "Enter learning rate alpha (default: 0.01): ";
    const double alpha = read_or_default(kDefaultAlpha);

    std::cout << "Enter number of iterations (default: 1500): ";
    const int iterations = read_or_default(kDefaultIterations);

    std::cout << "Training model..." << std::endl;
    lr.train(alpha, iterations);

    std::cout << query_prompt;
    double query = 0.0;
    if (!read_token(query)) {
        std::cerr << "Invalid input, no prediction made." << std::endl;
        return;
    }

    std::cout << result_label << lr.predict(query) << result_suffix << std::endl;
}

} // namespace

/** Example 1: cricket chirps per second vs temperature. */
void chirps_vs_temp() {
    double chirps[] = {20, 16, 20, 18, 17, 16, 15, 17, 15, 16, 15, 17, 16, 17, 14};
    double temperatures[] = {89, 72, 93, 84, 81, 75, 70, 82, 69, 83, 80, 83, 81, 84, 76};

    run_demo(chirps, temperatures,
             "Model has been trained, enter number of chirps: ", "Estimated temperature: ", " F");
}

/** Example 2: sand granule diameter vs beach slope. */
void sand_vs_slope() {
    double diameter[] = {0.17, 0.19, 0.22, 0.235, 0.235, 0.3, 0.35, 0.42, 0.85};
    double slope[] = {0.63, 0.7, 0.82, 0.88, 1.15, 1.5, 4.4, 7.3, 11.3};

    run_demo(diameter, slope,
             "Model has been trained, enter diameter: ", "Estimated slope: ", "");
}

/** Example 3: fires vs thefts. */
void fires_vs_theft() {
    double fires[] = {6.2, 9.5, 10.5, 7.7, 8.6, 34.1, 11, 6.9, 7.3, 15.1, 29.1, 2.2, 5.7, 2, 2.5, 4, 5.4, 2.2, 7.2,
                      15.1, 16.5, 18.4, 36.2, 39.7, 18.5, 23.3, 12.2, 5.6, 21.8, 21.6, 9, 3.6, 5, 28.6, 17.4, 11.3, 3.4,
                      11.9, 10.5, 10.7, 10.8, 4.8};
    double theft[] = {29, 44, 36, 37, 53, 68, 75, 18, 31, 25, 34, 14, 11, 11, 22, 16, 27, 9, 29, 30, 40, 32, 41, 147,
                      22, 29, 46, 23, 4, 31, 39, 15, 32, 27, 32, 34, 17, 46, 42, 43, 34, 19};

    run_demo(fires, theft,
             "Model has been trained, enter fires: ", "Estimated theft: ", "");
}

/**
 * Menu loop: run the chosen example until the user picks 0 or input ends.
 */
int main() {
    while (true) {
        std::cout << "Choose example: " << '\n';
        std::cout << "0: Exit" << '\n';
        std::cout << "1. Cricket chirps/sec vs temperature in degrees fahrenheit" << '\n';
        std::cout << "2. Diameter (mm) of sand granules vs slope on natural beach (degrees)" << '\n';
        std::cout << "3. Fires per 1000 houses vs thefts per 1000 population in Chicago" << std::endl;

        // End of input or an unreadable choice ends the program, as does 0.
        int opt = 0;
        if (!read_token(opt) || opt == 0) {
            break;
        }

        std::cout << std::endl;

        switch (opt) {
            case 1:
                chirps_vs_temp();
                break;

            case 2:
                sand_vs_slope();
                break;

            case 3:
                fires_vs_theft();
                break;

            default:
                break;
        }

        std::cout << '\n' << std::endl;
    }

    return 0;
}