├── Homework1-Linear-optimization-and-Robust-linear-regression ├── Robust-linear-regression-dataset-communities-crimes.ipynb ├── Robust-linear-regression-dataset-housing.ipynb ├── communities-and-crime.csv ├── housing.csv ├── hw1-answers-Nhi.pdf └── hw2-linear-optimization-and-robust-linear-regression.pdf ├── Homework2-Algorithmic-framework-for-linear-regression-and-convex-regression ├── 1.Algorithmic-Framework-for-Linear-Regression.ipynb ├── 2.Convex-regression.ipynb ├── hw2-answer.pdf └── hw2-subject.pdf ├── Homework3-Optimal-trees-and-optimal-data-imputation ├── hw3-answers-Nhi.pdf ├── hw3-optimal-trees-and-optimal-missing-data-imputation.jl └── hw3-subject.pdf ├── Homework4-Precriptive-approach-to-forecast-stocks-sales ├── hw4-Q3-ML-model-predict-sales.ipynb ├── hw4-Q4-prescriptive-approach-for-stocks-predictions.ipynb ├── hw4-answers-Nhi.pdf └── hw4-subject.pdf └── README.md /Homework1-Linear-optimization-and-Robust-linear-regression/Robust-linear-regression-dataset-communities-crimes.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "using JuMP, Gurobi, DataFrames" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 2, 15 | "metadata": {}, 16 | "outputs": [ 17 | { 18 | "name": "stderr", 19 | "output_type": "stream", 20 | "text": [ 21 | "\u001b[1m\u001b[33mWARNING: \u001b[39m\u001b[22m\u001b[33mreadtable is deprecated, use CSV.read from the CSV package instead\u001b[39m\n", 22 | "Stacktrace:\n", 23 | " [1] \u001b[1mdepwarn\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::String, ::Symbol\u001b[1m)\u001b[22m\u001b[22m at \u001b[1m.\\deprecated.jl:70\u001b[22m\u001b[22m\n", 24 | " [2] \u001b[1m#readtable#232\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::Bool, ::Char, ::Array{Char,1}, ::Char, ::Array{String,1}, ::Array{String,1}, ::Array{String,1}, ::Bool, ::Int64, 
::Array{Symbol,1}, ::Array{Any,1}, ::Bool, ::Char, ::Bool, ::Int64, ::Array{Int64,1}, ::Bool, ::Symbol, ::Bool, ::Bool, ::DataFrames.#readtable, ::String\u001b[1m)\u001b[22m\u001b[22m at \u001b[1mC:\\Users\\utilisateur\\.julia\\v0.6\\DataFrames\\src\\deprecated.jl:1045\u001b[22m\u001b[22m\n", 25 | " [3] \u001b[1m(::DataFrames.#kw##readtable)\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::Array{Any,1}, ::DataFrames.#readtable, ::String\u001b[1m)\u001b[22m\u001b[22m at \u001b[1m.\\:0\u001b[22m\u001b[22m\n", 26 | " [4] \u001b[1minclude_string\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::String, ::String\u001b[1m)\u001b[22m\u001b[22m at \u001b[1m.\\loading.jl:522\u001b[22m\u001b[22m\n", 27 | " [5] \u001b[1minclude_string\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::Module, ::String, ::String\u001b[1m)\u001b[22m\u001b[22m at \u001b[1mC:\\Users\\utilisateur\\.julia\\v0.6\\Compat\\src\\Compat.jl:88\u001b[22m\u001b[22m\n", 28 | " [6] \u001b[1mexecute_request\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::ZMQ.Socket, ::IJulia.Msg\u001b[1m)\u001b[22m\u001b[22m at \u001b[1mC:\\Users\\utilisateur\\.julia\\v0.6\\IJulia\\src\\execute_request.jl:180\u001b[22m\u001b[22m\n", 29 | " [7] \u001b[1m(::Compat.#inner#14{Array{Any,1},IJulia.#execute_request,Tuple{ZMQ.Socket,IJulia.Msg}})\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m\u001b[1m)\u001b[22m\u001b[22m at \u001b[1mC:\\Users\\utilisateur\\.julia\\v0.6\\Compat\\src\\Compat.jl:332\u001b[22m\u001b[22m\n", 30 | " [8] \u001b[1meventloop\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::ZMQ.Socket\u001b[1m)\u001b[22m\u001b[22m at \u001b[1mC:\\Users\\utilisateur\\.julia\\v0.6\\IJulia\\src\\eventloop.jl:8\u001b[22m\u001b[22m\n", 31 | " [9] \u001b[1m(::IJulia.##15#18)\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m\u001b[1m)\u001b[22m\u001b[22m at \u001b[1m.\\task.jl:335\u001b[22m\u001b[22m\n", 32 | "while loading In[2], in expression starting on line 1\n" 33 | ] 34 | } 35 | ], 36 | "source": [ 37 | "df = 
readtable(\"communities-and-crime.csv\",header=false)\n", 38 | "X = Matrix(df[1:end - 1])\n", 39 | "y = df[end];" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": 3, 45 | "metadata": {}, 46 | "outputs": [], 47 | "source": [ 48 | "# Split into training, validation and test sets (50%/25%/25%)\n", 49 | "n = length(y)\n", 50 | "val_start = round(Int, 0.50 * n)\n", 51 | "test_start = round(Int, 0.75 * n)\n", 52 | "train_X = X[1:val_start - 1, :]\n", 53 | "train_y = y[1:val_start - 1]\n", 54 | "val_X = X[val_start:test_start - 1, :]\n", 55 | "val_y = y[val_start:test_start - 1]\n", 56 | "test_X = X[test_start:end, :]\n", 57 | "test_y = y[test_start:end];" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": 4, 63 | "metadata": {}, 64 | "outputs": [ 65 | { 66 | "name": "stdout", 67 | "output_type": "stream", 68 | "text": [ 69 | "Size of training set:(60, 122)(60,)\n", 70 | "Size of validation set:(31, 122)(31,)\n", 71 | "Size of test set:(31, 122)(31,)\n" 72 | ] 73 | } 74 | ], 75 | "source": [ 76 | "#See the size of training set and test set\n", 77 | "println(\"Size of training set:\",size(train_X),size(train_y))\n", 78 | "println(\"Size of validation set:\",size(val_X),size(val_y))\n", 79 | "println(\"Size of test set:\",size(test_X),size(test_y))" 80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": 5, 85 | "metadata": {}, 86 | "outputs": [ 87 | { 88 | "data": { 89 | "text/plain": [ 90 | "ridgelinear (generic function with 1 method)" 91 | ] 92 | }, 93 | "execution_count": 5, 94 | "metadata": {}, 95 | "output_type": "execute_result" 96 | } 97 | ], 98 | "source": [ 99 | "# write the training functions for different types of linear regressions\n", 100 | "\n", 101 | "##### standard linear regression #####\n", 102 | "function standardlinear(X, y)\n", 103 | " # OutputFlag=0 to hide output from solver\n", 104 | " m = Model(solver=GurobiSolver(OutputFlag=0)) \n", 105 | " p = size(X, 2) #nb of columns\n", 106 | "\n", 
107 | " #variables \n", 108 | " @variable(m, t)\n", 109 | " @variable(m, β[1:p])\n", 110 | " \n", 111 | " # Constraints\n", 112 | " @constraint(m, norm(y - X * β) <= t)\n", 113 | " \n", 114 | " # Objective\n", 115 | " @objective(m, Min, t)\n", 116 | "\n", 117 | " solve(m)\n", 118 | " return getvalue(β)\n", 119 | "end\n", 120 | "\n", 121 | "##### lasso linear regression #####\n", 122 | "function lassolinear(X, y, ρ)\n", 123 | " # OutputFlag=0 to hide output from solver\n", 124 | " m = Model(solver=GurobiSolver(OutputFlag=0))\n", 125 | " \n", 126 | " p = size(X, 2) #nb of columns\n", 127 | "\n", 128 | " \n", 129 | " \n", 130 | " #variables \n", 131 | " @variable(m, t)\n", 132 | " @variable(m, β[1:p])\n", 133 | " @variable(m, a[1:p])\n", 134 | " \n", 135 | " # Constraints\n", 136 | " @constraint(m, norm(y - X * β) <= t)\n", 137 | " @constraint(m, -a[1:p] .<= β[1:p])\n", 138 | " @constraint(m, β[1:p] .<= a[1:p])\n", 139 | " @constraint(m, a[1:p] .>= 0)\n", 140 | " \n", 141 | "\n", 142 | " # Objective\n", 143 | " @objective(m, Min, t + ρ * sum(a[j] for j = 1:p))\n", 144 | "\n", 145 | " solve(m)\n", 146 | "\n", 147 | " return getvalue(β)\n", 148 | "end\n", 149 | "\n", 150 | "function lassolinear(X, y, ρ)\n", 151 | " # OutputFlag=0 to hide output from solver\n", 152 | " m = Model(solver=GurobiSolver(OutputFlag=0))\n", 153 | " \n", 154 | " p = size(X, 2) #nb of columns\n", 155 | "\n", 156 | " \n", 157 | " \n", 158 | " #variables \n", 159 | " @variable(m, t)\n", 160 | " @variable(m, β[1:p])\n", 161 | " @variable(m, a[1:p])\n", 162 | " \n", 163 | " # Constraints\n", 164 | " @constraint(m, norm(y - X * β) <= t)\n", 165 | " @constraint(m, -a[1:p] .<= β[1:p])\n", 166 | " @constraint(m, β[1:p] .<= a[1:p])\n", 167 | " @constraint(m, a[1:p] .>= 0)\n", 168 | " \n", 169 | "\n", 170 | " # Objective\n", 171 | " @objective(m, Min, t + ρ * sum(a[j] for j = 1:p))\n", 172 | "\n", 173 | " solve(m)\n", 174 | "\n", 175 | " return getvalue(β)\n", 176 | "end\n", 177 | "\n", 178 | "##### ridge 
linear regression #####\n", 179 | "function ridgelinear(X, y, ρ)\n", 180 | " # OutputFlag=0 to hide output from solver\n", 181 | " m = Model(solver=GurobiSolver(OutputFlag=0))\n", 182 | " p = size(X, 2) #nb of columns\n", 183 | "\n", 184 | " # Variables\n", 185 | " @variable(m, t)\n", 186 | " @variable(m, u)\n", 187 | " @variable(m, β[1:p])\n", 188 | " \n", 189 | " # Constraints\n", 190 | " @constraint(m, norm(y - X * β) <= t)\n", 191 | " @constraint(m, norm(β) <= u)\n", 192 | "\n", 193 | " # Objective\n", 194 | " @objective(m, Min, t + ρ * u)\n", 195 | "\n", 196 | " solve(m)\n", 197 | "\n", 198 | " return getvalue(β)\n", 199 | "end" 200 | ] 201 | }, 202 | { 203 | "cell_type": "code", 204 | "execution_count": 6, 205 | "metadata": {}, 206 | "outputs": [ 207 | { 208 | "data": { 209 | "text/plain": [ 210 | "findBestRho (generic function with 1 method)" 211 | ] 212 | }, 213 | "execution_count": 6, 214 | "metadata": {}, 215 | "output_type": "execute_result" 216 | } 217 | ], 218 | "source": [ 219 | "function findBestRho(train_X,\n", 220 | " train_y,\n", 221 | " val_X,\n", 222 | " val_y, \n", 223 | " rho_list)\n", 224 | " p = size(train_X, 2)\n", 225 | " k = length(rho_list)\n", 226 | " #instantiate arrays\n", 227 | " β_lasso_list = zeros(k, p)\n", 228 | " β_ridge_list = zeros(k, p)\n", 229 | " lasso_scores = zeros(k)\n", 230 | " ridge_scores = zeros(k)\n", 231 | " \n", 232 | " for i in 1:length(rho_list)\n", 233 | " # training on train sets for both regression methods\n", 234 | " \n", 235 | " β_lasso_list[i, :] = lassolinear(train_X, train_y, rho_list[i])\n", 236 | " #println(\"\\nβ_lasso for rho =\", rho_list[i], β_lasso_list[i, :])\n", 237 | " β_ridge_list[i, :] = ridgelinear(train_X, train_y, rho_list[i])\n", 238 | " #println(\"\\nβ_ridge for rho =\", rho_list[i], β_ridge_list[i, :])\n", 239 | " \n", 240 | " # performance metrics on validation sets for both regression methods\n", 241 | " lasso_scores[i] = norm(val_y - val_X * β_lasso_list[i, :])\n", 242 | " 
ridge_scores[i] = norm(val_y - val_X * β_ridge_list[i, :])\n", 243 | " \n", 244 | " \n", 245 | " end\n", 246 | " #println(lasso_scores)\n", 247 | " #println(ridge_scores)\n", 248 | " argmin_lasso = indmin(lasso_scores)\n", 249 | " argmin_ridge = indmin(ridge_scores)\n", 250 | " \n", 251 | " return rho_list[argmin_lasso], rho_list[argmin_ridge]\n", 252 | "end" 253 | ] 254 | }, 255 | { 256 | "cell_type": "code", 257 | "execution_count": 7, 258 | "metadata": {}, 259 | "outputs": [ 260 | { 261 | "name": "stdout", 262 | "output_type": "stream", 263 | "text": [ 264 | "Academic license - for non-commercial use only\r\n", 265 | "Academic license - for non-commercial use only\r\n", 266 | "Academic license - for non-commercial use only\r\n", 267 | "Academic license - for non-commercial use only\r\n", 268 | "Academic license - for non-commercial use only\r\n", 269 | "Academic license - for non-commercial use only\r\n", 270 | "Academic license - for non-commercial use only\r\n", 271 | "Academic license - for non-commercial use only\r\n", 272 | "Academic license - for non-commercial use only\r\n", 273 | "Academic license - for non-commercial use only\r\n", 274 | "Best rho for lasso: 1.0\n", 275 | "Best rho for ridge: 2.0\n" 276 | ] 277 | } 278 | ], 279 | "source": [ 280 | "rho_list = [0.001, 0.01, 0.1, 1, 2]\n", 281 | "best_rho = findBestRho(train_X, train_y, val_X, val_y, rho_list)\n", 282 | "println(\"Best rho for lasso: \", best_rho[1] )\n", 283 | "println(\"Best rho for ridge: \", best_rho[2])" 284 | ] 285 | }, 286 | { 287 | "cell_type": "code", 288 | "execution_count": 8, 289 | "metadata": {}, 290 | "outputs": [ 291 | { 292 | "name": "stdout", 293 | "output_type": "stream", 294 | "text": [ 295 | "Academic license - for non-commercial use only\n", 296 | "\n", 297 | "Best β for standard linear regression is: [-0.0527188, 0.57872, -0.649981, -0.564125, -0.120323, -0.201748, 1.02188, 0.0263513, -0.636356, -0.885288, -0.111684, 0.393866, 0.797354, -0.0167027, 0.371823, 
0.994967, -0.280043, 0.234241, 0.618182, -0.329469, 1.20287, 0.109965, -0.477511, 0.0487904, 0.0559053, 0.660927, 0.0281862, 0.51628, -0.792707, -1.08652, 1.20646, 0.661044, 0.31798, 0.00806002, -0.38317, -0.0977316, 0.197213, 0.16222, 0.151204, 0.222365, 0.386112, 0.759026, -0.23994, -1.19354, -0.71622, 0.0450472, 0.766182, 0.865574, -1.18412, -0.464879, -0.853547, -0.232746, -0.251155, 0.0510877, -0.0455335, 0.577966, 0.0274876, -1.20301, -0.403138, 0.998665, 0.738847, 1.37342, 0.421307, 0.130119, -0.540267, 1.15078, -0.233275, -0.446227, -0.502373, 0.855415, -0.140365, -0.683184, -0.507078, 0.186683, 0.129039, -0.525952, -0.792916, 0.280602, -0.033614, -1.34664, -0.0466065, 0.606263, -1.24193, -0.279984, 0.81491, 0.103892, -0.510585, -0.120731, 0.548593, 0.626101, -0.153423, 0.210207, 0.424231, 1.71205, -1.64007, 0.567523, 0.888169, -1.1854, -1.3182, -0.103848, -0.161441, -0.321818, 0.18046, 0.0, -0.603872, 0.361623, 0.519743, 1.35582, -0.274985, -0.28024, -1.79978, -0.0207462, 0.101271, -0.239687, -0.272179, -0.317739, 0.581075, 0.274234, -0.0256122, -0.032865, 0.251143, 0.166544]\n", 298 | "Academic license - for non-commercial use only\n", 299 | "\n", 300 | "Best β for lasso is: [4.01425e-10, 1.11734e-9, 0.113693, -1.49524e-10, 5.02483e-10, 3.84554e-10, 7.73213e-10, 1.00448e-9, 5.6085e-10, 2.43078e-10, 4.24719e-10, 0.0246489, 5.04567e-10, 6.78874e-10, 2.77552e-10, 5.45343e-10, 3.06558e-10, 0.192666, 1.92222e-10, 5.19826e-10, 5.52122e-10, 8.41795e-10, 5.25271e-10, 4.51622e-10, 5.8942e-10, 3.80329e-10, 5.89111e-10, 5.80149e-10, 1.22634e-9, 5.20845e-10, 7.45958e-10, 6.72105e-10, 1.97089e-6, 4.20597e-10, 1.45024e-10, 2.05957e-9, 2.68848e-10, 7.73343e-10, 8.88697e-10, 3.23608e-7, 1.3029e-9, 1.23482e-9, 3.21476e-9, -3.35969e-11, -4.14832e-13, 8.03769e-11, -6.10164e-11, 5.23599e-10, 3.76416e-10, 8.23385e-10, 0.401376, 3.28252e-10, 1.37571e-9, 2.2926e-9, 5.37727e-9, 6.61514e-9, 1.33993e-9, 1.44146e-9, 1.5549e-9, 1.39828e-9, 5.38261e-10, 6.57624e-10, 2.90309e-9, 
1.26043e-9, 1.03111e-9, 2.97733e-9, 8.5972e-10, 1.60778e-10, 9.16622e-10, 1.87878e-9, 3.16807e-10, 6.51018e-10, 5.72221e-10, 1.73563e-10, 0.0672751, 7.93284e-10, -2.34223e-10, 9.6082e-10, 1.68372e-9, 6.87446e-10, 7.30169e-10, 8.0997e-10, 5.57811e-10, 9.06186e-10, 1.23292e-9, 9.40487e-10, 1.32271e-9, 1.1049e-9, 3.08024e-9, 8.23787e-10, 6.11714e-10, 9.56861e-10, 4.2307e-10, 9.03359e-10, 1.27946e-9, 6.34316e-10, 3.05067e-10, 7.2799e-10, 1.54192e-9, 8.66694e-10, 2.16414e-10, 5.32812e-10, 5.01206e-10, 7.2799e-10, 2.84305e-10, -2.90354e-11, 4.51842e-8, 1.88758e-10, 4.85446e-11, 5.92382e-9, 1.97186e-10, 1.18799e-9, 1.88706e-9, 3.54522e-11, 0.00494525, 1.25323e-7, 3.54845e-10, 2.03642e-10, 1.00817e-9, 9.44617e-10, 4.92779e-10, 4.8767e-10]\n", 301 | "Academic license - for non-commercial use only\n", 302 | "\n", 303 | "Best β for ridge is: [-0.00179024, 0.0210995, 0.0604243, -0.0444612, 0.00730347, -0.0103841, 0.021318, 0.0120572, 0.00861987, -0.00625892, -0.00071184, 0.0171033, -0.0073181, 0.000453718, 0.0086053, -0.0290851, -0.000752137, 0.0621785, -0.00377495, -0.00961731, -0.00924996, 0.00143777, 0.00513625, 0.00259165, 0.0126473, -0.00421982, -0.00105424, 0.00191188, 0.0266876, 0.00436946, 0.0140701, -0.00552895, 0.0504918, -0.00253214, -0.022885, 0.0230556, -0.00213958, 0.000127718, 0.0266348, 0.0308286, 0.0274537, 0.0293031, 0.0313558, -0.0397438, -0.0399155, -0.0426891, -0.0363046, 0.0209062, 0.0105985, 0.0153412, 0.0508595, 0.00181445, 0.00899175, 0.016794, 0.0245379, 0.0258113, -0.00621516, -0.00200901, 0.00258698, 0.00312103, 0.0179657, -0.0019436, 0.036574, 0.0258852, 0.0175554, 0.031703, 0.0137132, -0.0113492, 0.0156806, 0.0194562, -0.00200627, 0.0101063, -0.00863761, -0.01081, 0.0630721, -0.000481633, -0.0242943, 0.0288456, 0.0160439, -0.00848919, -0.00800247, -0.00474629, -0.00994586, 0.0033445, 0.00431782, 0.00324316, 0.0105073, 0.00630826, 0.0219826, 0.0319152, 0.0260859, -0.00535281, 0.0162316, 0.00838559, 0.0150737, 0.0115276, 0.0030069, 0.017645, 
0.0123027, 0.0190922, -0.000399876, 0.0174587, 0.0258919, 0.017645, -0.0260469, -0.017494, 0.0510899, -0.0035923, 0.00406581, 0.041806, -0.00456649, 0.0236424, 0.0301581, -0.00895811, 0.0283926, 0.0139855, 0.00142679, -0.0041382, 0.0131051, 0.019255, -0.00452269, 0.00758123]\n" 304 | ] 305 | } 306 | ], 307 | "source": [ 308 | "# retrain the whole model with training and validation sets together\n", 309 | "\n", 310 | "new_train_X = vcat(train_X, val_X)\n", 311 | "new_train_y = vcat(train_y, val_y)\n", 312 | "\n", 313 | "# find best beta for standard linear regression\n", 314 | "β_standard = standardlinear(new_train_X, new_train_y)\n", 315 | "println(\"\\nBest β for standard linear regression is: \", β_standard)\n", 316 | "\n", 317 | "# find best beta for lasso\n", 318 | "\n", 319 | "ρ_lasso = best_rho[1]\n", 320 | "β_lasso = lassolinear(new_train_X, new_train_y, ρ_lasso)\n", 321 | "println(\"\\nBest β for lasso is: \", β_lasso)\n", 322 | "\n", 323 | "# find best beta for ridge\n", 324 | "\n", 325 | "ρ_ridge = best_rho[2]\n", 326 | "β_ridge = ridgelinear(new_train_X, new_train_y, ρ_ridge)\n", 327 | "println(\"\\nBest β for ridge is: \", β_ridge)" 328 | ] 329 | }, 330 | { 331 | "cell_type": "code", 332 | "execution_count": 9, 333 | "metadata": {}, 334 | "outputs": [ 335 | { 336 | "name": "stdout", 337 | "output_type": "stream", 338 | "text": [ 339 | "Standard linear regression score: 2.5479228082978107\n", 340 | "Lasso score: 0.7833980234161126\n", 341 | "Ridge score: 0.750864457615523\n", 342 | "Baseline score: 1.4589372704759014\n", 343 | "\n", 344 | "Relative gap standard linear regression % baseline: -74.64238249713678 %\n", 345 | "Relative gap lasso % baseline: 46.30351562952598 %\n", 346 | "Relative gap ridge % baseline: 48.533465227700084 %\n" 347 | ] 348 | } 349 | ], 350 | "source": [ 351 | "# score standard linear regression\n", 352 | "score_standard = norm(test_y - test_X * β_standard)\n", 353 | "println(\"Standard linear regression score: \", 
score_standard)\n", 354 | "\n", 355 | "# score lasso\n", 356 | "score_lasso = norm(test_y - test_X * β_lasso)\n", 357 | "println(\"Lasso score: \", score_lasso)\n", 358 | "\n", 359 | "# score ridge\n", 360 | "score_ridge = norm(test_y - test_X * β_ridge)\n", 361 | "println(\"Ridge score: \", score_ridge)\n", 362 | "\n", 363 | "# baseline\n", 364 | "train_y_mean = mean(new_train_y) #use mean on training and validation sets\n", 365 | "score_baseline = norm(test_y - train_y_mean)\n", 366 | "println(\"Baseline score: \", score_baseline)\n", 367 | "\n", 368 | "# compare scores of regression with the baseline model\n", 369 | "println(\"\\nRelative gap standard linear regression % baseline: \", (score_baseline - score_standard)*100/score_baseline, \" %\" )\n", 370 | "println(\"Relative gap lasso % baseline: \", (score_baseline - score_lasso)*100/score_baseline, \" %\" )\n", 371 | "println(\"Relative gap ridge % baseline: \", (score_baseline - score_ridge)*100/score_baseline, \" %\" )" 372 | ] 373 | } 374 | ], 375 | "metadata": { 376 | "kernelspec": { 377 | "display_name": "Julia 0.6.4", 378 | "language": "julia", 379 | "name": "julia-0.6" 380 | }, 381 | "language_info": { 382 | "file_extension": ".jl", 383 | "mimetype": "application/julia", 384 | "name": "julia", 385 | "version": "0.6.4" 386 | } 387 | }, 388 | "nbformat": 4, 389 | "nbformat_minor": 2 390 | } 391 | -------------------------------------------------------------------------------- /Homework1-Linear-optimization-and-Robust-linear-regression/Robust-linear-regression-dataset-housing.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "using JuMP, Gurobi, DataFrames" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 2, 15 | "metadata": {}, 16 | "outputs": [ 17 | { 18 | "name": "stderr", 19 | "output_type": "stream", 20 | 
"text": [ 21 | "\u001b[1m\u001b[33mWARNING: \u001b[39m\u001b[22m\u001b[33mreadtable is deprecated, use CSV.read from the CSV package instead\u001b[39m\n", 22 | "Stacktrace:\n", 23 | " [1] \u001b[1mdepwarn\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::String, ::Symbol\u001b[1m)\u001b[22m\u001b[22m at \u001b[1m.\\deprecated.jl:70\u001b[22m\u001b[22m\n", 24 | " [2] \u001b[1m#readtable#232\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::Bool, ::Char, ::Array{Char,1}, ::Char, ::Array{String,1}, ::Array{String,1}, ::Array{String,1}, ::Bool, ::Int64, ::Array{Symbol,1}, ::Array{Any,1}, ::Bool, ::Char, ::Bool, ::Int64, ::Array{Int64,1}, ::Bool, ::Symbol, ::Bool, ::Bool, ::DataFrames.#readtable, ::String\u001b[1m)\u001b[22m\u001b[22m at \u001b[1mC:\\Users\\utilisateur\\.julia\\v0.6\\DataFrames\\src\\deprecated.jl:1045\u001b[22m\u001b[22m\n", 25 | " [3] \u001b[1m(::DataFrames.#kw##readtable)\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::Array{Any,1}, ::DataFrames.#readtable, ::String\u001b[1m)\u001b[22m\u001b[22m at \u001b[1m.\\:0\u001b[22m\u001b[22m\n", 26 | " [4] \u001b[1minclude_string\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::String, ::String\u001b[1m)\u001b[22m\u001b[22m at \u001b[1m.\\loading.jl:522\u001b[22m\u001b[22m\n", 27 | " [5] \u001b[1minclude_string\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::Module, ::String, ::String\u001b[1m)\u001b[22m\u001b[22m at \u001b[1mC:\\Users\\utilisateur\\.julia\\v0.6\\Compat\\src\\Compat.jl:88\u001b[22m\u001b[22m\n", 28 | " [6] \u001b[1mexecute_request\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::ZMQ.Socket, ::IJulia.Msg\u001b[1m)\u001b[22m\u001b[22m at \u001b[1mC:\\Users\\utilisateur\\.julia\\v0.6\\IJulia\\src\\execute_request.jl:180\u001b[22m\u001b[22m\n", 29 | " [7] \u001b[1m(::Compat.#inner#14{Array{Any,1},IJulia.#execute_request,Tuple{ZMQ.Socket,IJulia.Msg}})\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m\u001b[1m)\u001b[22m\u001b[22m at 
\u001b[1mC:\\Users\\utilisateur\\.julia\\v0.6\\Compat\\src\\Compat.jl:332\u001b[22m\u001b[22m\n", 30 | " [8] \u001b[1meventloop\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::ZMQ.Socket\u001b[1m)\u001b[22m\u001b[22m at \u001b[1mC:\\Users\\utilisateur\\.julia\\v0.6\\IJulia\\src\\eventloop.jl:8\u001b[22m\u001b[22m\n", 31 | " [9] \u001b[1m(::IJulia.##15#18)\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m\u001b[1m)\u001b[22m\u001b[22m at \u001b[1m.\\task.jl:335\u001b[22m\u001b[22m\n", 32 | "while loading In[2], in expression starting on line 1\n" 33 | ] 34 | } 35 | ], 36 | "source": [ 37 | "df = readtable(\"housing.csv\",header=false)\n", 38 | "X = Matrix(df[1:end - 1])\n", 39 | "y = df[end];" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": 3, 45 | "metadata": {}, 46 | "outputs": [], 47 | "source": [ 48 | "# Split into training, validation and test sets (50%/25%/25%)\n", 49 | "n = length(y)\n", 50 | "val_start, test_start = round(Int, 0.50 * n), round(Int, 0.75 * n)\n", 51 | "\n", 52 | "train_X, train_y = X[1:val_start - 1, :], y[1:val_start - 1]\n", 53 | " \n", 54 | "val_X = X[val_start:test_start - 1, :]\n", 55 | "val_y = y[val_start:test_start - 1]\n", 56 | "test_X = X[test_start:end, :]\n", 57 | "test_y = y[test_start:end];" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": 4, 63 | "metadata": {}, 64 | "outputs": [ 65 | { 66 | "name": "stdout", 67 | "output_type": "stream", 68 | "text": [ 69 | "Size of training set:(252, 13)(252,)\n", 70 | "Size of validation set:(127, 13)(127,)\n", 71 | "Size of test set:(127, 13)(127,)\n" 72 | ] 73 | } 74 | ], 75 | "source": [ 76 | "#See the size of training set and test set\n", 77 | "println(\"Size of training set:\",size(train_X),size(train_y))\n", 78 | "println(\"Size of validation set:\",size(val_X),size(val_y))\n", 79 | "println(\"Size of test set:\",size(test_X),size(test_y))" 80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": 5, 85 | "metadata": {}, 
86 | "outputs": [ 87 | { 88 | "data": { 89 | "text/plain": [ 90 | "ridgelinear (generic function with 1 method)" 91 | ] 92 | }, 93 | "execution_count": 5, 94 | "metadata": {}, 95 | "output_type": "execute_result" 96 | } 97 | ], 98 | "source": [ 99 | "# write the training functions for different types of linear regressions\n", 100 | "\n", 101 | "##### standard linear regression #####\n", 102 | "function standardlinear(X, y)\n", 103 | " # OutputFlag=0 to hide output from solver\n", 104 | " m = Model(solver=GurobiSolver(OutputFlag=0)) \n", 105 | " p = size(X, 2) #nb of columns\n", 106 | "\n", 107 | " #variables \n", 108 | " @variable(m, t)\n", 109 | " @variable(m, β[1:p])\n", 110 | " \n", 111 | " # Constraints\n", 112 | " @constraint(m, norm(y - X * β) <= t)\n", 113 | "\n", 114 | " # Objective\n", 115 | " @objective(m, Min, t)\n", 116 | "\n", 117 | " solve(m)\n", 118 | "\n", 119 | " return getvalue(β)\n", 120 | "end\n", 121 | "\n", 122 | "##### lasso linear regression #####\n", 123 | "function lassolinear(X, y, ρ)\n", 124 | " # OutputFlag=0 to hide output from solver\n", 125 | " m = Model(solver=GurobiSolver(OutputFlag=0))\n", 126 | " p = size(X, 2) #nb of columns\n", 127 | " \n", 128 | " #variables \n", 129 | " @variable(m, t)\n", 130 | " @variable(m, β[1:p])\n", 131 | " @variable(m, a[1:p])\n", 132 | " \n", 133 | " # Constraints\n", 134 | " @constraint(m, norm(y - X * β) <= t)\n", 135 | " @constraint(m, -a[1:p] .<= β[1:p])\n", 136 | " @constraint(m, β[1:p] .<= a[1:p])\n", 137 | " @constraint(m, a[1:p] .>= 0)\n", 138 | " \n", 139 | " # Objective\n", 140 | " @objective(m, Min, t + ρ * sum(a[j] for j = 1:p))\n", 141 | "\n", 142 | " solve(m)\n", 143 | " return getvalue(β)\n", 144 | "end\n", 145 | "\n", 146 | "##### ridge linear regression #####\n", 147 | "function ridgelinear(X, y, ρ)\n", 148 | " # OutputFlag=0 to hide output from solver\n", 149 | " m = Model(solver=GurobiSolver(OutputFlag=0))\n", 150 | " \n", 151 | " p = size(X, 2) #nb of columns\n", 152 | "\n", 153 | 
" # Variables\n", 154 | " @variable(m, t)\n", 155 | " @variable(m, u)\n", 156 | " @variable(m, β[1:p])\n", 157 | " \n", 158 | " # Constraints\n", 159 | " @constraint(m, norm(y - X * β) <= t)\n", 160 | " @constraint(m, norm(β) <= u)\n", 161 | "\n", 162 | " # Objective\n", 163 | " @objective(m, Min, t + ρ * u)\n", 164 | "\n", 165 | " solve(m)\n", 166 | " return getvalue(β)\n", 167 | "end" 168 | ] 169 | }, 170 | { 171 | "cell_type": "code", 172 | "execution_count": 6, 173 | "metadata": {}, 174 | "outputs": [ 175 | { 176 | "data": { 177 | "text/plain": [ 178 | "findBestRho (generic function with 1 method)" 179 | ] 180 | }, 181 | "execution_count": 6, 182 | "metadata": {}, 183 | "output_type": "execute_result" 184 | } 185 | ], 186 | "source": [ 187 | "function findBestRho(train_X,\n", 188 | " train_y,\n", 189 | " val_X,\n", 190 | " val_y, \n", 191 | " rho_list)\n", 192 | " p = size(train_X, 2)\n", 193 | " k = length(rho_list)\n", 194 | " #instantiate arrays\n", 195 | " β_lasso_list = zeros(k, p)\n", 196 | " β_ridge_list = zeros(k, p)\n", 197 | " lasso_scores = zeros(k)\n", 198 | " ridge_scores = zeros(k)\n", 199 | " \n", 200 | " for i in 1:length(rho_list)\n", 201 | " # training on train sets for both regression methods\n", 202 | " \n", 203 | " β_lasso_list[i, :] = lassolinear(train_X, train_y, rho_list[i])\n", 204 | " #println(\"\\nβ_lasso for rho =\", rho_list[i], β_lasso_list[i, :])\n", 205 | " β_ridge_list[i, :] = ridgelinear(train_X, train_y, rho_list[i])\n", 206 | " #println(\"\\nβ_ridge for rho =\", rho_list[i], β_ridge_list[i, :])\n", 207 | " \n", 208 | " # performance metrics on validation sets for both regression methods\n", 209 | " lasso_scores[i] = norm(val_y - val_X * β_lasso_list[i, :])\n", 210 | " ridge_scores[i] = norm(val_y - val_X * β_ridge_list[i, :])\n", 211 | " \n", 212 | " \n", 213 | " end\n", 214 | " #println(lasso_scores)\n", 215 | " #println(ridge_scores)\n", 216 | " argmin_lasso = indmin(lasso_scores)\n", 217 | " argmin_ridge = 
indmin(ridge_scores)\n", 218 | " \n", 219 | " return rho_list[argmin_lasso], rho_list[argmin_ridge]\n", 220 | "end" 221 | ] 222 | }, 223 | { 224 | "cell_type": "code", 225 | "execution_count": 7, 226 | "metadata": {}, 227 | "outputs": [ 228 | { 229 | "name": "stdout", 230 | "output_type": "stream", 231 | "text": [ 232 | "Academic license - for non-commercial use only\r\n", 233 | "Academic license - for non-commercial use only\r\n", 234 | "Academic license - for non-commercial use only\r\n", 235 | "Academic license - for non-commercial use only\r\n", 236 | "Academic license - for non-commercial use only\r\n", 237 | "Academic license - for non-commercial use only\r\n", 238 | "Academic license - for non-commercial use only\r\n", 239 | "Academic license - for non-commercial use only\r\n", 240 | "Academic license - for non-commercial use only\r\n", 241 | "Academic license - for non-commercial use only\r\n", 242 | "Best rho for lasso: 0.1\n", 243 | "Best rho for ridge: 2.0\n" 244 | ] 245 | } 246 | ], 247 | "source": [ 248 | "rho_list = [0.001, 0.01, 0.1, 1, 2]\n", 249 | "\n", 250 | "best_rho = findBestRho(train_X, train_y, val_X, val_y, rho_list)\n", 251 | "\n", 252 | "println(\"Best rho for lasso: \", best_rho[1] )\n", 253 | "println(\"Best rho for ridge: \", best_rho[2])" 254 | ] 255 | }, 256 | { 257 | "cell_type": "code", 258 | "execution_count": 8, 259 | "metadata": {}, 260 | "outputs": [ 261 | { 262 | "name": "stdout", 263 | "output_type": "stream", 264 | "text": [ 265 | "Academic license - for non-commercial use only\n", 266 | "\n", 267 | "Best β for standard linear regression is: [-0.182837, 0.0463545, 0.0566232, 0.814289, -5.54437, 6.49243, -0.00690827, -1.03113, 0.541436, -0.0123992, -0.524355, 0.0130461, -0.394916]\n", 268 | "Academic license - for non-commercial use only\n", 269 | "\n", 270 | "Best β for lasso is: [-0.152615, 0.0472997, 0.0245548, 0.525413, -0.0077624, 6.19149, -0.0105729, -0.985242, 0.534167, -0.0141383, -0.50909, 0.0131665, -0.428667]\n", 
271 | "Academic license - for non-commercial use only\n", 272 | "\n", 273 | "Best β for ridge is: [-0.119194, 0.0540906, 0.0293727, 0.521321, -0.0163899, 5.33832, 0.00387411, -0.923509, 0.49436, -0.0129209, -0.448035, 0.0225441, -0.500597]\n" 274 | ] 275 | } 276 | ], 277 | "source": [ 278 | "# retrain the whole model with training and validation sets together\n", 279 | "\n", 280 | "new_train_X = vcat(train_X, val_X)\n", 281 | "new_train_y = vcat(train_y, val_y)\n", 282 | "\n", 283 | "# find best beta for standard linear regression\n", 284 | "β_standard = standardlinear(new_train_X, new_train_y)\n", 285 | "println(\"\\nBest β for standard linear regression is: \", β_standard)\n", 286 | "\n", 287 | "# find best beta for lasso\n", 288 | "\n", 289 | "ρ_lasso = best_rho[1]\n", 290 | "β_lasso = lassolinear(new_train_X, new_train_y, ρ_lasso)\n", 291 | "println(\"\\nBest β for lasso is: \", β_lasso)\n", 292 | "\n", 293 | "# find best beta for ridge\n", 294 | "\n", 295 | "ρ_ridge = best_rho[2]\n", 296 | "β_ridge = ridgelinear(new_train_X, new_train_y, ρ_ridge)\n", 297 | "println(\"\\nBest β for ridge is: \", β_ridge)" 298 | ] 299 | }, 300 | { 301 | "cell_type": "code", 302 | "execution_count": 9, 303 | "metadata": {}, 304 | "outputs": [ 305 | { 306 | "name": "stdout", 307 | "output_type": "stream", 308 | "text": [ 309 | "Standard linear regression score: 91.31777137966286\n", 310 | "Lasso score: 89.47021659084704\n", 311 | "Ridge score: 83.59529327926523\n", 312 | "Baseline score: 129.2265350398843\n", 313 | "\n", 314 | "Relative gap standard linear regression % baseline: 29.335123508899578 %\n", 315 | "Relative gap lasso % baseline: 30.764825843830703 %\n", 316 | "Relative gap ridge % baseline: 35.3110464089558 %\n" 317 | ] 318 | } 319 | ], 320 | "source": [ 321 | "# score standard linear regression\n", 322 | "score_standard = norm(test_y - test_X * β_standard)\n", 323 | "println(\"Standard linear regression score: \", score_standard)\n", 324 | "\n", 325 | "# score 
lasso\n", 326 | "score_lasso = norm(test_y - test_X * β_lasso)\n", 327 | "println(\"Lasso score: \", score_lasso)\n", 328 | "\n", 329 | "# score ridge\n", 330 | "score_ridge = norm(test_y - test_X * β_ridge)\n", 331 | "println(\"Ridge score: \", score_ridge)\n", 332 | "\n", 333 | "# baseline\n", 334 | "train_y_mean = mean(new_train_y) #use mean on training and validation sets\n", 335 | "score_baseline = norm(test_y - train_y_mean)\n", 336 | "println(\"Baseline score: \", score_baseline)\n", 337 | "\n", 338 | "# compare scores of regression with the baseline model\n", 339 | "println(\"\\nRelative gap standard linear regression % baseline: \", (score_baseline - score_standard)*100/score_baseline, \" %\" )\n", 340 | "println(\"Relative gap lasso % baseline: \", (score_baseline - score_lasso)*100/score_baseline, \" %\" )\n", 341 | "println(\"Relative gap ridge % baseline: \", (score_baseline - score_ridge)*100/score_baseline, \" %\" )" 342 | ] 343 | } 344 | ], 345 | "metadata": { 346 | "kernelspec": { 347 | "display_name": "Julia 0.6.4", 348 | "language": "julia", 349 | "name": "julia-0.6" 350 | }, 351 | "language_info": { 352 | "file_extension": ".jl", 353 | "mimetype": "application/julia", 354 | "name": "julia", 355 | "version": "0.6.4" 356 | } 357 | }, 358 | "nbformat": 4, 359 | "nbformat_minor": 2 360 | } 361 | -------------------------------------------------------------------------------- /Homework1-Linear-optimization-and-Robust-linear-regression/housing.csv: -------------------------------------------------------------------------------- 1 | 0.00632,18,2.31,0,0.538,6.575,65.2,4.09,1,296,15.3,396.9,4.98,24 0.02731,0,7.07,0,0.469,6.421,78.9,4.9671,2,242,17.8,396.9,9.14,21.6 0.02729,0,7.07,0,0.469,7.185,61.1,4.9671,2,242,17.8,392.83,4.03,34.7 0.03237,0,2.18,0,0.458,6.998,45.8,6.0622,3,222,18.7,394.63,2.94,33.4 0.06905,0,2.18,0,0.458,7.147,54.2,6.0622,3,222,18.7,396.9,5.33,36.2 0.02985,0,2.18,0,0.458,6.43,58.7,6.0622,3,222,18.7,394.12,5.21,28.7 
0.08829,12.5,7.87,0,0.524,6.012,66.6,5.5605,5,311,15.2,395.6,12.43,22.9 0.14455,12.5,7.87,0,0.524,6.172,96.1,5.9505,5,311,15.2,396.9,19.15,27.1 0.21124,12.5,7.87,0,0.524,5.631,100,6.0821,5,311,15.2,386.63,29.93,16.5 0.17004,12.5,7.87,0,0.524,6.004,85.9,6.5921,5,311,15.2,386.71,17.1,18.9 0.22489,12.5,7.87,0,0.524,6.377,94.3,6.3467,5,311,15.2,392.52,20.45,15 0.11747,12.5,7.87,0,0.524,6.009,82.9,6.2267,5,311,15.2,396.9,13.27,18.9 0.09378,12.5,7.87,0,0.524,5.889,39,5.4509,5,311,15.2,390.5,15.71,21.7 0.62976,0,8.14,0,0.538,5.949,61.8,4.7075,4,307,21,396.9,8.26,20.4 0.63796,0,8.14,0,0.538,6.096,84.5,4.4619,4,307,21,380.02,10.26,18.2 0.62739,0,8.14,0,0.538,5.834,56.5,4.4986,4,307,21,395.62,8.47,19.9 1.05393,0,8.14,0,0.538,5.935,29.3,4.4986,4,307,21,386.85,6.58,23.1 0.7842,0,8.14,0,0.538,5.99,81.7,4.2579,4,307,21,386.75,14.67,17.5 0.80271,0,8.14,0,0.538,5.456,36.6,3.7965,4,307,21,288.99,11.69,20.2 0.7258,0,8.14,0,0.538,5.727,69.5,3.7965,4,307,21,390.95,11.28,18.2 1.25179,0,8.14,0,0.538,5.57,98.1,3.7979,4,307,21,376.57,21.02,13.6 0.85204,0,8.14,0,0.538,5.965,89.2,4.0123,4,307,21,392.53,13.83,19.6 1.23247,0,8.14,0,0.538,6.142,91.7,3.9769,4,307,21,396.9,18.72,15.2 0.98843,0,8.14,0,0.538,5.813,100,4.0952,4,307,21,394.54,19.88,14.5 0.75026,0,8.14,0,0.538,5.924,94.1,4.3996,4,307,21,394.33,16.3,15.6 0.84054,0,8.14,0,0.538,5.599,85.7,4.4546,4,307,21,303.42,16.51,13.9 0.67191,0,8.14,0,0.538,5.813,90.3,4.682,4,307,21,376.88,14.81,16.6 0.95577,0,8.14,0,0.538,6.047,88.8,4.4534,4,307,21,306.38,17.28,14.8 0.77299,0,8.14,0,0.538,6.495,94.4,4.4547,4,307,21,387.94,12.8,18.4 1.00245,0,8.14,0,0.538,6.674,87.3,4.239,4,307,21,380.23,11.98,21 1.13081,0,8.14,0,0.538,5.713,94.1,4.233,4,307,21,360.17,22.6,12.7 1.35472,0,8.14,0,0.538,6.072,100,4.175,4,307,21,376.73,13.04,14.5 1.38799,0,8.14,0,0.538,5.95,82,3.99,4,307,21,232.6,27.71,13.2 1.15172,0,8.14,0,0.538,5.701,95,3.7872,4,307,21,358.77,18.35,13.1 1.61282,0,8.14,0,0.538,6.096,96.9,3.7598,4,307,21,248.31,20.34,13.5 
0.06417,0,5.96,0,0.499,5.933,68.2,3.3603,5,279,19.2,396.9,9.68,18.9 0.09744,0,5.96,0,0.499,5.841,61.4,3.3779,5,279,19.2,377.56,11.41,20 0.08014,0,5.96,0,0.499,5.85,41.5,3.9342,5,279,19.2,396.9,8.77,21 0.17505,0,5.96,0,0.499,5.966,30.2,3.8473,5,279,19.2,393.43,10.13,24.7 0.02763,75,2.95,0,0.428,6.595,21.8,5.4011,3,252,18.3,395.63,4.32,30.8 0.03359,75,2.95,0,0.428,7.024,15.8,5.4011,3,252,18.3,395.62,1.98,34.9 0.12744,0,6.91,0,0.448,6.77,2.9,5.7209,3,233,17.9,385.41,4.84,26.6 0.1415,0,6.91,0,0.448,6.169,6.6,5.7209,3,233,17.9,383.37,5.81,25.3 0.15936,0,6.91,0,0.448,6.211,6.5,5.7209,3,233,17.9,394.46,7.44,24.7 0.12269,0,6.91,0,0.448,6.069,40,5.7209,3,233,17.9,389.39,9.55,21.2 0.17142,0,6.91,0,0.448,5.682,33.8,5.1004,3,233,17.9,396.9,10.21,19.3 0.18836,0,6.91,0,0.448,5.786,33.3,5.1004,3,233,17.9,396.9,14.15,20 0.22927,0,6.91,0,0.448,6.03,85.5,5.6894,3,233,17.9,392.74,18.8,16.6 0.25387,0,6.91,0,0.448,5.399,95.3,5.87,3,233,17.9,396.9,30.81,14.4 0.21977,0,6.91,0,0.448,5.602,62,6.0877,3,233,17.9,396.9,16.2,19.4 0.08873,21,5.64,0,0.439,5.963,45.7,6.8147,4,243,16.8,395.56,13.45,19.7 0.04337,21,5.64,0,0.439,6.115,63,6.8147,4,243,16.8,393.97,9.43,20.5 0.0536,21,5.64,0,0.439,6.511,21.1,6.8147,4,243,16.8,396.9,5.28,25 0.04981,21,5.64,0,0.439,5.998,21.4,6.8147,4,243,16.8,396.9,8.43,23.4 0.0136,75,4,0,0.41,5.888,47.6,7.3197,3,469,21.1,396.9,14.8,18.9 0.01311,90,1.22,0,0.403,7.249,21.9,8.6966,5,226,17.9,395.93,4.81,35.4 0.02055,85,0.74,0,0.41,6.383,35.7,9.1876,2,313,17.3,396.9,5.77,24.7 0.01432,100,1.32,0,0.411,6.816,40.5,8.3248,5,256,15.1,392.9,3.95,31.6 0.15445,25,5.13,0,0.453,6.145,29.2,7.8148,8,284,19.7,390.68,6.86,23.3 0.10328,25,5.13,0,0.453,5.927,47.2,6.932,8,284,19.7,396.9,9.22,19.6 0.14932,25,5.13,0,0.453,5.741,66.2,7.2254,8,284,19.7,395.11,13.15,18.7 0.17171,25,5.13,0,0.453,5.966,93.4,6.8185,8,284,19.7,378.08,14.44,16 0.11027,25,5.13,0,0.453,6.456,67.8,7.2255,8,284,19.7,396.9,6.73,22.2 0.1265,25,5.13,0,0.453,6.762,43.4,7.9809,8,284,19.7,395.58,9.5,25 
0.01951,17.5,1.38,0,0.4161,7.104,59.5,9.2229,3,216,18.6,393.24,8.05,33 0.03584,80,3.37,0,0.398,6.29,17.8,6.6115,4,337,16.1,396.9,4.67,23.5 0.04379,80,3.37,0,0.398,5.787,31.1,6.6115,4,337,16.1,396.9,10.24,19.4 0.05789,12.5,6.07,0,0.409,5.878,21.4,6.498,4,345,18.9,396.21,8.1,22 0.13554,12.5,6.07,0,0.409,5.594,36.8,6.498,4,345,18.9,396.9,13.09,17.4 0.12816,12.5,6.07,0,0.409,5.885,33,6.498,4,345,18.9,396.9,8.79,20.9 0.08826,0,10.81,0,0.413,6.417,6.6,5.2873,4,305,19.2,383.73,6.72,24.2 0.15876,0,10.81,0,0.413,5.961,17.5,5.2873,4,305,19.2,376.94,9.88,21.7 0.09164,0,10.81,0,0.413,6.065,7.8,5.2873,4,305,19.2,390.91,5.52,22.8 0.19539,0,10.81,0,0.413,6.245,6.2,5.2873,4,305,19.2,377.17,7.54,23.4 0.07896,0,12.83,0,0.437,6.273,6,4.2515,5,398,18.7,394.92,6.78,24.1 0.09512,0,12.83,0,0.437,6.286,45,4.5026,5,398,18.7,383.23,8.94,21.4 0.10153,0,12.83,0,0.437,6.279,74.5,4.0522,5,398,18.7,373.66,11.97,20 0.08707,0,12.83,0,0.437,6.14,45.8,4.0905,5,398,18.7,386.96,10.27,20.8 0.05646,0,12.83,0,0.437,6.232,53.7,5.0141,5,398,18.7,386.4,12.34,21.2 0.08387,0,12.83,0,0.437,5.874,36.6,4.5026,5,398,18.7,396.06,9.1,20.3 0.04113,25,4.86,0,0.426,6.727,33.5,5.4007,4,281,19,396.9,5.29,28 0.04462,25,4.86,0,0.426,6.619,70.4,5.4007,4,281,19,395.63,7.22,23.9 0.03659,25,4.86,0,0.426,6.302,32.2,5.4007,4,281,19,396.9,6.72,24.8 0.03551,25,4.86,0,0.426,6.167,46.7,5.4007,4,281,19,390.64,7.51,22.9 0.05059,0,4.49,0,0.449,6.389,48,4.7794,3,247,18.5,396.9,9.62,23.9 0.05735,0,4.49,0,0.449,6.63,56.1,4.4377,3,247,18.5,392.3,6.53,26.6 0.05188,0,4.49,0,0.449,6.015,45.1,4.4272,3,247,18.5,395.99,12.86,22.5 0.07151,0,4.49,0,0.449,6.121,56.8,3.7476,3,247,18.5,395.15,8.44,22.2 0.0566,0,3.41,0,0.489,7.007,86.3,3.4217,2,270,17.8,396.9,5.5,23.6 0.05302,0,3.41,0,0.489,7.079,63.1,3.4145,2,270,17.8,396.06,5.7,28.7 0.04684,0,3.41,0,0.489,6.417,66.1,3.0923,2,270,17.8,392.18,8.81,22.6 0.03932,0,3.41,0,0.489,6.405,73.9,3.0921,2,270,17.8,393.55,8.2,22 0.04203,28,15.04,0,0.464,6.442,53.6,3.6659,4,270,18.2,395.01,8.16,22.9 
0.02875,28,15.04,0,0.464,6.211,28.9,3.6659,4,270,18.2,396.33,6.21,25 0.04294,28,15.04,0,0.464,6.249,77.3,3.615,4,270,18.2,396.9,10.59,20.6 0.12204,0,2.89,0,0.445,6.625,57.8,3.4952,2,276,18,357.98,6.65,28.4 0.11504,0,2.89,0,0.445,6.163,69.6,3.4952,2,276,18,391.83,11.34,21.4 0.12083,0,2.89,0,0.445,8.069,76,3.4952,2,276,18,396.9,4.21,38.7 0.08187,0,2.89,0,0.445,7.82,36.9,3.4952,2,276,18,393.53,3.57,43.8 0.0686,0,2.89,0,0.445,7.416,62.5,3.4952,2,276,18,396.9,6.19,33.2 0.14866,0,8.56,0,0.52,6.727,79.9,2.7778,5,384,20.9,394.76,9.42,27.5 0.11432,0,8.56,0,0.52,6.781,71.3,2.8561,5,384,20.9,395.58,7.67,26.5 0.22876,0,8.56,0,0.52,6.405,85.4,2.7147,5,384,20.9,70.8,10.63,18.6 0.21161,0,8.56,0,0.52,6.137,87.4,2.7147,5,384,20.9,394.47,13.44,19.3 0.1396,0,8.56,0,0.52,6.167,90,2.421,5,384,20.9,392.69,12.33,20.1 0.13262,0,8.56,0,0.52,5.851,96.7,2.1069,5,384,20.9,394.05,16.47,19.5 0.1712,0,8.56,0,0.52,5.836,91.9,2.211,5,384,20.9,395.67,18.66,19.5 0.13117,0,8.56,0,0.52,6.127,85.2,2.1224,5,384,20.9,387.69,14.09,20.4 0.12802,0,8.56,0,0.52,6.474,97.1,2.4329,5,384,20.9,395.24,12.27,19.8 0.26363,0,8.56,0,0.52,6.229,91.2,2.5451,5,384,20.9,391.23,15.55,19.4 0.10793,0,8.56,0,0.52,6.195,54.4,2.7778,5,384,20.9,393.49,13,21.7 0.10084,0,10.01,0,0.547,6.715,81.6,2.6775,6,432,17.8,395.59,10.16,22.8 0.12329,0,10.01,0,0.547,5.913,92.9,2.3534,6,432,17.8,394.95,16.21,18.8 0.22212,0,10.01,0,0.547,6.092,95.4,2.548,6,432,17.8,396.9,17.09,18.7 0.14231,0,10.01,0,0.547,6.254,84.2,2.2565,6,432,17.8,388.74,10.45,18.5 0.17134,0,10.01,0,0.547,5.928,88.2,2.4631,6,432,17.8,344.91,15.76,18.3 0.13158,0,10.01,0,0.547,6.176,72.5,2.7301,6,432,17.8,393.3,12.04,21.2 0.15098,0,10.01,0,0.547,6.021,82.6,2.7474,6,432,17.8,394.51,10.3,19.2 0.13058,0,10.01,0,0.547,5.872,73.1,2.4775,6,432,17.8,338.63,15.37,20.4 0.14476,0,10.01,0,0.547,5.731,65.2,2.7592,6,432,17.8,391.5,13.61,19.3 0.06899,0,25.65,0,0.581,5.87,69.7,2.2577,2,188,19.1,389.15,14.37,22 0.07165,0,25.65,0,0.581,6.004,84.1,2.1974,2,188,19.1,377.67,14.27,20.3 
0.09299,0,25.65,0,0.581,5.961,92.9,2.0869,2,188,19.1,378.09,17.93,20.5 0.15038,0,25.65,0,0.581,5.856,97,1.9444,2,188,19.1,370.31,25.41,17.3 0.09849,0,25.65,0,0.581,5.879,95.8,2.0063,2,188,19.1,379.38,17.58,18.8 0.16902,0,25.65,0,0.581,5.986,88.4,1.9929,2,188,19.1,385.02,14.81,21.4 0.38735,0,25.65,0,0.581,5.613,95.6,1.7572,2,188,19.1,359.29,27.26,15.7 0.25915,0,21.89,0,0.624,5.693,96,1.7883,4,437,21.2,392.11,17.19,16.2 0.32543,0,21.89,0,0.624,6.431,98.8,1.8125,4,437,21.2,396.9,15.39,18 0.88125,0,21.89,0,0.624,5.637,94.7,1.9799,4,437,21.2,396.9,18.34,14.3 0.34006,0,21.89,0,0.624,6.458,98.9,2.1185,4,437,21.2,395.04,12.6,19.2 1.19294,0,21.89,0,0.624,6.326,97.7,2.271,4,437,21.2,396.9,12.26,19.6 0.59005,0,21.89,0,0.624,6.372,97.9,2.3274,4,437,21.2,385.76,11.12,23 0.32982,0,21.89,0,0.624,5.822,95.4,2.4699,4,437,21.2,388.69,15.03,18.4 0.97617,0,21.89,0,0.624,5.757,98.4,2.346,4,437,21.2,262.76,17.31,15.6 0.55778,0,21.89,0,0.624,6.335,98.2,2.1107,4,437,21.2,394.67,16.96,18.1 0.32264,0,21.89,0,0.624,5.942,93.5,1.9669,4,437,21.2,378.25,16.9,17.4 0.35233,0,21.89,0,0.624,6.454,98.4,1.8498,4,437,21.2,394.08,14.59,17.1 0.2498,0,21.89,0,0.624,5.857,98.2,1.6686,4,437,21.2,392.04,21.32,13.3 0.54452,0,21.89,0,0.624,6.151,97.9,1.6687,4,437,21.2,396.9,18.46,17.8 0.2909,0,21.89,0,0.624,6.174,93.6,1.6119,4,437,21.2,388.08,24.16,14 1.62864,0,21.89,0,0.624,5.019,100,1.4394,4,437,21.2,396.9,34.41,14.4 3.32105,0,19.58,1,0.871,5.403,100,1.3216,5,403,14.7,396.9,26.82,13.4 4.0974,0,19.58,0,0.871,5.468,100,1.4118,5,403,14.7,396.9,26.42,15.6 2.77974,0,19.58,0,0.871,4.903,97.8,1.3459,5,403,14.7,396.9,29.29,11.8 2.37934,0,19.58,0,0.871,6.13,100,1.4191,5,403,14.7,172.91,27.8,13.8 2.15505,0,19.58,0,0.871,5.628,100,1.5166,5,403,14.7,169.27,16.65,15.6 2.36862,0,19.58,0,0.871,4.926,95.7,1.4608,5,403,14.7,391.71,29.53,14.6 2.33099,0,19.58,0,0.871,5.186,93.8,1.5296,5,403,14.7,356.99,28.32,17.8 2.73397,0,19.58,0,0.871,5.597,94.9,1.5257,5,403,14.7,351.85,21.45,15.4 
1.6566,0,19.58,0,0.871,6.122,97.3,1.618,5,403,14.7,372.8,14.1,21.5 1.49632,0,19.58,0,0.871,5.404,100,1.5916,5,403,14.7,341.6,13.28,19.6 1.12658,0,19.58,1,0.871,5.012,88,1.6102,5,403,14.7,343.28,12.12,15.3 2.14918,0,19.58,0,0.871,5.709,98.5,1.6232,5,403,14.7,261.95,15.79,19.4 1.41385,0,19.58,1,0.871,6.129,96,1.7494,5,403,14.7,321.02,15.12,17 3.53501,0,19.58,1,0.871,6.152,82.6,1.7455,5,403,14.7,88.01,15.02,15.6 2.44668,0,19.58,0,0.871,5.272,94,1.7364,5,403,14.7,88.63,16.14,13.1 1.22358,0,19.58,0,0.605,6.943,97.4,1.8773,5,403,14.7,363.43,4.59,41.3 1.34284,0,19.58,0,0.605,6.066,100,1.7573,5,403,14.7,353.89,6.43,24.3 1.42502,0,19.58,0,0.871,6.51,100,1.7659,5,403,14.7,364.31,7.39,23.3 1.27346,0,19.58,1,0.605,6.25,92.6,1.7984,5,403,14.7,338.92,5.5,27 1.46336,0,19.58,0,0.605,7.489,90.8,1.9709,5,403,14.7,374.43,1.73,50 1.83377,0,19.58,1,0.605,7.802,98.2,2.0407,5,403,14.7,389.61,1.92,50 1.51902,0,19.58,1,0.605,8.375,93.9,2.162,5,403,14.7,388.45,3.32,50 2.24236,0,19.58,0,0.605,5.854,91.8,2.422,5,403,14.7,395.11,11.64,22.7 2.924,0,19.58,0,0.605,6.101,93,2.2834,5,403,14.7,240.16,9.81,25 2.01019,0,19.58,0,0.605,7.929,96.2,2.0459,5,403,14.7,369.3,3.7,50 1.80028,0,19.58,0,0.605,5.877,79.2,2.4259,5,403,14.7,227.61,12.14,23.8 2.3004,0,19.58,0,0.605,6.319,96.1,2.1,5,403,14.7,297.09,11.1,23.8 2.44953,0,19.58,0,0.605,6.402,95.2,2.2625,5,403,14.7,330.04,11.32,22.3 1.20742,0,19.58,0,0.605,5.875,94.6,2.4259,5,403,14.7,292.29,14.43,17.4 2.3139,0,19.58,0,0.605,5.88,97.3,2.3887,5,403,14.7,348.13,12.03,19.1 0.13914,0,4.05,0,0.51,5.572,88.5,2.5961,5,296,16.6,396.9,14.69,23.1 0.09178,0,4.05,0,0.51,6.416,84.1,2.6463,5,296,16.6,395.5,9.04,23.6 0.08447,0,4.05,0,0.51,5.859,68.7,2.7019,5,296,16.6,393.23,9.64,22.6 0.06664,0,4.05,0,0.51,6.546,33.1,3.1323,5,296,16.6,390.96,5.33,29.4 0.07022,0,4.05,0,0.51,6.02,47.2,3.5549,5,296,16.6,393.23,10.11,23.2 0.05425,0,4.05,0,0.51,6.315,73.4,3.3175,5,296,16.6,395.6,6.29,24.6 0.06642,0,4.05,0,0.51,6.86,74.4,2.9153,5,296,16.6,391.27,6.92,29.9 
0.0578,0,2.46,0,0.488,6.98,58.4,2.829,3,193,17.8,396.9,5.04,37.2 0.06588,0,2.46,0,0.488,7.765,83.3,2.741,3,193,17.8,395.56,7.56,39.8 0.06888,0,2.46,0,0.488,6.144,62.2,2.5979,3,193,17.8,396.9,9.45,36.2 0.09103,0,2.46,0,0.488,7.155,92.2,2.7006,3,193,17.8,394.12,4.82,37.9 0.10008,0,2.46,0,0.488,6.563,95.6,2.847,3,193,17.8,396.9,5.68,32.5 0.08308,0,2.46,0,0.488,5.604,89.8,2.9879,3,193,17.8,391,13.98,26.4 0.06047,0,2.46,0,0.488,6.153,68.8,3.2797,3,193,17.8,387.11,13.15,29.6 0.05602,0,2.46,0,0.488,7.831,53.6,3.1992,3,193,17.8,392.63,4.45,50 0.07875,45,3.44,0,0.437,6.782,41.1,3.7886,5,398,15.2,393.87,6.68,32 0.12579,45,3.44,0,0.437,6.556,29.1,4.5667,5,398,15.2,382.84,4.56,29.8 0.0837,45,3.44,0,0.437,7.185,38.9,4.5667,5,398,15.2,396.9,5.39,34.9 0.09068,45,3.44,0,0.437,6.951,21.5,6.4798,5,398,15.2,377.68,5.1,37 0.06911,45,3.44,0,0.437,6.739,30.8,6.4798,5,398,15.2,389.71,4.69,30.5 0.08664,45,3.44,0,0.437,7.178,26.3,6.4798,5,398,15.2,390.49,2.87,36.4 0.02187,60,2.93,0,0.401,6.8,9.9,6.2196,1,265,15.6,393.37,5.03,31.1 0.01439,60,2.93,0,0.401,6.604,18.8,6.2196,1,265,15.6,376.7,4.38,29.1 0.01381,80,0.46,0,0.422,7.875,32,5.6484,4,255,14.4,394.23,2.97,50 0.04011,80,1.52,0,0.404,7.287,34.1,7.309,2,329,12.6,396.9,4.08,33.3 0.04666,80,1.52,0,0.404,7.107,36.6,7.309,2,329,12.6,354.31,8.61,30.3 0.03768,80,1.52,0,0.404,7.274,38.3,7.309,2,329,12.6,392.2,6.62,34.6 0.0315,95,1.47,0,0.403,6.975,15.3,7.6534,3,402,17,396.9,4.56,34.9 0.01778,95,1.47,0,0.403,7.135,13.9,7.6534,3,402,17,384.3,4.45,32.9 0.03445,82.5,2.03,0,0.415,6.162,38.4,6.27,2,348,14.7,393.77,7.43,24.1 0.02177,82.5,2.03,0,0.415,7.61,15.7,6.27,2,348,14.7,395.38,3.11,42.3 0.0351,95,2.68,0,0.4161,7.853,33.2,5.118,4,224,14.7,392.78,3.81,48.5 0.02009,95,2.68,0,0.4161,8.034,31.9,5.118,4,224,14.7,390.55,2.88,50 0.13642,0,10.59,0,0.489,5.891,22.3,3.9454,4,277,18.6,396.9,10.87,22.6 0.22969,0,10.59,0,0.489,6.326,52.5,4.3549,4,277,18.6,394.87,10.97,24.4 0.25199,0,10.59,0,0.489,5.783,72.7,4.3549,4,277,18.6,389.43,18.06,22.5 
0.13587,0,10.59,1,0.489,6.064,59.1,4.2392,4,277,18.6,381.32,14.66,24.4 0.43571,0,10.59,1,0.489,5.344,100,3.875,4,277,18.6,396.9,23.09,20 0.17446,0,10.59,1,0.489,5.96,92.1,3.8771,4,277,18.6,393.25,17.27,21.7 0.37578,0,10.59,1,0.489,5.404,88.6,3.665,4,277,18.6,395.24,23.98,19.3 0.21719,0,10.59,1,0.489,5.807,53.8,3.6526,4,277,18.6,390.94,16.03,22.4 0.14052,0,10.59,0,0.489,6.375,32.3,3.9454,4,277,18.6,385.81,9.38,28.1 0.28955,0,10.59,0,0.489,5.412,9.8,3.5875,4,277,18.6,348.93,29.55,23.7 0.19802,0,10.59,0,0.489,6.182,42.4,3.9454,4,277,18.6,393.63,9.47,25 0.0456,0,13.89,1,0.55,5.888,56,3.1121,5,276,16.4,392.8,13.51,23.3 0.07013,0,13.89,0,0.55,6.642,85.1,3.4211,5,276,16.4,392.78,9.69,28.7 0.11069,0,13.89,1,0.55,5.951,93.8,2.8893,5,276,16.4,396.9,17.92,21.5 0.11425,0,13.89,1,0.55,6.373,92.4,3.3633,5,276,16.4,393.74,10.5,23 0.35809,0,6.2,1,0.507,6.951,88.5,2.8617,8,307,17.4,391.7,9.71,26.7 0.40771,0,6.2,1,0.507,6.164,91.3,3.048,8,307,17.4,395.24,21.46,21.7 0.62356,0,6.2,1,0.507,6.879,77.7,3.2721,8,307,17.4,390.39,9.93,27.5 0.6147,0,6.2,0,0.507,6.618,80.8,3.2721,8,307,17.4,396.9,7.6,30.1 0.31533,0,6.2,0,0.504,8.266,78.3,2.8944,8,307,17.4,385.05,4.14,44.8 0.52693,0,6.2,0,0.504,8.725,83,2.8944,8,307,17.4,382,4.63,50 0.38214,0,6.2,0,0.504,8.04,86.5,3.2157,8,307,17.4,387.38,3.13,37.6 0.41238,0,6.2,0,0.504,7.163,79.9,3.2157,8,307,17.4,372.08,6.36,31.6 0.29819,0,6.2,0,0.504,7.686,17,3.3751,8,307,17.4,377.51,3.92,46.7 0.44178,0,6.2,0,0.504,6.552,21.4,3.3751,8,307,17.4,380.34,3.76,31.5 0.537,0,6.2,0,0.504,5.981,68.1,3.6715,8,307,17.4,378.35,11.65,24.3 0.46296,0,6.2,0,0.504,7.412,76.9,3.6715,8,307,17.4,376.14,5.25,31.7 0.57529,0,6.2,0,0.507,8.337,73.3,3.8384,8,307,17.4,385.91,2.47,41.7 0.33147,0,6.2,0,0.507,8.247,70.4,3.6519,8,307,17.4,378.95,3.95,48.3 0.44791,0,6.2,1,0.507,6.726,66.5,3.6519,8,307,17.4,360.2,8.05,29 0.33045,0,6.2,0,0.507,6.086,61.5,3.6519,8,307,17.4,376.75,10.88,24 0.52058,0,6.2,1,0.507,6.631,76.5,4.148,8,307,17.4,388.45,9.54,25.1 
0.51183,0,6.2,0,0.507,7.358,71.6,4.148,8,307,17.4,390.07,4.73,31.5 0.08244,30,4.93,0,0.428,6.481,18.5,6.1899,6,300,16.6,379.41,6.36,23.7 0.09252,30,4.93,0,0.428,6.606,42.2,6.1899,6,300,16.6,383.78,7.37,23.3 0.11329,30,4.93,0,0.428,6.897,54.3,6.3361,6,300,16.6,391.25,11.38,22 0.10612,30,4.93,0,0.428,6.095,65.1,6.3361,6,300,16.6,394.62,12.4,20.1 0.1029,30,4.93,0,0.428,6.358,52.9,7.0355,6,300,16.6,372.75,11.22,22.2 0.12757,30,4.93,0,0.428,6.393,7.8,7.0355,6,300,16.6,374.71,5.19,23.7 0.20608,22,5.86,0,0.431,5.593,76.5,7.9549,7,330,19.1,372.49,12.5,17.6 0.19133,22,5.86,0,0.431,5.605,70.2,7.9549,7,330,19.1,389.13,18.46,18.5 0.33983,22,5.86,0,0.431,6.108,34.9,8.0555,7,330,19.1,390.18,9.16,24.3 0.19657,22,5.86,0,0.431,6.226,79.2,8.0555,7,330,19.1,376.14,10.15,20.5 0.16439,22,5.86,0,0.431,6.433,49.1,7.8265,7,330,19.1,374.71,9.52,24.5 0.19073,22,5.86,0,0.431,6.718,17.5,7.8265,7,330,19.1,393.74,6.56,26.2 0.1403,22,5.86,0,0.431,6.487,13,7.3967,7,330,19.1,396.28,5.9,24.4 0.21409,22,5.86,0,0.431,6.438,8.9,7.3967,7,330,19.1,377.07,3.59,24.8 0.08221,22,5.86,0,0.431,6.957,6.8,8.9067,7,330,19.1,386.09,3.53,29.6 0.36894,22,5.86,0,0.431,8.259,8.4,8.9067,7,330,19.1,396.9,3.54,42.8 0.04819,80,3.64,0,0.392,6.108,32,9.2203,1,315,16.4,392.89,6.57,21.9 0.03548,80,3.64,0,0.392,5.876,19.1,9.2203,1,315,16.4,395.18,9.25,20.9 0.01538,90,3.75,0,0.394,7.454,34.2,6.3361,3,244,15.9,386.34,3.11,44 0.61154,20,3.97,0,0.647,8.704,86.9,1.801,5,264,13,389.7,5.12,50 0.66351,20,3.97,0,0.647,7.333,100,1.8946,5,264,13,383.29,7.79,36 0.65665,20,3.97,0,0.647,6.842,100,2.0107,5,264,13,391.93,6.9,30.1 0.54011,20,3.97,0,0.647,7.203,81.8,2.1121,5,264,13,392.8,9.59,33.8 0.53412,20,3.97,0,0.647,7.52,89.4,2.1398,5,264,13,388.37,7.26,43.1 0.52014,20,3.97,0,0.647,8.398,91.5,2.2885,5,264,13,386.86,5.91,48.8 0.82526,20,3.97,0,0.647,7.327,94.5,2.0788,5,264,13,393.42,11.25,31 0.55007,20,3.97,0,0.647,7.206,91.6,1.9301,5,264,13,387.89,8.1,36.5 0.76162,20,3.97,0,0.647,5.56,62.8,1.9865,5,264,13,392.4,10.45,22.8 
0.7857,20,3.97,0,0.647,7.014,84.6,2.1329,5,264,13,384.07,14.79,30.7 0.57834,20,3.97,0,0.575,8.297,67,2.4216,5,264,13,384.54,7.44,50 0.5405,20,3.97,0,0.575,7.47,52.6,2.872,5,264,13,390.3,3.16,43.5 0.09065,20,6.96,1,0.464,5.92,61.5,3.9175,3,223,18.6,391.34,13.65,20.7 0.29916,20,6.96,0,0.464,5.856,42.1,4.429,3,223,18.6,388.65,13,21.1 0.16211,20,6.96,0,0.464,6.24,16.3,4.429,3,223,18.6,396.9,6.59,25.2 0.1146,20,6.96,0,0.464,6.538,58.7,3.9175,3,223,18.6,394.96,7.73,24.4 0.22188,20,6.96,1,0.464,7.691,51.8,4.3665,3,223,18.6,390.77,6.58,35.2 0.05644,40,6.41,1,0.447,6.758,32.9,4.0776,4,254,17.6,396.9,3.53,32.4 0.09604,40,6.41,0,0.447,6.854,42.8,4.2673,4,254,17.6,396.9,2.98,32 0.10469,40,6.41,1,0.447,7.267,49,4.7872,4,254,17.6,389.25,6.05,33.2 0.06127,40,6.41,1,0.447,6.826,27.6,4.8628,4,254,17.6,393.45,4.16,33.1 0.07978,40,6.41,0,0.447,6.482,32.1,4.1403,4,254,17.6,396.9,7.19,29.1 0.21038,20,3.33,0,0.4429,6.812,32.2,4.1007,5,216,14.9,396.9,4.85,35.1 0.03578,20,3.33,0,0.4429,7.82,64.5,4.6947,5,216,14.9,387.31,3.76,45.4 0.03705,20,3.33,0,0.4429,6.968,37.2,5.2447,5,216,14.9,392.23,4.59,35.4 0.06129,20,3.33,1,0.4429,7.645,49.7,5.2119,5,216,14.9,377.07,3.01,46 0.01501,90,1.21,1,0.401,7.923,24.8,5.885,1,198,13.6,395.52,3.16,50 0.00906,90,2.97,0,0.4,7.088,20.8,7.3073,1,285,15.3,394.72,7.85,32.2 0.01096,55,2.25,0,0.389,6.453,31.9,7.3073,1,300,15.3,394.72,8.23,22 0.01965,80,1.76,0,0.385,6.23,31.5,9.0892,1,241,18.2,341.6,12.93,20.1 0.03871,52.5,5.32,0,0.405,6.209,31.3,7.3172,6,293,16.6,396.9,7.14,23.2 0.0459,52.5,5.32,0,0.405,6.315,45.6,7.3172,6,293,16.6,396.9,7.6,22.3 0.04297,52.5,5.32,0,0.405,6.565,22.9,7.3172,6,293,16.6,371.72,9.51,24.8 0.03502,80,4.95,0,0.411,6.861,27.9,5.1167,4,245,19.2,396.9,3.33,28.5 0.07886,80,4.95,0,0.411,7.148,27.7,5.1167,4,245,19.2,396.9,3.56,37.3 0.03615,80,4.95,0,0.411,6.63,23.4,5.1167,4,245,19.2,396.9,4.7,27.9 0.08265,0,13.92,0,0.437,6.127,18.4,5.5027,4,289,16,396.9,8.58,23.9 0.08199,0,13.92,0,0.437,6.009,42.3,5.5027,4,289,16,396.9,10.4,21.7 
0.12932,0,13.92,0,0.437,6.678,31.1,5.9604,4,289,16,396.9,6.27,28.6 0.05372,0,13.92,0,0.437,6.549,51,5.9604,4,289,16,392.85,7.39,27.1 0.14103,0,13.92,0,0.437,5.79,58,6.32,4,289,16,396.9,15.84,20.3 0.06466,70,2.24,0,0.4,6.345,20.1,7.8278,5,358,14.8,368.24,4.97,22.5 0.05561,70,2.24,0,0.4,7.041,10,7.8278,5,358,14.8,371.58,4.74,29 0.04417,70,2.24,0,0.4,6.871,47.4,7.8278,5,358,14.8,390.86,6.07,24.8 0.03537,34,6.09,0,0.433,6.59,40.4,5.4917,7,329,16.1,395.75,9.5,22 0.09266,34,6.09,0,0.433,6.495,18.4,5.4917,7,329,16.1,383.61,8.67,26.4 0.1,34,6.09,0,0.433,6.982,17.7,5.4917,7,329,16.1,390.43,4.86,33.1 0.05515,33,2.18,0,0.472,7.236,41.1,4.022,7,222,18.4,393.68,6.93,36.1 0.05479,33,2.18,0,0.472,6.616,58.1,3.37,7,222,18.4,393.36,8.93,28.4 0.07503,33,2.18,0,0.472,7.42,71.9,3.0992,7,222,18.4,396.9,6.47,33.4 0.04932,33,2.18,0,0.472,6.849,70.3,3.1827,7,222,18.4,396.9,7.53,28.2 0.49298,0,9.9,0,0.544,6.635,82.5,3.3175,4,304,18.4,396.9,4.54,22.8 0.3494,0,9.9,0,0.544,5.972,76.7,3.1025,4,304,18.4,396.24,9.97,20.3 2.63548,0,9.9,0,0.544,4.973,37.8,2.5194,4,304,18.4,350.45,12.64,16.1 0.79041,0,9.9,0,0.544,6.122,52.8,2.6403,4,304,18.4,396.9,5.98,22.1 0.26169,0,9.9,0,0.544,6.023,90.4,2.834,4,304,18.4,396.3,11.72,19.4 0.26938,0,9.9,0,0.544,6.266,82.8,3.2628,4,304,18.4,393.39,7.9,21.6 0.3692,0,9.9,0,0.544,6.567,87.3,3.6023,4,304,18.4,395.69,9.28,23.8 0.25356,0,9.9,0,0.544,5.705,77.7,3.945,4,304,18.4,396.42,11.5,16.2 0.31827,0,9.9,0,0.544,5.914,83.2,3.9986,4,304,18.4,390.7,18.33,17.8 0.24522,0,9.9,0,0.544,5.782,71.7,4.0317,4,304,18.4,396.9,15.94,19.8 0.40202,0,9.9,0,0.544,6.382,67.2,3.5325,4,304,18.4,395.21,10.36,23.1 0.47547,0,9.9,0,0.544,6.113,58.8,4.0019,4,304,18.4,396.23,12.73,21 0.1676,0,7.38,0,0.493,6.426,52.3,4.5404,5,287,19.6,396.9,7.2,23.8 0.18159,0,7.38,0,0.493,6.376,54.3,4.5404,5,287,19.6,396.9,6.87,23.1 0.35114,0,7.38,0,0.493,6.041,49.9,4.7211,5,287,19.6,396.9,7.7,20.4 0.28392,0,7.38,0,0.493,5.708,74.3,4.7211,5,287,19.6,391.13,11.74,18.5 
0.34109,0,7.38,0,0.493,6.415,40.1,4.7211,5,287,19.6,396.9,6.12,25 0.19186,0,7.38,0,0.493,6.431,14.7,5.4159,5,287,19.6,393.68,5.08,24.6 0.30347,0,7.38,0,0.493,6.312,28.9,5.4159,5,287,19.6,396.9,6.15,23 0.24103,0,7.38,0,0.493,6.083,43.7,5.4159,5,287,19.6,396.9,12.79,22.2 0.06617,0,3.24,0,0.46,5.868,25.8,5.2146,4,430,16.9,382.44,9.97,19.3 0.06724,0,3.24,0,0.46,6.333,17.2,5.2146,4,430,16.9,375.21,7.34,22.6 0.04544,0,3.24,0,0.46,6.144,32.2,5.8736,4,430,16.9,368.57,9.09,19.8 0.05023,35,6.06,0,0.4379,5.706,28.4,6.6407,1,304,16.9,394.02,12.43,17.1 0.03466,35,6.06,0,0.4379,6.031,23.3,6.6407,1,304,16.9,362.25,7.83,19.4 0.05083,0,5.19,0,0.515,6.316,38.1,6.4584,5,224,20.2,389.71,5.68,22.2 0.03738,0,5.19,0,0.515,6.31,38.5,6.4584,5,224,20.2,389.4,6.75,20.7 0.03961,0,5.19,0,0.515,6.037,34.5,5.9853,5,224,20.2,396.9,8.01,21.1 0.03427,0,5.19,0,0.515,5.869,46.3,5.2311,5,224,20.2,396.9,9.8,19.5 0.03041,0,5.19,0,0.515,5.895,59.6,5.615,5,224,20.2,394.81,10.56,18.5 0.03306,0,5.19,0,0.515,6.059,37.3,4.8122,5,224,20.2,396.14,8.51,20.6 0.05497,0,5.19,0,0.515,5.985,45.4,4.8122,5,224,20.2,396.9,9.74,19 0.06151,0,5.19,0,0.515,5.968,58.5,4.8122,5,224,20.2,396.9,9.29,18.7 0.01301,35,1.52,0,0.442,7.241,49.3,7.0379,1,284,15.5,394.74,5.49,32.7 0.02498,0,1.89,0,0.518,6.54,59.7,6.2669,1,422,15.9,389.96,8.65,16.5 0.02543,55,3.78,0,0.484,6.696,56.4,5.7321,5,370,17.6,396.9,7.18,23.9 0.03049,55,3.78,0,0.484,6.874,28.1,6.4654,5,370,17.6,387.97,4.61,31.2 0.03113,0,4.39,0,0.442,6.014,48.5,8.0136,3,352,18.8,385.64,10.53,17.5 0.06162,0,4.39,0,0.442,5.898,52.3,8.0136,3,352,18.8,364.61,12.67,17.2 0.0187,85,4.15,0,0.429,6.516,27.7,8.5353,4,351,17.9,392.43,6.36,23.1 0.01501,80,2.01,0,0.435,6.635,29.7,8.344,4,280,17,390.94,5.99,24.5 0.02899,40,1.25,0,0.429,6.939,34.5,8.7921,1,335,19.7,389.85,5.89,26.6 0.06211,40,1.25,0,0.429,6.49,44.4,8.7921,1,335,19.7,396.9,5.98,22.9 0.0795,60,1.69,0,0.411,6.579,35.9,10.7103,4,411,18.3,370.78,5.49,24.1 0.07244,60,1.69,0,0.411,5.884,18.5,10.7103,4,411,18.3,392.33,7.79,18.6 
0.01709,90,2.02,0,0.41,6.728,36.1,12.1265,5,187,17,384.46,4.5,30.1 0.04301,80,1.91,0,0.413,5.663,21.9,10.5857,4,334,22,382.8,8.05,18.2 0.10659,80,1.91,0,0.413,5.936,19.5,10.5857,4,334,22,376.04,5.57,20.6 8.98296,0,18.1,1,0.77,6.212,97.4,2.1222,24,666,20.2,377.73,17.6,17.8 3.8497,0,18.1,1,0.77,6.395,91,2.5052,24,666,20.2,391.34,13.27,21.7 5.20177,0,18.1,1,0.77,6.127,83.4,2.7227,24,666,20.2,395.43,11.48,22.7 4.26131,0,18.1,0,0.77,6.112,81.3,2.5091,24,666,20.2,390.74,12.67,22.6 4.54192,0,18.1,0,0.77,6.398,88,2.5182,24,666,20.2,374.56,7.79,25 3.83684,0,18.1,0,0.77,6.251,91.1,2.2955,24,666,20.2,350.65,14.19,19.9 3.67822,0,18.1,0,0.77,5.362,96.2,2.1036,24,666,20.2,380.79,10.19,20.8 4.22239,0,18.1,1,0.77,5.803,89,1.9047,24,666,20.2,353.04,14.64,16.8 3.47428,0,18.1,1,0.718,8.78,82.9,1.9047,24,666,20.2,354.55,5.29,21.9 4.55587,0,18.1,0,0.718,3.561,87.9,1.6132,24,666,20.2,354.7,7.12,27.5 3.69695,0,18.1,0,0.718,4.963,91.4,1.7523,24,666,20.2,316.03,14,21.9 13.5222,0,18.1,0,0.631,3.863,100,1.5106,24,666,20.2,131.42,13.33,23.1 4.89822,0,18.1,0,0.631,4.97,100,1.3325,24,666,20.2,375.52,3.26,50 5.66998,0,18.1,1,0.631,6.683,96.8,1.3567,24,666,20.2,375.33,3.73,50 6.53876,0,18.1,1,0.631,7.016,97.5,1.2024,24,666,20.2,392.05,2.96,50 9.2323,0,18.1,0,0.631,6.216,100,1.1691,24,666,20.2,366.15,9.53,50 8.26725,0,18.1,1,0.668,5.875,89.6,1.1296,24,666,20.2,347.88,8.88,50 11.1081,0,18.1,0,0.668,4.906,100,1.1742,24,666,20.2,396.9,34.77,13.8 18.4982,0,18.1,0,0.668,4.138,100,1.137,24,666,20.2,396.9,37.97,13.8 19.6091,0,18.1,0,0.671,7.313,97.9,1.3163,24,666,20.2,396.9,13.44,15 15.288,0,18.1,0,0.671,6.649,93.3,1.3449,24,666,20.2,363.02,23.24,13.9 9.82349,0,18.1,0,0.671,6.794,98.8,1.358,24,666,20.2,396.9,21.24,13.3 23.6482,0,18.1,0,0.671,6.38,96.2,1.3861,24,666,20.2,396.9,23.69,13.1 17.8667,0,18.1,0,0.671,6.223,100,1.3861,24,666,20.2,393.74,21.78,10.2 88.9762,0,18.1,0,0.671,6.968,91.9,1.4165,24,666,20.2,396.9,17.21,10.4 15.8744,0,18.1,0,0.671,6.545,99.1,1.5192,24,666,20.2,396.9,21.08,10.9 
9.18702,0,18.1,0,0.7,5.536,100,1.5804,24,666,20.2,396.9,23.6,11.3 7.99248,0,18.1,0,0.7,5.52,100,1.5331,24,666,20.2,396.9,24.56,12.3 20.0849,0,18.1,0,0.7,4.368,91.2,1.4395,24,666,20.2,285.83,30.63,8.8 16.8118,0,18.1,0,0.7,5.277,98.1,1.4261,24,666,20.2,396.9,30.81,7.2 24.3938,0,18.1,0,0.7,4.652,100,1.4672,24,666,20.2,396.9,28.28,10.5 22.5971,0,18.1,0,0.7,5,89.5,1.5184,24,666,20.2,396.9,31.99,7.4 14.3337,0,18.1,0,0.7,4.88,100,1.5895,24,666,20.2,372.92,30.62,10.2 8.15174,0,18.1,0,0.7,5.39,98.9,1.7281,24,666,20.2,396.9,20.85,11.5 6.96215,0,18.1,0,0.7,5.713,97,1.9265,24,666,20.2,394.43,17.11,15.1 5.29305,0,18.1,0,0.7,6.051,82.5,2.1678,24,666,20.2,378.38,18.76,23.2 11.5779,0,18.1,0,0.7,5.036,97,1.77,24,666,20.2,396.9,25.68,9.7 8.64476,0,18.1,0,0.693,6.193,92.6,1.7912,24,666,20.2,396.9,15.17,13.8 13.3598,0,18.1,0,0.693,5.887,94.7,1.7821,24,666,20.2,396.9,16.35,12.7 8.71675,0,18.1,0,0.693,6.471,98.8,1.7257,24,666,20.2,391.98,17.12,13.1 5.87205,0,18.1,0,0.693,6.405,96,1.6768,24,666,20.2,396.9,19.37,12.5 7.67202,0,18.1,0,0.693,5.747,98.9,1.6334,24,666,20.2,393.1,19.92,8.5 38.3518,0,18.1,0,0.693,5.453,100,1.4896,24,666,20.2,396.9,30.59,5 9.91655,0,18.1,0,0.693,5.852,77.8,1.5004,24,666,20.2,338.16,29.97,6.3 25.0461,0,18.1,0,0.693,5.987,100,1.5888,24,666,20.2,396.9,26.77,5.6 14.2362,0,18.1,0,0.693,6.343,100,1.5741,24,666,20.2,396.9,20.32,7.2 9.59571,0,18.1,0,0.693,6.404,100,1.639,24,666,20.2,376.11,20.31,12.1 24.8017,0,18.1,0,0.693,5.349,96,1.7028,24,666,20.2,396.9,19.77,8.3 41.5292,0,18.1,0,0.693,5.531,85.4,1.6074,24,666,20.2,329.46,27.38,8.5 67.9208,0,18.1,0,0.693,5.683,100,1.4254,24,666,20.2,384.97,22.98,5 20.7162,0,18.1,0,0.659,4.138,100,1.1781,24,666,20.2,370.22,23.34,11.9 11.9511,0,18.1,0,0.659,5.608,100,1.2852,24,666,20.2,332.09,12.13,27.9 7.40389,0,18.1,0,0.597,5.617,97.9,1.4547,24,666,20.2,314.64,26.4,17.2 14.4383,0,18.1,0,0.597,6.852,100,1.4655,24,666,20.2,179.36,19.78,27.5 51.1358,0,18.1,0,0.597,5.757,100,1.413,24,666,20.2,2.6,10.11,15 
14.0507,0,18.1,0,0.597,6.657,100,1.5275,24,666,20.2,35.05,21.22,17.2 18.811,0,18.1,0,0.597,4.628,100,1.5539,24,666,20.2,28.79,34.37,17.9 28.6558,0,18.1,0,0.597,5.155,100,1.5894,24,666,20.2,210.97,20.08,16.3 45.7461,0,18.1,0,0.693,4.519,100,1.6582,24,666,20.2,88.27,36.98,7 18.0846,0,18.1,0,0.679,6.434,100,1.8347,24,666,20.2,27.25,29.05,7.2 10.8342,0,18.1,0,0.679,6.782,90.8,1.8195,24,666,20.2,21.57,25.79,7.5 25.9406,0,18.1,0,0.679,5.304,89.1,1.6475,24,666,20.2,127.36,26.64,10.4 73.5341,0,18.1,0,0.679,5.957,100,1.8026,24,666,20.2,16.45,20.62,8.8 11.8123,0,18.1,0,0.718,6.824,76.5,1.794,24,666,20.2,48.45,22.74,8.4 11.0874,0,18.1,0,0.718,6.411,100,1.8589,24,666,20.2,318.75,15.02,16.7 7.02259,0,18.1,0,0.718,6.006,95.3,1.8746,24,666,20.2,319.98,15.7,14.2 12.0482,0,18.1,0,0.614,5.648,87.6,1.9512,24,666,20.2,291.55,14.1,20.8 7.05042,0,18.1,0,0.614,6.103,85.1,2.0218,24,666,20.2,2.52,23.29,13.4 8.79212,0,18.1,0,0.584,5.565,70.6,2.0635,24,666,20.2,3.65,17.16,11.7 15.8603,0,18.1,0,0.679,5.896,95.4,1.9096,24,666,20.2,7.68,24.39,8.3 12.2472,0,18.1,0,0.584,5.837,59.7,1.9976,24,666,20.2,24.65,15.69,10.2 37.6619,0,18.1,0,0.679,6.202,78.7,1.8629,24,666,20.2,18.82,14.52,10.9 7.36711,0,18.1,0,0.679,6.193,78.1,1.9356,24,666,20.2,96.73,21.52,11 9.33889,0,18.1,0,0.679,6.38,95.6,1.9682,24,666,20.2,60.72,24.08,9.5 8.49213,0,18.1,0,0.584,6.348,86.1,2.0527,24,666,20.2,83.45,17.64,14.5 10.0623,0,18.1,0,0.584,6.833,94.3,2.0882,24,666,20.2,81.33,19.69,14.1 6.44405,0,18.1,0,0.584,6.425,74.8,2.2004,24,666,20.2,97.95,12.03,16.1 5.58107,0,18.1,0,0.713,6.436,87.9,2.3158,24,666,20.2,100.19,16.22,14.3 13.9134,0,18.1,0,0.713,6.208,95,2.2222,24,666,20.2,100.63,15.17,11.7 11.1604,0,18.1,0,0.74,6.629,94.6,2.1247,24,666,20.2,109.85,23.27,13.4 14.4208,0,18.1,0,0.74,6.461,93.3,2.0026,24,666,20.2,27.49,18.05,9.6 15.1772,0,18.1,0,0.74,6.152,100,1.9142,24,666,20.2,9.32,26.45,8.7 13.6781,0,18.1,0,0.74,5.935,87.9,1.8206,24,666,20.2,68.95,34.02,8.4 9.39063,0,18.1,0,0.74,5.627,93.9,1.8172,24,666,20.2,396.9,22.88,12.8 
22.0511,0,18.1,0,0.74,5.818,92.4,1.8662,24,666,20.2,391.45,22.11,10.5 9.72418,0,18.1,0,0.74,6.406,97.2,2.0651,24,666,20.2,385.96,19.52,17.1 5.66637,0,18.1,0,0.74,6.219,100,2.0048,24,666,20.2,395.69,16.59,18.4 9.96654,0,18.1,0,0.74,6.485,100,1.9784,24,666,20.2,386.73,18.85,15.4 12.8023,0,18.1,0,0.74,5.854,96.6,1.8956,24,666,20.2,240.52,23.79,10.8 10.6718,0,18.1,0,0.74,6.459,94.8,1.9879,24,666,20.2,43.06,23.98,11.8 6.28807,0,18.1,0,0.74,6.341,96.4,2.072,24,666,20.2,318.01,17.79,14.9 9.92485,0,18.1,0,0.74,6.251,96.6,2.198,24,666,20.2,388.52,16.44,12.6 9.32909,0,18.1,0,0.713,6.185,98.7,2.2616,24,666,20.2,396.9,18.13,14.1 7.52601,0,18.1,0,0.713,6.417,98.3,2.185,24,666,20.2,304.21,19.31,13 6.71772,0,18.1,0,0.713,6.749,92.6,2.3236,24,666,20.2,0.32,17.44,13.4 5.44114,0,18.1,0,0.713,6.655,98.2,2.3552,24,666,20.2,355.29,17.73,15.2 5.09017,0,18.1,0,0.713,6.297,91.8,2.3682,24,666,20.2,385.09,17.27,16.1 8.24809,0,18.1,0,0.713,7.393,99.3,2.4527,24,666,20.2,375.87,16.74,17.8 9.51363,0,18.1,0,0.713,6.728,94.1,2.4961,24,666,20.2,6.68,18.71,14.9 4.75237,0,18.1,0,0.713,6.525,86.5,2.4358,24,666,20.2,50.92,18.13,14.1 4.66883,0,18.1,0,0.713,5.976,87.9,2.5806,24,666,20.2,10.48,19.01,12.7 8.20058,0,18.1,0,0.713,5.936,80.3,2.7792,24,666,20.2,3.5,16.94,13.5 7.75223,0,18.1,0,0.713,6.301,83.7,2.7831,24,666,20.2,272.21,16.23,14.9 6.80117,0,18.1,0,0.713,6.081,84.4,2.7175,24,666,20.2,396.9,14.7,20 4.81213,0,18.1,0,0.713,6.701,90,2.5975,24,666,20.2,255.23,16.42,16.4 3.69311,0,18.1,0,0.713,6.376,88.4,2.5671,24,666,20.2,391.43,14.65,17.7 6.65492,0,18.1,0,0.713,6.317,83,2.7344,24,666,20.2,396.9,13.99,19.5 5.82115,0,18.1,0,0.713,6.513,89.9,2.8016,24,666,20.2,393.82,10.29,20.2 7.83932,0,18.1,0,0.655,6.209,65.4,2.9634,24,666,20.2,396.9,13.22,21.4 3.1636,0,18.1,0,0.655,5.759,48.2,3.0665,24,666,20.2,334.4,14.13,19.9 3.77498,0,18.1,0,0.655,5.952,84.7,2.8715,24,666,20.2,22.01,17.15,19 4.42228,0,18.1,0,0.584,6.003,94.5,2.5403,24,666,20.2,331.29,21.32,19.1 
15.5757,0,18.1,0,0.58,5.926,71,2.9084,24,666,20.2,368.74,18.13,19.1 13.0751,0,18.1,0,0.58,5.713,56.7,2.8237,24,666,20.2,396.9,14.76,20.1 4.34879,0,18.1,0,0.58,6.167,84,3.0334,24,666,20.2,396.9,16.29,19.9 4.03841,0,18.1,0,0.532,6.229,90.7,3.0993,24,666,20.2,395.33,12.87,19.6 3.56868,0,18.1,0,0.58,6.437,75,2.8965,24,666,20.2,393.37,14.36,23.2 4.64689,0,18.1,0,0.614,6.98,67.6,2.5329,24,666,20.2,374.68,11.66,29.8 8.05579,0,18.1,0,0.584,5.427,95.4,2.4298,24,666,20.2,352.58,18.14,13.8 6.39312,0,18.1,0,0.584,6.162,97.4,2.206,24,666,20.2,302.76,24.1,13.3 4.87141,0,18.1,0,0.614,6.484,93.6,2.3053,24,666,20.2,396.21,18.68,16.7 15.0234,0,18.1,0,0.614,5.304,97.3,2.1007,24,666,20.2,349.48,24.91,12 10.233,0,18.1,0,0.614,6.185,96.7,2.1705,24,666,20.2,379.7,18.03,14.6 14.3337,0,18.1,0,0.614,6.229,88,1.9512,24,666,20.2,383.32,13.11,21.4 5.82401,0,18.1,0,0.532,6.242,64.7,3.4242,24,666,20.2,396.9,10.74,23 5.70818,0,18.1,0,0.532,6.75,74.9,3.3317,24,666,20.2,393.07,7.74,23.7 5.73116,0,18.1,0,0.532,7.061,77,3.4106,24,666,20.2,395.28,7.01,25 2.81838,0,18.1,0,0.532,5.762,40.3,4.0983,24,666,20.2,392.92,10.42,21.8 2.37857,0,18.1,0,0.583,5.871,41.9,3.724,24,666,20.2,370.73,13.34,20.6 3.67367,0,18.1,0,0.583,6.312,51.9,3.9917,24,666,20.2,388.62,10.58,21.2 5.69175,0,18.1,0,0.583,6.114,79.8,3.5459,24,666,20.2,392.68,14.98,19.1 4.83567,0,18.1,0,0.583,5.905,53.2,3.1523,24,666,20.2,388.22,11.45,20.6 0.15086,0,27.74,0,0.609,5.454,92.7,1.8209,4,711,20.1,395.09,18.06,15.2 0.18337,0,27.74,0,0.609,5.414,98.3,1.7554,4,711,20.1,344.05,23.97,7 0.20746,0,27.74,0,0.609,5.093,98,1.8226,4,711,20.1,318.43,29.68,8.1 0.10574,0,27.74,0,0.609,5.983,98.8,1.8681,4,711,20.1,390.11,18.07,13.6 0.11132,0,27.74,0,0.609,5.983,83.5,2.1099,4,711,20.1,396.9,13.35,20.1 0.17331,0,9.69,0,0.585,5.707,54,2.3817,6,391,19.2,396.9,12.01,21.8 0.27957,0,9.69,0,0.585,5.926,42.6,2.3817,6,391,19.2,396.9,13.59,24.5 0.17899,0,9.69,0,0.585,5.67,28.8,2.7986,6,391,19.2,393.29,17.6,23.1 
0.2896,0,9.69,0,0.585,5.39,72.9,2.7986,6,391,19.2,396.9,21.14,19.7 0.26838,0,9.69,0,0.585,5.794,70.6,2.8927,6,391,19.2,396.9,14.1,18.3 0.23912,0,9.69,0,0.585,6.019,65.3,2.4091,6,391,19.2,396.9,12.92,21.2 0.17783,0,9.69,0,0.585,5.569,73.5,2.3999,6,391,19.2,395.77,15.1,17.5 0.22438,0,9.69,0,0.585,6.027,79.7,2.4982,6,391,19.2,396.9,14.33,16.8 0.06263,0,11.93,0,0.573,6.593,69.1,2.4786,1,273,21,391.99,9.67,22.4 0.04527,0,11.93,0,0.573,6.12,76.7,2.2875,1,273,21,396.9,9.08,20.6 0.06076,0,11.93,0,0.573,6.976,91,2.1675,1,273,21,396.9,5.64,23.9 0.10959,0,11.93,0,0.573,6.794,89.3,2.3889,1,273,21,393.45,6.48,22 0.04741,0,11.93,0,0.573,6.03,80.8,2.505,1,273,21,396.9,7.88,11.9 -------------------------------------------------------------------------------- /Homework1-Linear-optimization-and-Robust-linear-regression/hw1-answers-Nhi.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kanguyn/Machine-Learning-Under-Optimization-Lens-Fall2018/8e7ecc1eb36b21cf5f1f10f6c8299bc14a6ba766/Homework1-Linear-optimization-and-Robust-linear-regression/hw1-answers-Nhi.pdf -------------------------------------------------------------------------------- /Homework1-Linear-optimization-and-Robust-linear-regression/hw2-linear-optimization-and-robust-linear-regression.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kanguyn/Machine-Learning-Under-Optimization-Lens-Fall2018/8e7ecc1eb36b21cf5f1f10f6c8299bc14a6ba766/Homework1-Linear-optimization-and-Robust-linear-regression/hw2-linear-optimization-and-robust-linear-regression.pdf -------------------------------------------------------------------------------- /Homework2-Algorithmic-framework-for-linear-regression-and-convex-regression/1.Algorithmic-Framework-for-Linear-Regression.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": 
"markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# 15.095 Homework 2 - October 10, 2018\n", 8 | "### Kim-Anh-Nhi Nguyen - MIT ID: 913785552" 9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | "metadata": {}, 14 | "source": [ 15 | "## 1. Algorithmic Framework for Linear Regression" 16 | ] 17 | }, 18 | { 19 | "cell_type": "markdown", 20 | "metadata": {}, 21 | "source": [ 22 | "### a. Read the data sets and separate into training, validation and testing sets (50%, 25% and 25% respectively)" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": 1, 28 | "metadata": {}, 29 | "outputs": [], 30 | "source": [ 31 | "using JuMP, Gurobi, DataFrames" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": 2, 37 | "metadata": {}, 38 | "outputs": [ 39 | { 40 | "name": "stderr", 41 | "output_type": "stream", 42 | "text": [ 43 | "\u001b[1m\u001b[33mWARNING: \u001b[39m\u001b[22m\u001b[33mreadtable is deprecated, use CSV.read from the CSV package instead\u001b[39m\n", 44 | "Stacktrace:\n", 45 | " [1] \u001b[1mdepwarn\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::String, ::Symbol\u001b[1m)\u001b[22m\u001b[22m at \u001b[1m.\\deprecated.jl:70\u001b[22m\u001b[22m\n", 46 | " [2] \u001b[1m#readtable#232\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::Bool, ::Char, ::Array{Char,1}, ::Char, ::Array{String,1}, ::Array{String,1}, ::Array{String,1}, ::Bool, ::Int64, ::Array{Symbol,1}, ::Array{Any,1}, ::Bool, ::Char, ::Bool, ::Int64, ::Array{Int64,1}, ::Bool, ::Symbol, ::Bool, ::Bool, ::DataFrames.#readtable, ::String\u001b[1m)\u001b[22m\u001b[22m at \u001b[1mC:\\Users\\utilisateur\\.julia\\v0.6\\DataFrames\\src\\deprecated.jl:1045\u001b[22m\u001b[22m\n", 47 | " [3] \u001b[1m(::DataFrames.#kw##readtable)\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::Array{Any,1}, ::DataFrames.#readtable, ::String\u001b[1m)\u001b[22m\u001b[22m at \u001b[1m.\\:0\u001b[22m\u001b[22m\n", 48 | " [4] 
\u001b[1minclude_string\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::String, ::String\u001b[1m)\u001b[22m\u001b[22m at \u001b[1m.\\loading.jl:522\u001b[22m\u001b[22m\n", 49 | " [5] \u001b[1minclude_string\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::Module, ::String, ::String\u001b[1m)\u001b[22m\u001b[22m at \u001b[1mC:\\Users\\utilisateur\\.julia\\v0.6\\Compat\\src\\Compat.jl:88\u001b[22m\u001b[22m\n", 50 | " [6] \u001b[1mexecute_request\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::ZMQ.Socket, ::IJulia.Msg\u001b[1m)\u001b[22m\u001b[22m at \u001b[1mC:\\Users\\utilisateur\\.julia\\v0.6\\IJulia\\src\\execute_request.jl:180\u001b[22m\u001b[22m\n", 51 | " [7] \u001b[1m(::Compat.#inner#14{Array{Any,1},IJulia.#execute_request,Tuple{ZMQ.Socket,IJulia.Msg}})\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m\u001b[1m)\u001b[22m\u001b[22m at \u001b[1mC:\\Users\\utilisateur\\.julia\\v0.6\\Compat\\src\\Compat.jl:332\u001b[22m\u001b[22m\n", 52 | " [8] \u001b[1meventloop\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::ZMQ.Socket\u001b[1m)\u001b[22m\u001b[22m at \u001b[1mC:\\Users\\utilisateur\\.julia\\v0.6\\IJulia\\src\\eventloop.jl:8\u001b[22m\u001b[22m\n", 53 | " [9] \u001b[1m(::IJulia.##15#18)\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m\u001b[1m)\u001b[22m\u001b[22m at \u001b[1m.\\task.jl:335\u001b[22m\u001b[22m\n", 54 | "while loading In[2], in expression starting on line 1\n", 55 | "\u001b[1m\u001b[33mWARNING: \u001b[39m\u001b[22m\u001b[33mreadtable is deprecated, use CSV.read from the CSV package instead\u001b[39m\n", 56 | "Stacktrace:\n", 57 | " [1] \u001b[1mdepwarn\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::String, ::Symbol\u001b[1m)\u001b[22m\u001b[22m at \u001b[1m.\\deprecated.jl:70\u001b[22m\u001b[22m\n", 58 | " [2] \u001b[1m#readtable#232\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::Bool, ::Char, ::Array{Char,1}, ::Char, ::Array{String,1}, ::Array{String,1}, ::Array{String,1}, ::Bool, ::Int64, ::Array{Symbol,1}, 
::Array{Any,1}, ::Bool, ::Char, ::Bool, ::Int64, ::Array{Int64,1}, ::Bool, ::Symbol, ::Bool, ::Bool, ::DataFrames.#readtable, ::String\u001b[1m)\u001b[22m\u001b[22m at \u001b[1mC:\\Users\\utilisateur\\.julia\\v0.6\\DataFrames\\src\\deprecated.jl:1045\u001b[22m\u001b[22m\n", 59 | " [3] \u001b[1m(::DataFrames.#kw##readtable)\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::Array{Any,1}, ::DataFrames.#readtable, ::String\u001b[1m)\u001b[22m\u001b[22m at \u001b[1m.\\:0\u001b[22m\u001b[22m\n", 60 | " [4] \u001b[1minclude_string\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::String, ::String\u001b[1m)\u001b[22m\u001b[22m at \u001b[1m.\\loading.jl:522\u001b[22m\u001b[22m\n", 61 | " [5] \u001b[1minclude_string\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::Module, ::String, ::String\u001b[1m)\u001b[22m\u001b[22m at \u001b[1mC:\\Users\\utilisateur\\.julia\\v0.6\\Compat\\src\\Compat.jl:88\u001b[22m\u001b[22m\n", 62 | " [6] \u001b[1mexecute_request\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::ZMQ.Socket, ::IJulia.Msg\u001b[1m)\u001b[22m\u001b[22m at \u001b[1mC:\\Users\\utilisateur\\.julia\\v0.6\\IJulia\\src\\execute_request.jl:180\u001b[22m\u001b[22m\n", 63 | " [7] \u001b[1m(::Compat.#inner#14{Array{Any,1},IJulia.#execute_request,Tuple{ZMQ.Socket,IJulia.Msg}})\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m\u001b[1m)\u001b[22m\u001b[22m at \u001b[1mC:\\Users\\utilisateur\\.julia\\v0.6\\Compat\\src\\Compat.jl:332\u001b[22m\u001b[22m\n", 64 | " [8] \u001b[1meventloop\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::ZMQ.Socket\u001b[1m)\u001b[22m\u001b[22m at \u001b[1mC:\\Users\\utilisateur\\.julia\\v0.6\\IJulia\\src\\eventloop.jl:8\u001b[22m\u001b[22m\n", 65 | " [9] \u001b[1m(::IJulia.##15#18)\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m\u001b[1m)\u001b[22m\u001b[22m at \u001b[1m.\\task.jl:335\u001b[22m\u001b[22m\n", 66 | "while loading In[2], in expression starting on line 2\n" 67 | ] 68 | } 69 | ], 70 | "source": [ 71 | "df1 = readtable(\"data1.csv\", 
header=true);\n", 72 | "df2 = readtable(\"data2.csv\", header=true);" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": 3, 78 | "metadata": {}, 79 | "outputs": [], 80 | "source": [ 81 | "function separate_dataset(dataset)\n", 82 | " show=true\n", 83 | "\n", 84 | " X, y = Matrix(dataset[:,1:end-1]), dataset[:,end]\n", 85 | " \n", 86 | " # Split into training, validation and test sets (50%/25%/25%)\n", 87 | " n,p = size(X)\n", 88 | " val_start, test_start = round(Int, 0.50 * n), round(Int, 0.75 * n)\n", 89 | " \n", 90 | " X_train, y_train = X[1:val_start - 1, :], y[1:val_start - 1]\n", 91 | " X_val, y_val = X[val_start:test_start - 1, :], y[val_start:test_start - 1]\n", 92 | " X_test, y_test= X[test_start:end, :], y[test_start:end]\n", 93 | " \n", 94 | " if show==true\n", 95 | " #See the size of training, validation and testing sets\n", 96 | " println(\"\\nSize of training set:\",size(X_train),size(y_train))\n", 97 | " println(\"Size of validation set:\",size(X_val),size(y_val))\n", 98 | " println(\"Size of test set:\",size(X_test),size(y_test))\n", 99 | " end\n", 100 | " \n", 101 | " return(X_train,y_train, X_val, y_val, X_test, y_test)\n", 102 | "end;" 103 | ] 104 | }, 105 | { 106 | "cell_type": "markdown", 107 | "metadata": {}, 108 | "source": [ 109 | "### Apply nonlinear transformations" 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "execution_count": 23, 115 | "metadata": {}, 116 | "outputs": [], 117 | "source": [ 118 | "transforms = [x -> x, x -> x.^2, x -> sqrt.(x), x -> log.(x)]\n", 119 | "function transformations(X_train, X_val, X_test)\n", 120 | " show=true\n", 121 | " p_orig = size(X_train,2)\n", 122 | " apply_transforms(X) = hcat([transforms[i](X) for i = 1:length(transforms)]...)\n", 123 | " p2 = p_orig * length(transforms)\n", 124 | " X_train2 = apply_transforms(X_train)\n", 125 | " X_val2 = apply_transforms(X_val)\n", 126 | " X_test2 = apply_transforms(X_test)\n", 127 | " \n", 128 | " if show==true\n", 129 | " #See the 
size of training, validation and testing sets\n", 130 | " println(\"\\nNew size of training set:\",size(X_train2))\n", 131 | " println(\"New size of validation set:\",size(X_val2))\n", 132 | " println(\"New size of test set:\",size(X_test2))\n", 133 | " end\n", 134 | " \n", 135 | " return(p_orig, X_train2, X_val2, X_test2)\n", 136 | "end; " 137 | ] 138 | }, 139 | { 140 | "cell_type": "code", 141 | "execution_count": 24, 142 | "metadata": {}, 143 | "outputs": [ 144 | { 145 | "name": "stdout", 146 | "output_type": "stream", 147 | "text": [ 148 | "\n", 149 | "Size of training set:(124, 15)(124,)\n", 150 | "Size of validation set:(63, 15)(63,)\n", 151 | "Size of test set:(63, 15)(63,)\n", 152 | "\n", 153 | "New size of training set:(124, 60)\n", 154 | "New size of validation set:(63, 60)\n", 155 | "New size of test set:(63, 60)\n", 156 | "\n", 157 | "Size of training set:(249, 7)(249,)\n", 158 | "Size of validation set:(125, 7)(125,)\n", 159 | "Size of test set:(126, 7)(126,)\n", 160 | "\n", 161 | "New size of training set:(249, 28)\n", 162 | "New size of validation set:(125, 28)\n", 163 | "New size of test set:(126, 28)\n" 164 | ] 165 | } 166 | ], 167 | "source": [ 168 | "X_train1,y_train1, X_val1, y_val1, X_test1, y_test1 = separate_dataset(df1)\n", 169 | "p_orig1, X_train1, X_val1, X_test1 = transformations(X_train1, X_val1, X_test1)\n", 170 | "\n", 171 | "X_train2,y_train2, X_val2, y_val2, X_test2, y_test2 = separate_dataset(df2)\n", 172 | "p_orig2, X_train2, X_val2, X_test2 = transformations(X_train2, X_val2, X_test2);" 173 | ] 174 | }, 175 | { 176 | "cell_type": "markdown", 177 | "metadata": {}, 178 | "source": [ 179 | "### Compute the correlations" 180 | ] 181 | }, 182 | { 183 | "cell_type": "code", 184 | "execution_count": 7, 185 | "metadata": {}, 186 | "outputs": [], 187 | "source": [ 188 | "function sorted_correlations(cor_matrix)\n", 189 | " c = copy(abs.(cor_matrix))\n", 190 | " p = length(cor_matrix[1,:])\n", 191 | " num_pairs = 
convert(Int64,p*(p-1)/2)\n", 192 | " pair_list = zeros(Int64, num_pairs, 2)\n", 193 | " magnitude = zeros(num_pairs)\n", 194 | "\n", 195 | " # Set lower triangular correlation values = 0\n", 196 | " for i=1:p\n", 197 | " for j=1:i\n", 198 | " c[i,j] = 0\n", 199 | " end\n", 200 | " end\n", 201 | "\n", 202 | " for i=1:num_pairs\n", 203 | " ind = indmax(c)\n", 204 | " col = floor(ind/p) + 1\n", 205 | " row = ind % p\n", 206 | " magnitude[i] = c[ind]\n", 207 | " pair_list[i,1] = row\n", 208 | " pair_list[i,2] = col\n", 209 | " c[ind] = 0\n", 210 | " end\n", 211 | "\n", 212 | " return(pair_list, magnitude)\n", 213 | "end;" 214 | ] 215 | }, 216 | { 217 | "cell_type": "code", 218 | "execution_count": 8, 219 | "metadata": {}, 220 | "outputs": [], 221 | "source": [ 222 | "function cor_matrix(p_orig, X_train)\n", 223 | " # Find the correlation matrix of the independent variables of the training data\n", 224 | " cor_matrix = cor(X_train[:,1:p_orig])\n", 225 | " pair_list, magnitude = sorted_correlations(cor_matrix)\n", 226 | " num_pairs = length(magnitude)\n", 227 | " return (pair_list, magnitude, num_pairs)\n", 228 | "end;" 229 | ] 230 | }, 231 | { 232 | "cell_type": "code", 233 | "execution_count": 9, 234 | "metadata": {}, 235 | "outputs": [], 236 | "source": [ 237 | "pair_list1, magnitude1, num_pairs1 = cor_matrix(p_orig1, X_train1)\n", 238 | "pair_list2, magnitude2, num_pairs2 = cor_matrix(p_orig2, X_train2);" 239 | ] 240 | }, 241 | { 242 | "cell_type": "markdown", 243 | "metadata": {}, 244 | "source": [ 245 | "### Build the MIO model" 246 | ] 247 | }, 248 | { 249 | "cell_type": "code", 250 | "execution_count": 10, 251 | "metadata": {}, 252 | "outputs": [], 253 | "source": [ 254 | "function linear_regression_framework(p_orig,\n", 255 | " X_train,\n", 256 | " y_train,\n", 257 | " pair_list, magnitude, num_pairs,\n", 258 | " K, # sparsity parameter\n", 259 | " Γ ,# robustness parameter (type \"\\Gamma\" then hit tab to get the Γ symbol)\n", 260 | " ρ # pairwise 
multicollinearity threshold (type \"\\rho\" then hit tab to get the ρ symbol)\n", 261 | " )\n", 262 | " n, p = size(X_train)\n", 263 | " Big_M = fill(1000, p)\n", 264 | " group_sparsity = true\n", 265 | " groups = ([1 2 3 4], [5 6 7], [8 9 10 11])\n", 266 | " num_opt = length(transforms)\n", 267 | " print_model = false\n", 268 | " OutputFlag = 0\n", 269 | " robustness = true\n", 270 | "\n", 271 | " m = Model(solver = GurobiSolver(OutputFlag=OutputFlag))\n", 272 | "\n", 273 | " @variable(m, β[1:p]); # (type \"\\beta\" then hit tab to get the β symbol)\n", 274 | " @variable(m, z[1:p], Bin);\n", 275 | "\n", 276 | " # Big M constraints\n", 277 | " @constraint(m, m_gt[d=1:p], β[d] <= Big_M[d] * z[d]);\n", 278 | " @constraint(m, m_lt[d=1:p], -Big_M[d] * z[d] <= β[d]);\n", 279 | "\n", 280 | "\n", 281 | "\n", 282 | " # Pairwise multicolinearity constraint\n", 283 | " for i=1:num_pairs\n", 284 | " if magnitude[i] > ρ\n", 285 | " ind1 = pair_list[i,1]\n", 286 | " ind2 = pair_list[i,2]\n", 287 | " @constraint(m, z[ind1] + z[ind2] + z[ind1 + p_orig] + z[ind2 + p_orig]\n", 288 | " + z[ind1 + 2p_orig] + z[ind2 + 2p_orig] + z[ind1 + 3p_orig] + z[ind2 + 3p_orig] <= 1)\n", 289 | " else\n", 290 | " break\n", 291 | " end\n", 292 | " end\n", 293 | "\n", 294 | " # Group sparsity constraint\n", 295 | " if group_sparsity\n", 296 | " @constraint(m, g_sparsity[i=1:length(groups),j=2:length(groups[i])], z[groups[i][1]] == z[groups[i][j]])\n", 297 | " end\n", 298 | "\n", 299 | " # Single choice of nonlinear transformation constraint\n", 300 | " if num_opt > 1\n", 301 | " @constraint(m, non_linear[j=1:p_orig], sum(z[j + t*p_orig] for t = 0:(num_opt-1)) <= 1)\n", 302 | " end\n", 303 | "\n", 304 | " # Sparsity constraint\n", 305 | " @constraint(m, sparsity, sum(z[d] for d = 1:p) <= K)\n", 306 | "\n", 307 | "\n", 308 | " # Objective function\n", 309 | " a = 0\n", 310 | " for i = 1:n\n", 311 | " a += 0.5(y_train[i] - dot(β, vec(X_train[i,:])))^2\n", 312 | " end\n", 313 | " # @variable(m, t >= 
0)\n", 314 | " @variable(m, θ >= 0)\n", 315 | " # @constraint(m, norm(y_train - X_train * β) <= t)\n", 316 | " @objective(m, Min, a + Γ * θ)\n", 317 | "\n", 318 | " # Robustness\n", 319 | " if robustness\n", 320 | " @variable(m, β_0[1:p] >= 0)\n", 321 | " @constraint(m, beta_pos[j=1:p], β_0[j] >= β[j])\n", 322 | " @constraint(m, beta_neg[j=1:p], β_0[j] >= -β[j])\n", 323 | " @constraint(m, sum(β_0) <= θ)\n", 324 | " end\n", 325 | "\n", 326 | " if print_model\n", 327 | " print(m)\n", 328 | " end\n", 329 | " \n", 330 | " return(m, z, β)\n", 331 | " \n", 332 | "end;" 333 | ] 334 | }, 335 | { 336 | "cell_type": "code", 337 | "execution_count": 11, 338 | "metadata": {}, 339 | "outputs": [], 340 | "source": [ 341 | "function get_K_values(β_soln, K)\n", 342 | " β_soln_indices = convert(Array{Int64,1}, zeros(K)) \n", 343 | " β_soln_max = zeros(K)\n", 344 | " temp = copy(abs.(β_soln))\n", 345 | " for i in 1:K\n", 346 | " max = findmax(temp)\n", 347 | " β_soln_indices[i]= max[2]\n", 348 | " if β_soln[max[2]]>= 0\n", 349 | " β_soln_max[i] = max[1]\n", 350 | " else\n", 351 | " β_soln_max[i] = -max[1]\n", 352 | " end\n", 353 | " \n", 354 | " temp[β_soln_indices[i]] = 0\n", 355 | " end\n", 356 | " β_soln_real = zeros(length(β_soln))\n", 357 | " for i in 1:K\n", 358 | " β_soln_real[β_soln_indices[i]] = β_soln_max[i]\n", 359 | " end\n", 360 | "\n", 361 | " return(β_soln_real)\n", 362 | "end;" 363 | ] 364 | }, 365 | { 366 | "cell_type": "code", 367 | "execution_count": 12, 368 | "metadata": {}, 369 | "outputs": [], 370 | "source": [ 371 | "function solve_regression(m, z, β, K, print_result)\n", 372 | " status = solve(m)\n", 373 | " z_soln1 = getvalue(z)\n", 374 | " β_soln1 = getvalue(β)\n", 375 | " z_soln = get_K_values(z_soln1, K)\n", 376 | " β_soln = get_K_values(β_soln1, K)\n", 377 | " if print_result\n", 378 | " println(\"Status = \", status)\n", 379 | " println(\"Selected variables = \", find(z_soln))\n", 380 | " println(\"Regression Equation:\")\n", 381 | " println(\"y = \", 
join([\"$(round(β_soln[i],3)) x[$i]\" for i in find(z_soln)], \" + \"))\n", 382 | " end\n", 383 | " \n", 384 | " return (β_soln, z_soln)\n", 385 | "end;" 386 | ] 387 | }, 388 | { 389 | "cell_type": "code", 390 | "execution_count": 13, 391 | "metadata": {}, 392 | "outputs": [], 393 | "source": [ 394 | "function find_best_hyperparameters(p_orig, X_train, X_val, y_train, y_val, \n", 395 | " pair_list, magnitude, num_pairs,\n", 396 | " k_interval, k_step)\n", 397 | " p = size(X_train, 2)\n", 398 | " ρ = 0.9\n", 399 | " \n", 400 | " Γ_list = [0.001, 0.01, 0.1, 1, 2]\n", 401 | " k_list = [k_step*i for i in (k_interval[1]/k_step):(k_interval[2]/k_step)]\n", 402 | " k_list = convert(Array{Int64,1}, k_list)\n", 403 | " println(\"k_list: \", k_list)\n", 404 | " \n", 405 | " a = length(Γ_list)*length(k_list)\n", 406 | " #instantiate arrays\n", 407 | " scores = zeros(a)\n", 408 | " \n", 409 | " for i in 1:length(Γ_list)\n", 410 | " for j in 1:length(k_list)\n", 411 | " # training on train sets for both regression methods\n", 412 | " m, z1, β1 = linear_regression_framework(p_orig, X_train, y_train, pair_list, magnitude, num_pairs,\n", 413 | " k_list[j], Γ_list[i], ρ);\n", 414 | " β, z = solve_regression(m, z1, β1, k_list[j], false)\n", 415 | " \n", 416 | " # performance metrics on validation sets for both regression methods\n", 417 | " scores[length(k_list)*(i-1) + j] = norm(y_val - X_val * β)\n", 418 | " end \n", 419 | " end\n", 420 | " Argmin = indmin(scores)\n", 421 | " println(scores)\n", 422 | " Γ_argmin, k_argmin = Argmin÷length(k_list)+1, Argmin%length(k_list)\n", 423 | " if k_argmin == 0\n", 424 | " k_argmin=length(k_list)\n", 425 | " end\n", 426 | " \n", 427 | " return Γ_list[Γ_argmin], k_list[k_argmin]\n", 428 | "end;" 429 | ] 430 | }, 431 | { 432 | "cell_type": "code", 433 | "execution_count": 14, 434 | "metadata": {}, 435 | "outputs": [ 436 | { 437 | "name": "stdout", 438 | "output_type": "stream", 439 | "text": [ 440 | "k_list: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]\n", 
441 | "Academic license - for non-commercial use only\n", 442 | "Academic license - for non-commercial use only\n", 443 | "Academic license - for non-commercial use only\n", 444 | "Academic license - for non-commercial use only\n", 445 | "Academic license - for non-commercial use only\n", 446 | "Academic license - for non-commercial use only\n", 447 | "Academic license - for non-commercial use only\n", 448 | "Academic license - for non-commercial use only\n", 449 | "Academic license - for non-commercial use only\n", 450 | "Academic license - for non-commercial use only\n", 451 | "Academic license - for non-commercial use only\n", 452 | "Academic license - for non-commercial use only\n", 453 | "Academic license - for non-commercial use only\n", 454 | "Academic license - for non-commercial use only\n", 455 | "Academic license - for non-commercial use only\n", 456 | "Academic license - for non-commercial use only\n", 457 | "Academic license - for non-commercial use only\n", 458 | "Academic license - for non-commercial use only\n", 459 | "Academic license - for non-commercial use only\n", 460 | "Academic license - for non-commercial use only\n", 461 | "Academic license - for non-commercial use only\n", 462 | "Academic license - for non-commercial use only\n", 463 | "Academic license - for non-commercial use only\n", 464 | "Academic license - for non-commercial use only\n", 465 | "Academic license - for non-commercial use only\n", 466 | "Academic license - for non-commercial use only\n", 467 | "Academic license - for non-commercial use only\n", 468 | "Academic license - for non-commercial use only\n", 469 | "Academic license - for non-commercial use only\n", 470 | "Academic license - for non-commercial use only\n", 471 | "Academic license - for non-commercial use only\n", 472 | "Academic license - for non-commercial use only\n", 473 | "Academic license - for non-commercial use only\n", 474 | "Academic license - for non-commercial use only\n", 475 | "Academic license - 
for non-commercial use only\n", 476 | "Academic license - for non-commercial use only\n", 477 | "Academic license - for non-commercial use only\n", 478 | "Academic license - for non-commercial use only\n", 479 | "Academic license - for non-commercial use only\n", 480 | "Academic license - for non-commercial use only\n", 481 | "Academic license - for non-commercial use only\n", 482 | "Academic license - for non-commercial use only\n", 483 | "Academic license - for non-commercial use only\n", 484 | "Academic license - for non-commercial use only\n", 485 | "Academic license - for non-commercial use only\n", 486 | "Academic license - for non-commercial use only\n", 487 | "Academic license - for non-commercial use only\n", 488 | "Academic license - for non-commercial use only\n", 489 | "Academic license - for non-commercial use only\n", 490 | "Academic license - for non-commercial use only\n", 491 | "[1.28336, 1.0752, 0.833122, 0.863674, 0.846349, 0.889704, 0.901484, 0.902627, 0.88131, 0.94512, 1.28336, 1.07523, 0.843273, 0.838248, 0.845838, 0.858394, 0.874824, 0.875705, 0.853369, 0.928636, 1.28336, 1.10258, 0.844348, 1.39722, 0.846949, 1.4932, 0.920361, 0.900578, 0.898419, 0.904363, 1.28364, 1.14522, 1.10837, 1.16301, 4.74957, 1.16807, 2.39798, 1.42871, 1.12237, 1.14497, 1.28458, 1.22884, 1.13954, 1.29662, 1.45642, 1.43134, 1.39025, 1.39025, 1.44724, 1.31539]\n" 492 | ] 493 | } 494 | ], 495 | "source": [ 496 | "best_Γ1, best_K_1 = find_best_hyperparameters(p_orig1, X_train1, X_val1, y_train1, y_val1, \n", 497 | " pair_list1, magnitude1, num_pairs1, [1,10], 1);" 498 | ] 499 | }, 500 | { 501 | "cell_type": "code", 502 | "execution_count": 15, 503 | "metadata": {}, 504 | "outputs": [ 505 | { 506 | "name": "stdout", 507 | "output_type": "stream", 508 | "text": [ 509 | "k_list: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28]\n", 510 | "Academic license - for non-commercial use only\n", 511 | "Academic license - for 
non-commercial use only\n", 512 | "Academic license - for non-commercial use only\n", 513 | "Academic license - for non-commercial use only\n", 514 | "Academic license - for non-commercial use only\n", 515 | "Academic license - for non-commercial use only\n", 516 | "Academic license - for non-commercial use only\n", 517 | "Academic license - for non-commercial use only\n", 518 | "Academic license - for non-commercial use only\n", 519 | "Academic license - for non-commercial use only\n", 520 | "Academic license - for non-commercial use only\n", 521 | "Academic license - for non-commercial use only\n", 522 | "Academic license - for non-commercial use only\n", 523 | "Academic license - for non-commercial use only\n", 524 | "Academic license - for non-commercial use only\n", 525 | "Academic license - for non-commercial use only\n", 526 | "Academic license - for non-commercial use only\n", 527 | "Academic license - for non-commercial use only\n", 528 | "Academic license - for non-commercial use only\n", 529 | "Academic license - for non-commercial use only\n", 530 | "Academic license - for non-commercial use only\n", 531 | "Academic license - for non-commercial use only\n", 532 | "Academic license - for non-commercial use only\n", 533 | "Academic license - for non-commercial use only\n", 534 | "Academic license - for non-commercial use only\n", 535 | "Academic license - for non-commercial use only\n", 536 | "Academic license - for non-commercial use only\n", 537 | "Academic license - for non-commercial use only\n", 538 | "Academic license - for non-commercial use only\n", 539 | "Academic license - for non-commercial use only\n", 540 | "Academic license - for non-commercial use only\n", 541 | "Academic license - for non-commercial use only\n", 542 | "Academic license - for non-commercial use only\n", 543 | "Academic license - for non-commercial use only\n", 544 | "Academic license - for non-commercial use only\n", 545 | "Academic license - for non-commercial use only\n", 
546 | "Academic license - for non-commercial use only\n", 547 | "Academic license - for non-commercial use only\n", 548 | "Academic license - for non-commercial use only\n", 549 | "Academic license - for non-commercial use only\n", 550 | "Academic license - for non-commercial use only\n", 551 | "Academic license - for non-commercial use only\n", 552 | "Academic license - for non-commercial use only\n", 553 | "Academic license - for non-commercial use only\n", 554 | "Academic license - for non-commercial use only\n", 555 | "Academic license - for non-commercial use only\n", 556 | "Academic license - for non-commercial use only\n", 557 | "Academic license - for non-commercial use only\n", 558 | "Academic license - for non-commercial use only\n", 559 | "Academic license - for non-commercial use only\n", 560 | "Academic license - for non-commercial use only\n", 561 | "Academic license - for non-commercial use only\n", 562 | "Academic license - for non-commercial use only\n", 563 | "Academic license - for non-commercial use only\n", 564 | "Academic license - for non-commercial use only\n", 565 | "Academic license - for non-commercial use only\n", 566 | "Academic license - for non-commercial use only\n", 567 | "Academic license - for non-commercial use only\n", 568 | "Academic license - for non-commercial use only\n", 569 | "Academic license - for non-commercial use only\n", 570 | "Academic license - for non-commercial use only\n", 571 | "Academic license - for non-commercial use only\n", 572 | "Academic license - for non-commercial use only\n", 573 | "Academic license - for non-commercial use only\n", 574 | "Academic license - for non-commercial use only\n", 575 | "Academic license - for non-commercial use only\n", 576 | "Academic license - for non-commercial use only\n", 577 | "Academic license - for non-commercial use only\n", 578 | "Academic license - for non-commercial use only\n", 579 | "Academic license - for non-commercial use only\n", 580 | "Academic license - 
for non-commercial use only\n", 581 | "Academic license - for non-commercial use only\n", 582 | "Academic license - for non-commercial use only\n", 583 | "Academic license - for non-commercial use only\n", 584 | "Academic license - for non-commercial use only\n", 585 | "Academic license - for non-commercial use only\n", 586 | "Academic license - for non-commercial use only\n", 587 | "Academic license - for non-commercial use only\n", 588 | "Academic license - for non-commercial use only\n", 589 | "Academic license - for non-commercial use only\n", 590 | "Academic license - for non-commercial use only\n", 591 | "Academic license - for non-commercial use only\n", 592 | "Academic license - for non-commercial use only\n", 593 | "Academic license - for non-commercial use only\n", 594 | "Academic license - for non-commercial use only\n", 595 | "Academic license - for non-commercial use only\n", 596 | "Academic license - for non-commercial use only\n", 597 | "Academic license - for non-commercial use only\n", 598 | "Academic license - for non-commercial use only\n", 599 | "Academic license - for non-commercial use only\n", 600 | "Academic license - for non-commercial use only\n", 601 | "Academic license - for non-commercial use only\n", 602 | "Academic license - for non-commercial use only\n", 603 | "Academic license - for non-commercial use only\n", 604 | "Academic license - for non-commercial use only\n", 605 | "Academic license - for non-commercial use only\n", 606 | "Academic license - for non-commercial use only\n", 607 | "Academic license - for non-commercial use only\n", 608 | "Academic license - for non-commercial use only\n", 609 | "Academic license - for non-commercial use only\n", 610 | "Academic license - for non-commercial use only\n", 611 | "Academic license - for non-commercial use only\n", 612 | "Academic license - for non-commercial use only\n", 613 | "Academic license - for non-commercial use only\n", 614 | "Academic license - for non-commercial use 
only\n", 615 | "Academic license - for non-commercial use only\n", 616 | "Academic license - for non-commercial use only\n", 617 | "Academic license - for non-commercial use only\n", 618 | "Academic license - for non-commercial use only\n", 619 | "Academic license - for non-commercial use only\n", 620 | "Academic license - for non-commercial use only\n", 621 | "Academic license - for non-commercial use only\n", 622 | "Academic license - for non-commercial use only\n", 623 | "Academic license - for non-commercial use only\n", 624 | "Academic license - for non-commercial use only\n", 625 | "Academic license - for non-commercial use only\n", 626 | "Academic license - for non-commercial use only\n", 627 | "Academic license - for non-commercial use only\n", 628 | "Academic license - for non-commercial use only\n", 629 | "Academic license - for non-commercial use only\n", 630 | "Academic license - for non-commercial use only\n", 631 | "Academic license - for non-commercial use only\n", 632 | "Academic license - for non-commercial use only\n", 633 | "Academic license - for non-commercial use only\n", 634 | "Academic license - for non-commercial use only\n", 635 | "Academic license - for non-commercial use only\n", 636 | "Academic license - for non-commercial use only\n", 637 | "Academic license - for non-commercial use only\n", 638 | "Academic license - for non-commercial use only\n", 639 | "Academic license - for non-commercial use only\n", 640 | "Academic license - for non-commercial use only\n", 641 | "Academic license - for non-commercial use only\n", 642 | "Academic license - for non-commercial use only\n", 643 | "Academic license - for non-commercial use only\n", 644 | "Academic license - for non-commercial use only\n", 645 | "Academic license - for non-commercial use only\n", 646 | "Academic license - for non-commercial use only\n", 647 | "Academic license - for non-commercial use only\n", 648 | "Academic license - for non-commercial use only\n", 649 | "Academic 
license - for non-commercial use only\n", 650 | "[3.73087, 2.45258, 1.73217, 1.61041, 1.30786, 42.4015, 1.44099, 1.44099, 42.2266, 42.2266, 42.2266, 42.2266, 42.2266, 42.2266, 42.2266, 42.2266, 42.2266, 42.2266, 42.2266, 42.2266, 42.2266, 42.2266, 42.2266, 42.2266, 42.2266, 42.2266, 42.2266, 42.2266, 3.72754, 2.45261, 1.73224, 1.61045, 1.44253, 1.30529, 1.29591, 1.30896, 42.0485, 42.0485, 42.0485, 42.0485, 42.0485, 42.0485, 42.0485, 42.0485, 42.0485, 42.0485, 42.0485, 42.0485, 42.0485, 42.0485, 42.0485, 42.0485, 42.0485, 42.0485, 42.0485, 42.0485, 3.75331, 2.4529, 1.73291, 1.64038, 1.30785, 1.29696, 42.045, 42.045, 42.0779, 42.0779, 42.0779, 42.0779, 42.0779, 42.0779, 42.0779, 42.0779, 42.0779, 42.0779, 42.0779, 42.0779, 42.0779, 42.0779, 42.0779, 42.0779, 42.0779, 42.0779, 42.0779, 42.0779, 4.26063, 2.45601, 1.73995, 1.79356, 1.30873, 42.3899, 1.30298, 42.1181, 42.3899, 42.3899, 42.3899, 42.3899, 42.3899, 42.3899, 42.3899, 42.3899, 42.3899, 42.3899, 42.3899, 42.3899, 42.3899, 42.3899, 42.3899, 42.3899, 42.3899, 42.3899, 42.3899, 42.3899, 3.73113, 2.46002, 2.24173, 2.482, 1.31176, 42.3783, 42.3783, 42.3783, 42.3783, 42.3783, 42.3783, 42.3783, 42.3783, 42.3783, 42.3783, 42.3783, 42.3783, 42.3783, 42.3783, 42.3783, 42.3783, 42.3783, 42.3783, 42.3783, 42.3783, 42.3783, 42.3783, 42.3783]\n" 651 | ] 652 | } 653 | ], 654 | "source": [ 655 | "best_Γ2, best_K_2 = find_best_hyperparameters(p_orig2, X_train2, X_val2, y_train2, y_val2, \n", 656 | " pair_list2, magnitude2, num_pairs2, [1,28], 1);" 657 | ] 658 | }, 659 | { 660 | "cell_type": "code", 661 | "execution_count": 16, 662 | "metadata": {}, 663 | "outputs": [ 664 | { 665 | "name": "stdout", 666 | "output_type": "stream", 667 | "text": [ 668 | "\n", 669 | "Best Γ for dataset1: 0.001\n", 670 | "Best K for dataset1: 3\n", 671 | "\n", 672 | "Best Γ for dataset2: 0.01\n", 673 | "Best K for dataset2: 7\n" 674 | ] 675 | } 676 | ], 677 | "source": [ 678 | "println(\"\\nBest Γ for dataset1: \", best_Γ1, \"\\nBest K for 
dataset1: \", best_K_1)\n", 679 | "println(\"\\nBest Γ for dataset2: \", best_Γ2, \"\\nBest K for dataset2: \", best_K_2);" 680 | ] 681 | }, 682 | { 683 | "cell_type": "code", 684 | "execution_count": 17, 685 | "metadata": {}, 686 | "outputs": [], 687 | "source": [ 688 | "# retrain the whole model with training and validation sets together\n", 689 | "function retrain_linear(p_orig, X_train, y_train, X_val, y_val, pair_list, magnitude, num_pairs, K, Γ, ρ)\n", 690 | " X_train_new = vcat(X_train, X_val)\n", 691 | " y_train_new = vcat(y_train, y_val)\n", 692 | " \n", 693 | " m, z1, β1 = linear_regression_framework(p_orig, X_train_new, y_train_new, pair_list, magnitude, num_pairs,\n", 694 | " K, Γ, ρ);\n", 695 | " β, z = solve_regression(m, z1, β1, K, true)\n", 696 | " return(β, z)\n", 697 | "end;" 698 | ] 699 | }, 700 | { 701 | "cell_type": "code", 702 | "execution_count": 18, 703 | "metadata": {}, 704 | "outputs": [ 705 | { 706 | "name": "stdout", 707 | "output_type": "stream", 708 | "text": [ 709 | "Academic license - for non-commercial use only\r\n", 710 | "Status = Optimal\n", 711 | "Selected variables = [19, 27, 56]\n", 712 | "Regression Equation:\n", 713 | "y = 0.039 x[19] + 0.027 x[27] + 2.645 x[56]\n" 714 | ] 715 | } 716 | ], 717 | "source": [ 718 | "β_opt_1, z_opt_1 = retrain_linear(p_orig1, X_train1, y_train1, X_val1, y_val1, pair_list1, magnitude1, num_pairs1, best_K_1, best_Γ1, 0.9);" 719 | ] 720 | }, 721 | { 722 | "cell_type": "code", 723 | "execution_count": 19, 724 | "metadata": {}, 725 | "outputs": [ 726 | { 727 | "name": "stdout", 728 | "output_type": "stream", 729 | "text": [ 730 | "Academic license - for non-commercial use only\r\n", 731 | "Status = Optimal\n", 732 | "Selected variables = [16, 18, 19, 22, 24, 27, 28]\n", 733 | "Regression Equation:\n", 734 | "y = 1.857 x[16] + -0.445 x[18] + -0.473 x[19] + 3.433 x[22] + 0.132 x[24] + -0.266 x[27] + 0.432 x[28]\n" 735 | ] 736 | } 737 | ], 738 | "source": [ 739 | "β_opt_2, z_opt_2 = 
retrain_linear(p_orig2, X_train2, y_train2, X_val2, y_val2, pair_list1, magnitude1, num_pairs1, best_K_2, best_Γ2, 0.9);" 740 | ] 741 | }, 742 | { 743 | "cell_type": "code", 744 | "execution_count": 20, 745 | "metadata": {}, 746 | "outputs": [], 747 | "source": [ 748 | "function out_of_sample(X_train, y_train, X_val, y_val, X_test, y_test, β_soln, K, Γ, ρ) \n", 749 | " # Out of sample testing\n", 750 | " y_hat_train = X_train*β_soln\n", 751 | " R2_train = 1 - sum((y_hat_train - y_train).^2) / sum((mean(y_train) - y_train).^2)\n", 752 | " \n", 753 | " y_hat_val = X_val*β_soln\n", 754 | " R2_val = 1 - sum((y_hat_val - y_val).^2) / sum((mean(y_train) - y_val).^2)\n", 755 | " \n", 756 | " y_hat_test = X_test*β_soln\n", 757 | " R2_test = 1 - sum((y_hat_test - y_test).^2) / sum((mean(y_train) - y_test).^2)\n", 758 | " \n", 759 | " z_soln_manual = abs.(β_soln) .> 0.0000001\n", 760 | "\n", 761 | "\n", 762 | "\n", 763 | " println(\"***PARAMETERS***\")\n", 764 | " println(\"Sparsity parameter K = \", K)\n", 765 | " println(\"Pairwise correlation threshold ρ = \", ρ)\n", 766 | " println(\"Robustness parameter Γ = \", Γ)\n", 767 | " println()\n", 768 | " println(\"***RESULTS***\")\n", 769 | " println(\"MIO R2 train\\t\", R2_train)\n", 770 | " println(\"MIO R2 validation\\t\", R2_val)\n", 771 | " println(\"MIO R2 test\\t\", R2_test)\n", 772 | "end;" 773 | ] 774 | }, 775 | { 776 | "cell_type": "code", 777 | "execution_count": 21, 778 | "metadata": {}, 779 | "outputs": [ 780 | { 781 | "name": "stdout", 782 | "output_type": "stream", 783 | "text": [ 784 | "***PARAMETERS***\n", 785 | "Sparsity parameter K = 3\n", 786 | "Pairwise correlation threshold ρ = 0.9\n", 787 | "Robustness parameter Γ = 0.001\n", 788 | "\n", 789 | "***RESULTS***\n", 790 | "MIO R2 train\t0.8432352378395275\n", 791 | "MIO R2 validation\t0.8127748811895958\n", 792 | "MIO R2 test\t0.7697463987128148\n" 793 | ] 794 | } 795 | ], 796 | "source": [ 797 | "out_of_sample(X_train1, y_train1, X_val1, y_val1, X_test1, 
y_test1, β_opt_1, best_K_1, best_Γ1, 0.9)" 798 | ] 799 | }, 800 | { 801 | "cell_type": "code", 802 | "execution_count": 22, 803 | "metadata": {}, 804 | "outputs": [ 805 | { 806 | "name": "stdout", 807 | "output_type": "stream", 808 | "text": [ 809 | "***PARAMETERS***\n", 810 | "Sparsity parameter K = 7\n", 811 | "Pairwise correlation threshold ρ = 0.9\n", 812 | "Robustness parameter Γ = 0.01\n", 813 | "\n", 814 | "***RESULTS***\n", 815 | "MIO R2 train\t0.9219065010999199\n", 816 | "MIO R2 validation\t0.8982248481520309\n", 817 | "MIO R2 test\t0.9274666342602617\n" 818 | ] 819 | } 820 | ], 821 | "source": [ 822 | "out_of_sample(X_train2, y_train2, X_val2, y_val2, X_test2, y_test2, β_opt_2, best_K_2, best_Γ2, 0.9)" 823 | ] 824 | }, 825 | { 826 | "cell_type": "code", 827 | "execution_count": 30, 828 | "metadata": {}, 829 | "outputs": [ 830 | { 831 | "name": "stdout", 832 | "output_type": "stream", 833 | "text": [ 834 | "2\n", 835 | "2\n", 836 | "\n", 837 | "4\n", 838 | "2\n", 839 | "\n", 840 | "5\n", 841 | "2\n", 842 | "\n", 843 | "1\n", 844 | "3\n", 845 | "\n", 846 | "3\n", 847 | "3\n", 848 | "\n", 849 | "6\n", 850 | "3\n", 851 | "\n", 852 | "0\n", 853 | "4\n", 854 | "\n" 855 | ] 856 | } 857 | ], 858 | "source": [ 859 | "a = [16, 18, 19, 22, 24, 27, 28]\n", 860 | "for x in a\n", 861 | " println(x%7)\n", 862 | " println(x÷7)\n", 863 | " println()\n", 864 | "end" 865 | ] 866 | }, 867 | { 868 | "cell_type": "code", 869 | "execution_count": 27, 870 | "metadata": {}, 871 | "outputs": [ 872 | { 873 | "data": { 874 | "text/plain": [ 875 | "3" 876 | ] 877 | }, 878 | "execution_count": 27, 879 | "metadata": {}, 880 | "output_type": "execute_result" 881 | } 882 | ], 883 | "source": [] 884 | }, 885 | { 886 | "cell_type": "code", 887 | "execution_count": null, 888 | "metadata": {}, 889 | "outputs": [], 890 | "source": [] 891 | } 892 | ], 893 | "metadata": { 894 | "kernelspec": { 895 | "display_name": "Julia 0.6.4", 896 | "language": "julia", 897 | "name": "julia-0.6" 898 | }, 
899 | "language_info": { 900 | "file_extension": ".jl", 901 | "mimetype": "application/julia", 902 | "name": "julia", 903 | "version": "0.6.4" 904 | } 905 | }, 906 | "nbformat": 4, 907 | "nbformat_minor": 2 908 | } 909 | -------------------------------------------------------------------------------- /Homework2-Algorithmic-framework-for-linear-regression-and-convex-regression/hw2-answer.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kanguyn/Machine-Learning-Under-Optimization-Lens-Fall2018/8e7ecc1eb36b21cf5f1f10f6c8299bc14a6ba766/Homework2-Algorithmic-framework-for-linear-regression-and-convex-regression/hw2-answer.pdf -------------------------------------------------------------------------------- /Homework2-Algorithmic-framework-for-linear-regression-and-convex-regression/hw2-subject.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kanguyn/Machine-Learning-Under-Optimization-Lens-Fall2018/8e7ecc1eb36b21cf5f1f10f6c8299bc14a6ba766/Homework2-Algorithmic-framework-for-linear-regression-and-convex-regression/hw2-subject.pdf -------------------------------------------------------------------------------- /Homework3-Optimal-trees-and-optimal-data-imputation/hw3-answers-Nhi.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kanguyn/Machine-Learning-Under-Optimization-Lens-Fall2018/8e7ecc1eb36b21cf5f1f10f6c8299bc14a6ba766/Homework3-Optimal-trees-and-optimal-data-imputation/hw3-answers-Nhi.pdf -------------------------------------------------------------------------------- /Homework3-Optimal-trees-and-optimal-data-imputation/hw3-optimal-trees-and-optimal-missing-data-imputation.jl: -------------------------------------------------------------------------------- 1 | using DataFrames 2 | using MLDataUtils 3 | using OptimalTrees 4 | 5 | function 
tune_train_OCT(train_X, train_y, valid_X, valid_y, test_X, test_y) 6 | println("\n##### Tuning and training the OCT #####") 7 | # Tuning parameters for an OCT 8 | # - If we do not specify the cp value, then it is autotuned (preferred) 9 | # - We use validation to determine the best parameter 10 | # combination (max_depth, criterion, minbucket) 11 | # - Because we are validating over the criterion, we 12 | # select one criterion (in this case entropy) to be used 13 | # as the evaluation metric. 14 | # - We specify N=100 random starts, and set the random seed = 1. 15 | 16 | lnr = OptimalTrees.OptimalTreeClassifier(ls_random_seed=1, 17 | ls_num_tree_restarts=100) 18 | grid1 = OptimalTrees.GridSearch(lnr, Dict( 19 | :max_depth => 1:8, 20 | :minbucket => [5, 10, 15, 20, 25] 21 | )); 22 | 23 | OptimalTrees.fit!(grid1, train_X, train_y, valid_X, valid_y, 24 | validation_criterion = :entropy); 25 | 26 | # Print out the best parameters that were selected 27 | println("\nBest Parameters") 28 | println("----------------------") 29 | #println(grid1.best_params) 30 | println("cp = ", grid1.best_params[:cp]) 31 | println("Max Depth = ", grid1.best_params[:max_depth]) 32 | println("Minbucket = ", grid1.best_params[:minbucket]) 33 | #println("Criterion = ", grid1.best_params[:criterion]) 34 | println("----------------------\n") 35 | 36 | println("Tree:") 37 | println(grid1.best_lnr) 38 | println() 39 | 40 | #println(OptimalTrees.getnodefields(grid1.best_lnr, [:class, :probs, :split_type])) 41 | #println(OptimalTrees.getnodefields(grid1.best_lnr, [:class, :probs, :depth])) 42 | # Here are the node fields that we can display 43 | #println(fieldnames(grid1.best_lnr.tree_.nodes[1])) 44 | 45 | # Compute the in-sample and out-of-sample accuracy 46 | # Note: This problem has K = 3 classes, so we cannot compute AUC. 
47 | train_acc = OptimalTrees.score(grid1.best_lnr, train_X, train_y, criterion=:misclassification) 48 | valid_acc = OptimalTrees.score(grid1.best_lnr, valid_X, valid_y, criterion=:misclassification) 49 | test_acc = OptimalTrees.score(grid1.best_lnr, test_X, test_y, criterion=:misclassification) 50 | println("OCT-Parallel Results") 51 | println("----------------------") 52 | println("Training accuracy = ", round(100 * train_acc, 2), "%") 53 | println("Validation accuracy = ", round(100 * valid_acc, 2), "%") 54 | println("Testing accuracy = ", round(100 * test_acc, 2), "%") 55 | 56 | return(grid1.best_lnr) 57 | 58 | end 59 | 60 | 61 | ############################## 62 | ###### QUESTION 1 ############# 63 | 64 | data = readtable("iris.csv", makefactors = true) 65 | X = data[:,1:(end-1)] 66 | y = data[:,end] 67 | 68 | srand(1) 69 | (big_X, big_y), (test_X, test_y) = stratifiedobs((X, y), p=0.75); 70 | (train_X, train_y), (valid_X, valid_y) = stratifiedobs((big_X, big_y), p=0.67); 71 | 72 | lnr1 = tune_train_OCT(train_X, train_y, valid_X, valid_y, test_X, test_y) 73 | 74 | ###### QUESTION 2 ############# 75 | println("\nWhat is the most important variables in this dataset?") 76 | println(OptimalTrees.variable_importance(lnr1)) 77 | 78 | 79 | 80 | 81 | 82 | ######## question 4: necessary functions ######## 83 | 84 | println("\n\n**** Data imputation, leaving the outcome variable out ****\n") 85 | 86 | # load missing data 87 | missing = readtable("iris-missing.csv", makefactors = true) 88 | 89 | #### Function to compute the MAE ### 90 | function getMAE(X_real, X_imputed, X_missing) 91 | n, p = size(X_real) 92 | missing_count = 0 93 | totalError = 0.0 94 | for i in 1:n 95 | for d in 1:p 96 | if isna( X_missing[i,d] ) 97 | missing_count += 1 98 | totalError += abs(X_real[i,d] - X_imputed[i,d]) 99 | end 100 | end 101 | end 102 | return(totalError / missing_count) 103 | end 104 | 105 | 106 | function impute_missing(X_real, X_missing) 107 | # Data imputation with MEAN method 
108 | X_mean = OptImpute.impute(X_missing, :mean); 109 | println("\n**** Mean method ****\nMAE : ", getMAE(X_real, X_mean, X_missing)) 110 | 111 | println("\n\n***** Cross-Validation *****") 112 | # Use cross-validation to select the parameters 113 | # - knn_k is the number of neighbors for K-NN 114 | # - algorithm is either Block Coordinate Decent (BCD) 115 | # or Coordinate Descent (CD) 116 | # - norm is either L_1 (l1) or L_2 (l2). L_2 uses the 117 | # standard Euclidean distance, L_1 uses the absolute 118 | # value distance. 119 | 120 | imputer = OptImpute.Imputer(:opt_knn) 121 | grid2 = OptImpute.GridSearch(imputer, Dict( 122 | :knn_k => [10, 20, 30, 40, 50], 123 | :algorithm => [:CD, :BCD], 124 | :norm => [:l1, :l2] 125 | )) 126 | X_opt_knn_cv = OptImpute.fit!(grid2, X_missing) 127 | 128 | println("Best Parameters") 129 | println("----------------------") 130 | println(grid2.best_params) 131 | println("Number of neighbors k = ", grid2.best_params[:knn_k]) 132 | println("Algorithm = ", grid2.best_params[:algorithm]) 133 | println("Norm = ", grid2.best_params[:norm]) 134 | 135 | println("**** opt.knn method ****\nMAE: ", getMAE(X_real, X_opt_knn_cv, X_missing)) 136 | 137 | return(X_mean, X_opt_knn_cv) 138 | end 139 | 140 | 141 | 142 | ############ QUESTION 4 answers ######################## 143 | X_missing1 = missing[:,1:(end-1)] 144 | X_real = data[:,1:(end-1)] 145 | 146 | println("\n#################### Question 4 ####################\n") 147 | X_mean1, X_opt_knn_cv1 = impute_missing(X_real, X_missing1) 148 | 149 | println("\n**** Retrain OCT on imputed data with MEAN METHOD | q.4****\n") 150 | y = missing[:,end] 151 | 152 | srand(1) 153 | (big_X2, big_y2), (test_X2, test_y2) = stratifiedobs((X_mean1, y), p=0.75); 154 | (train_X2, train_y2), (valid_X2, valid_y2) = stratifiedobs((big_X2, big_y2), p=0.67); 155 | 156 | lnr_mean1 = tune_train_OCT(train_X2, train_y2, valid_X2, valid_y2, test_X2, test_y2) 157 | println("\nWhat is the most important variables in 
this dataset?") 158 | println(OptimalTrees.variable_importance(lnr_mean1)) 159 | 160 | println("\n**** Retrain OCT on imputed data with OPT.KNN METHOD |4 ****\n") 161 | 162 | srand(1) 163 | (big_X3, big_y3), (test_X3, test_y3) = stratifiedobs((X_opt_knn_cv1, y), p=0.75); 164 | (train_X3, train_y3), (valid_X3, valid_y3) = stratifiedobs((big_X3, big_y3), p=0.67); 165 | 166 | lnr_knn1 = tune_train_OCT(train_X3, train_y3, valid_X3, valid_y3, test_X3, test_y3) 167 | println("\nWhat is the most important variables in this dataset?") 168 | println(OptimalTrees.variable_importance(lnr_knn1)) 169 | 170 | 171 | println("\nCORRELATION MATRIX of X_mean1") 172 | println(cor(Matrix( X_mean1 ))) 173 | 174 | println("\nCORRELATION MATRIX of X_opt_knn_cv1") 175 | println(cor(Matrix( X_opt_knn_cv1))) 176 | 177 | println("\nCORRELATION MATRIX of X") 178 | println(cor(Matrix( X))) 179 | 180 | 181 | 182 | 183 | 184 | 185 | 186 | ############ QUESTION 5 answers ######################## 187 | println("\n############## Question 5 ######################\n") 188 | 189 | mean2, opt_knn_cv2 = impute_missing(data, missing) 190 | X_mean2, X_opt_knn_cv2 = mean2[:,1:(end-1)], opt_knn_cv2[:,1:(end-1)] 191 | 192 | 193 | 194 | println("\n**** Retrain OCT on imputed data with MEAN METHOD **** | q5") 195 | srand(1) 196 | (big_X4, big_y4), (test_X_missing, test_y_missing) = stratifiedobs((X_missing1, y), p=0.75); 197 | (big_X5, big_y5), (test_X5, test_y5) = stratifiedobs((X_mean2, y), p=0.75); 198 | (train_X4, train_y4), (valid_X4, valid_y4) = stratifiedobs((big_X5, big_y5), p=0.67); 199 | 200 | test_X_mean2, test_X_opt_knn_cv2 = impute_missing(test_X, test_X_missing) 201 | 202 | lnr_mean2 = tune_train_OCT(train_X4, train_y4, valid_X4, valid_y4, test_X_mean2, test_y) 203 | println("\nWhat is the most important variables in this dataset?") 204 | println(OptimalTrees.variable_importance(lnr_mean2)) 205 | 206 | println("\n\n**** Retrain OCT on imputed data with OPT.KNN METHOD ****| q5") 207 | srand(1) 208 
| (big_X6, big_y6), (test_X6, test_y6) = stratifiedobs((X_opt_knn_cv2, y), p=0.75); 209 | (train_X6, train_y6), (valid_X6, valid_y6) = stratifiedobs((big_X6, big_y6), p=0.67); 210 | 211 | lnr_knn2 = tune_train_OCT(train_X6, train_y6, valid_X6, valid_y6, test_X_opt_knn_cv2, test_y) 212 | println("\nWhat is the most important variables in this dataset?") 213 | println(OptimalTrees.variable_importance(lnr_knn2)) 214 | 215 | 216 | println("\nCORRELATION MATRIX of X_mean2") 217 | println(cor(Matrix( vcat(train_X4, valid_X4, test_X_mean2) ))) 218 | 219 | println("\nCORRELATION MATRIX of X_opt_knn_cv2") 220 | println(cor(Matrix( vcat(train_X6, valid_X6, test_X_opt_knn_cv2) ))) 221 | 222 | println("\nCORRELATION MATRIX of X") 223 | println(cor(Matrix( X))) 224 | -------------------------------------------------------------------------------- /Homework3-Optimal-trees-and-optimal-data-imputation/hw3-subject.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kanguyn/Machine-Learning-Under-Optimization-Lens-Fall2018/8e7ecc1eb36b21cf5f1f10f6c8299bc14a6ba766/Homework3-Optimal-trees-and-optimal-data-imputation/hw3-subject.pdf -------------------------------------------------------------------------------- /Homework4-Precriptive-approach-to-forecast-stocks-sales/hw4-Q3-ML-model-predict-sales.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# 15.095. 
Homework 4\n", 8 | "\n", 9 | "Kim-Anh-Nhi Nguyen" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [ 16 | "# Question 3" 17 | ] 18 | }, 19 | { 20 | "cell_type": "markdown", 21 | "metadata": {}, 22 | "source": [ 23 | "## (a) Data preprocessing" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": 40, 29 | "metadata": {}, 30 | "outputs": [], 31 | "source": [ 32 | "# imports\n", 33 | "import matplotlib.pyplot as plt\n", 34 | "from sklearn.model_selection import GridSearchCV\n", 35 | "import pandas as pd\n", 36 | "import numpy as np\n", 37 | "#from sklearn.ensemble import RandomForestRegressor\n", 38 | "from sklearn.tree import DecisionTreeRegressor\n", 39 | "from sklearn import preprocessing\n", 40 | "from sklearn.metrics import mean_absolute_error\n", 41 | "\n", 42 | "%matplotlib inline\n" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": 2, 48 | "metadata": {}, 49 | "outputs": [], 50 | "source": [ 51 | "# load data\n", 52 | "items = pd.read_csv(\"items.csv\")\n", 53 | "sales = pd.read_csv(\"sales.csv\")\n", 54 | "info = pd.read_csv(\"sideinformation.csv\")" 55 | ] 56 | }, 57 | { 58 | "cell_type": "markdown", 59 | "metadata": {}, 60 | "source": [ 61 | "First, we look at what the 3 tables look like." 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": 3, 67 | "metadata": {}, 68 | "outputs": [ 69 | { 70 | "data": { 71 | "text/html": [ 72 | "
\n", 73 | "\n", 86 | "\n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | "
dateitem_nbrondisplayonpromotionunit_sales
02017-01-02103665FalseFalse0.0
12017-01-02105574FalseFalse0.0
22017-01-02105575TrueFalse3.0
32017-01-02105577TrueFalse1.0
42017-01-02105737TrueFalse1.0
\n", 140 | "
" 141 | ], 142 | "text/plain": [ 143 | " date item_nbr ondisplay onpromotion unit_sales\n", 144 | "0 2017-01-02 103665 False False 0.0\n", 145 | "1 2017-01-02 105574 False False 0.0\n", 146 | "2 2017-01-02 105575 True False 3.0\n", 147 | "3 2017-01-02 105577 True False 1.0\n", 148 | "4 2017-01-02 105737 True False 1.0" 149 | ] 150 | }, 151 | "execution_count": 3, 152 | "metadata": {}, 153 | "output_type": "execute_result" 154 | } 155 | ], 156 | "source": [ 157 | "sales.head()" 158 | ] 159 | }, 160 | { 161 | "cell_type": "code", 162 | "execution_count": 4, 163 | "metadata": {}, 164 | "outputs": [ 165 | { 166 | "name": "stdout", 167 | "output_type": "stream", 168 | "text": [ 169 | "100\n" 170 | ] 171 | } 172 | ], 173 | "source": [ 174 | "# check there are 100 unique items\n", 175 | "print(sales['item_nbr'].unique().size)" 176 | ] 177 | }, 178 | { 179 | "cell_type": "code", 180 | "execution_count": 5, 181 | "metadata": {}, 182 | "outputs": [ 183 | { 184 | "data": { 185 | "text/html": [ 186 | "
\n", 187 | "\n", 200 | "\n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | "
item_nbrfamilyclassperishablecostprice
0103665BREAD/BAKERY271210.724.00
1105574GROCERY I104501.973.33
2105575GROCERY I104501.023.72
3105577GROCERY I104502.214.89
4105737GROCERY I104402.353.71
\n", 260 | "
" 261 | ], 262 | "text/plain": [ 263 | " item_nbr family class perishable cost price\n", 264 | "0 103665 BREAD/BAKERY 2712 1 0.72 4.00\n", 265 | "1 105574 GROCERY I 1045 0 1.97 3.33\n", 266 | "2 105575 GROCERY I 1045 0 1.02 3.72\n", 267 | "3 105577 GROCERY I 1045 0 2.21 4.89\n", 268 | "4 105737 GROCERY I 1044 0 2.35 3.71" 269 | ] 270 | }, 271 | "execution_count": 5, 272 | "metadata": {}, 273 | "output_type": "execute_result" 274 | } 275 | ], 276 | "source": [ 277 | "items.head()" 278 | ] 279 | }, 280 | { 281 | "cell_type": "code", 282 | "execution_count": 6, 283 | "metadata": {}, 284 | "outputs": [ 285 | { 286 | "name": "stdout", 287 | "output_type": "stream", 288 | "text": [ 289 | "100\n", 290 | "12\n", 291 | "47\n" 292 | ] 293 | } 294 | ], 295 | "source": [ 296 | "# check how many unique items, families and classes\n", 297 | "print(items['item_nbr'].unique().size)\n", 298 | "print(items['family'].unique().size)\n", 299 | "print(items['class'].unique().size)" 300 | ] 301 | }, 302 | { 303 | "cell_type": "code", 304 | "execution_count": 7, 305 | "metadata": {}, 306 | "outputs": [ 307 | { 308 | "data": { 309 | "text/html": [ 310 | "
\n", 311 | "\n", 324 | "\n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | "
dateisHolidayoilPrice
02017-01-02True52.36
12017-01-03False53.26
22017-01-04False53.06
32017-01-05False53.62
42017-01-06False52.41
\n", 366 | "
" 367 | ], 368 | "text/plain": [ 369 | " date isHoliday oilPrice\n", 370 | "0 2017-01-02 True 52.36\n", 371 | "1 2017-01-03 False 53.26\n", 372 | "2 2017-01-04 False 53.06\n", 373 | "3 2017-01-05 False 53.62\n", 374 | "4 2017-01-06 False 52.41" 375 | ] 376 | }, 377 | "execution_count": 7, 378 | "metadata": {}, 379 | "output_type": "execute_result" 380 | } 381 | ], 382 | "source": [ 383 | "info.head()" 384 | ] 385 | }, 386 | { 387 | "cell_type": "markdown", 388 | "metadata": {}, 389 | "source": [ 390 | "### We encode the categorical variables as integers" 391 | ] 392 | }, 393 | { 394 | "cell_type": "code", 395 | "execution_count": 8, 396 | "metadata": {}, 397 | "outputs": [], 398 | "source": [ 399 | "# label encoder for item_nbr, family and class\n", 400 | "\n", 401 | "le1 = preprocessing.LabelEncoder()\n", 402 | "le1.fit(list(items['item_nbr'].unique()))\n", 403 | "items['item_nbr_encoded'] = le1.transform(items['item_nbr'])\n", 404 | "\n", 405 | "le2 = preprocessing.LabelEncoder()\n", 406 | "le2.fit(list(items['family'].unique()))\n", 407 | "items['family_encoded'] = le2.transform(items['family'])\n", 408 | "\n", 409 | "le3 = preprocessing.LabelEncoder()\n", 410 | "le3.fit(list(items['class'].unique()))\n", 411 | "items['class_encoded'] = le3.transform(items['class'])" 412 | ] 413 | }, 414 | { 415 | "cell_type": "markdown", 416 | "metadata": {}, 417 | "source": [ 418 | "### Now, we can merge the 3 tables" 419 | ] 420 | }, 421 | { 422 | "cell_type": "code", 423 | "execution_count": 9, 424 | "metadata": {}, 425 | "outputs": [], 426 | "source": [ 427 | "#merge items and sales, sales on the left\n", 428 | "df = pd.merge(sales, items, on = 'item_nbr', how = 'left')" 429 | ] 430 | }, 431 | { 432 | "cell_type": "code", 433 | "execution_count": 10, 434 | "metadata": {}, 435 | "outputs": [], 436 | "source": [ 437 | "# merge with info table\n", 438 | "df = pd.merge(df, info, on = 'date', how = 'left')" 439 | ] 440 | }, 441 | { 442 | "cell_type": "markdown", 443 | 
"metadata": {}, 444 | "source": [ 445 | "### More Data Processing: \n", 446 | "1. We create 3 columns to take the date into account\n", 447 | "2. We round the sales as integers, because there is numerical float issues for some rows (e.g. 3.999999998 instead of 4.0) " 448 | ] 449 | }, 450 | { 451 | "cell_type": "code", 452 | "execution_count": 11, 453 | "metadata": {}, 454 | "outputs": [], 455 | "source": [ 456 | "#column year, month, day\n", 457 | "df['year'] = df['date'].str[0:4]\n", 458 | "df['month'] = df['date'].str[5:7]\n", 459 | "df['day'] = df['date'].str[8:10]" 460 | ] 461 | }, 462 | { 463 | "cell_type": "code", 464 | "execution_count": 12, 465 | "metadata": {}, 466 | "outputs": [], 467 | "source": [ 468 | "df['unit_sales'] = round(df['unit_sales'], 0)" 469 | ] 470 | }, 471 | { 472 | "cell_type": "markdown", 473 | "metadata": {}, 474 | "source": [ 475 | "### We export the final merged table" 476 | ] 477 | }, 478 | { 479 | "cell_type": "code", 480 | "execution_count": 13, 481 | "metadata": {}, 482 | "outputs": [], 483 | "source": [ 484 | "df.to_csv(\"final_data_with_date.csv\")" 485 | ] 486 | }, 487 | { 488 | "cell_type": "markdown", 489 | "metadata": {}, 490 | "source": [ 491 | "### We separate the data into a train and a test set\n", 492 | "- The training set contains all the observations from 2017-01-02 to 2017-08-14\n", 493 | "- The testing set contains all the observations from our target date only, i.e., 2017-08-15\n", 494 | "- the target variable for each set (the y variable) is the `sales` column\n", 495 | "- Finally, we export the different tables, so we can use them for predictions" 496 | ] 497 | }, 498 | { 499 | "cell_type": "code", 500 | "execution_count": 14, 501 | "metadata": {}, 502 | "outputs": [ 503 | { 504 | "name": "stdout", 505 | "output_type": "stream", 506 | "text": [ 507 | "22500 100\n" 508 | ] 509 | } 510 | ], 511 | "source": [ 512 | "df_train, df_test = df.loc[df['date'] != '2017-08-15'], df.loc[df['date'] == '2017-08-15']\n", 513 | 
"print(len(df_train), len(df_test))" 514 | ] 515 | }, 516 | { 517 | "cell_type": "code", 518 | "execution_count": 15, 519 | "metadata": {}, 520 | "outputs": [], 521 | "source": [ 522 | "df2_train = df_train.drop(['item_nbr', 'family', 'class', 'date', 'unit_sales'], axis = 1)\n", 523 | "df2_test = df_test.drop([ 'item_nbr', 'family', 'class', 'date', 'unit_sales'], axis = 1)" 524 | ] 525 | }, 526 | { 527 | "cell_type": "code", 528 | "execution_count": 16, 529 | "metadata": {}, 530 | "outputs": [], 531 | "source": [ 532 | "y_train, y_test = df_train[['unit_sales']], df_test[['unit_sales']]" 533 | ] 534 | }, 535 | { 536 | "cell_type": "code", 537 | "execution_count": 17, 538 | "metadata": {}, 539 | "outputs": [], 540 | "source": [ 541 | "df2_train.to_csv(\"X_train.csv\")\n", 542 | "df2_test.to_csv(\"X_test.csv\")\n", 543 | "y_train.to_csv(\"y_train.csv\")\n", 544 | "y_test.to_csv(\"y_test.csv\")" 545 | ] 546 | }, 547 | { 548 | "cell_type": "markdown", 549 | "metadata": {}, 550 | "source": [ 551 | "---\n", 552 | "## (b) Model training: Decision tree" 553 | ] 554 | }, 555 | { 556 | "cell_type": "code", 557 | "execution_count": 18, 558 | "metadata": {}, 559 | "outputs": [], 560 | "source": [ 561 | "# load data\n", 562 | "df = pd.read_csv(\"final_data_with_date.csv\")\n", 563 | "X_train = pd.read_csv(\"X_train.csv\")\n", 564 | "X_test = pd.read_csv(\"X_test.csv\")\n", 565 | "y_train = pd.read_csv(\"y_train.csv\")\n", 566 | "y_test = pd.read_csv(\"y_test.csv\")\n", 567 | "\n", 568 | "# we drop the 'Unnamed: 0' column (old rows in the original tables)\n", 569 | "X_train = X_train.drop(['Unnamed: 0'], axis = 1)\n", 570 | "X_test = X_test.drop(['Unnamed: 0'], axis = 1)\n", 571 | "y_train = y_train.drop(['Unnamed: 0'], axis = 1)\n", 572 | "y_test = y_test.drop(['Unnamed: 0'], axis = 1)" 573 | ] 574 | }, 575 | { 576 | "cell_type": "code", 577 | "execution_count": 19, 578 | "metadata": {}, 579 | "outputs": [], 580 | "source": [ 581 | "from sklearn.externals import joblib\n", 
582 | "\n", 583 | "def savemodel(filename, model):\n", 584 | " # Save to file in the current working directory\n", 585 | " path = filename + \".joblib\"\n", 586 | " joblib_file = path\n", 587 | " joblib.dump(model, joblib_file)\n", 588 | "\n", 589 | "def loadmodel(joblib_file):\n", 590 | " # Load from file\n", 591 | " joblib_model = joblib.load(joblib_file)\n", 592 | " return(joblib_model)" 593 | ] 594 | }, 595 | { 596 | "cell_type": "code", 597 | "execution_count": 20, 598 | "metadata": {}, 599 | "outputs": [], 600 | "source": [ 601 | "# parameters that we are going to tune\n", 602 | "DT_param_grid = { 'max_depth':[2, 3, 4, 5, 10, 20, 50],\n", 603 | " 'min_samples_split': [2, 10, 50, 100],\n", 604 | " 'min_samples_leaf': [1, 5, 10]\n", 605 | " }" 606 | ] 607 | }, 608 | { 609 | "cell_type": "code", 610 | "execution_count": 21, 611 | "metadata": {}, 612 | "outputs": [ 613 | { 614 | "name": "stdout", 615 | "output_type": "stream", 616 | "text": [ 617 | "Fitting 10 folds for each of 84 candidates, totalling 840 fits\n" 618 | ] 619 | }, 620 | { 621 | "name": "stderr", 622 | "output_type": "stream", 623 | "text": [ 624 | "[Parallel(n_jobs=16)]: Using backend LokyBackend with 16 concurrent workers.\n", 625 | "[Parallel(n_jobs=16)]: Done 9 tasks | elapsed: 8.7s\n", 626 | "[Parallel(n_jobs=16)]: Done 18 tasks | elapsed: 9.2s\n", 627 | "[Parallel(n_jobs=16)]: Done 29 tasks | elapsed: 9.9s\n", 628 | "[Parallel(n_jobs=16)]: Done 40 tasks | elapsed: 10.2s\n", 629 | "[Parallel(n_jobs=16)]: Done 53 tasks | elapsed: 10.6s\n", 630 | "[Parallel(n_jobs=16)]: Done 66 tasks | elapsed: 11.1s\n", 631 | "[Parallel(n_jobs=16)]: Done 81 tasks | elapsed: 11.6s\n", 632 | "[Parallel(n_jobs=16)]: Done 96 tasks | elapsed: 12.1s\n", 633 | "[Parallel(n_jobs=16)]: Done 113 tasks | elapsed: 12.7s\n", 634 | "[Parallel(n_jobs=16)]: Done 130 tasks | elapsed: 13.3s\n", 635 | "[Parallel(n_jobs=16)]: Done 149 tasks | elapsed: 13.9s\n", 636 | "[Parallel(n_jobs=16)]: Done 168 tasks | elapsed: 14.6s\n", 637 
| "[Parallel(n_jobs=16)]: Done 189 tasks | elapsed: 15.2s\n", 638 | "[Parallel(n_jobs=16)]: Done 210 tasks | elapsed: 16.1s\n", 639 | "[Parallel(n_jobs=16)]: Done 233 tasks | elapsed: 16.9s\n", 640 | "[Parallel(n_jobs=16)]: Done 256 tasks | elapsed: 17.7s\n", 641 | "[Parallel(n_jobs=16)]: Done 281 tasks | elapsed: 18.6s\n", 642 | "[Parallel(n_jobs=16)]: Done 306 tasks | elapsed: 19.5s\n", 643 | "[Parallel(n_jobs=16)]: Done 333 tasks | elapsed: 20.6s\n", 644 | "[Parallel(n_jobs=16)]: Done 360 tasks | elapsed: 21.7s\n", 645 | "[Parallel(n_jobs=16)]: Done 389 tasks | elapsed: 22.9s\n", 646 | "[Parallel(n_jobs=16)]: Done 418 tasks | elapsed: 24.0s\n", 647 | "[Parallel(n_jobs=16)]: Done 449 tasks | elapsed: 25.5s\n", 648 | "[Parallel(n_jobs=16)]: Done 480 tasks | elapsed: 26.9s\n", 649 | "[Parallel(n_jobs=16)]: Done 513 tasks | elapsed: 28.5s\n", 650 | "[Parallel(n_jobs=16)]: Done 546 tasks | elapsed: 30.3s\n", 651 | "[Parallel(n_jobs=16)]: Done 581 tasks | elapsed: 32.0s\n", 652 | "[Parallel(n_jobs=16)]: Done 616 tasks | elapsed: 33.8s\n", 653 | "[Parallel(n_jobs=16)]: Done 653 tasks | elapsed: 36.0s\n", 654 | "[Parallel(n_jobs=16)]: Done 690 tasks | elapsed: 38.5s\n", 655 | "[Parallel(n_jobs=16)]: Done 729 tasks | elapsed: 41.0s\n", 656 | "[Parallel(n_jobs=16)]: Done 768 tasks | elapsed: 43.0s\n", 657 | "[Parallel(n_jobs=16)]: Done 809 tasks | elapsed: 45.5s\n" 658 | ] 659 | }, 660 | { 661 | "name": "stdout", 662 | "output_type": "stream", 663 | "text": [ 664 | "DecisionTreeRegressor(criterion='mse', max_depth=20, max_features=None,\n", 665 | " max_leaf_nodes=None, min_impurity_decrease=0.0,\n", 666 | " min_impurity_split=None, min_samples_leaf=10,\n", 667 | " min_samples_split=100, min_weight_fraction_leaf=0.0,\n", 668 | " presort=False, random_state=None, splitter='best')\n" 669 | ] 670 | }, 671 | { 672 | "name": "stderr", 673 | "output_type": "stream", 674 | "text": [ 675 | "[Parallel(n_jobs=16)]: Done 840 out of 840 | elapsed: 46.7s finished\n" 676 | ] 677 | } 678 
| ], 679 | "source": [ 680 | "DT = GridSearchCV(estimator=DecisionTreeRegressor(), param_grid = DT_param_grid, \n", 681 | " scoring='neg_mean_absolute_error', \n", 682 | " cv=10, \n", 683 | " verbose=10, n_jobs = 16)\n", 684 | "DT.fit(X_train, y_train)\n", 685 | "DTbest = DT.best_estimator_\n", 686 | "print(DTbest)" 687 | ] 688 | }, 689 | { 690 | "cell_type": "code", 691 | "execution_count": 22, 692 | "metadata": {}, 693 | "outputs": [], 694 | "source": [ 695 | "# save the model\n", 696 | "savemodel(\"DT\", DTbest)" 697 | ] 698 | }, 699 | { 700 | "cell_type": "markdown", 701 | "metadata": {}, 702 | "source": [ 703 | "## (c) Model performance, let's compare with a baseline model" 704 | ] 705 | }, 706 | { 707 | "cell_type": "code", 708 | "execution_count": 41, 709 | "metadata": {}, 710 | "outputs": [ 711 | { 712 | "name": "stdout", 713 | "output_type": "stream", 714 | "text": [ 715 | "MAE with rounded predictions: 1.49\n" 716 | ] 717 | } 718 | ], 719 | "source": [ 720 | "# make predictions\n", 721 | "ypred = pd.DataFrame(DTbest.predict(X_test))\n", 722 | "\n", 723 | "# but sales volumes are integers so, we round each prediction to the closest integer\n", 724 | "ypred_round = round(ypred)\n", 725 | "\n", 726 | "# get the MAE\n", 727 | "MAE = mean_absolute_error(y_test, ypred_round)\n", 728 | "\n", 729 | "print('MAE with rounded predictions:', MAE)\n" 730 | ] 731 | }, 732 | { 733 | "cell_type": "code", 734 | "execution_count": 24, 735 | "metadata": {}, 736 | "outputs": [], 737 | "source": [ 738 | "# baseline model predicts that sales for August 15 are exactly the sales from the previous day \n", 739 | "y_baseline = y_train.iloc[-100:, :]" 740 | ] 741 | }, 742 | { 743 | "cell_type": "code", 744 | "execution_count": 25, 745 | "metadata": {}, 746 | "outputs": [ 747 | { 748 | "name": "stdout", 749 | "output_type": "stream", 750 | "text": [ 751 | "MAE of baseline: 1.94\n" 752 | ] 753 | } 754 | ], 755 | "source": [ 756 | "# get the MAE\n", 757 | "MAE_b = 
mean_absolute_error(y_test, y_baseline)\n", 758 | "print(\"MAE of baseline: \", MAE_b)" 759 | ] 760 | }, 761 | { 762 | "cell_type": "markdown", 763 | "metadata": {}, 764 | "source": [ 765 | "# Export data for the optimization problem\n", 766 | "We will switch to Julia in order to solve the optimization problem.\n", 767 | "\n", 768 | "To do so, we will need the values `y_train`, `y_pred`, `y_baseline`, as well as the leaves in the decision tree we built that correspond to each value y." 769 | ] 770 | }, 771 | { 772 | "cell_type": "code", 773 | "execution_count": 26, 774 | "metadata": {}, 775 | "outputs": [], 776 | "source": [ 777 | "# predictions y hat:\n", 778 | "ypred_round.to_csv(\"y_pred.csv\")\n", 779 | "y_baseline.to_csv(\"y_baseline.csv\")" 780 | ] 781 | }, 782 | { 783 | "cell_type": "code", 784 | "execution_count": 38, 785 | "metadata": {}, 786 | "outputs": [], 787 | "source": [ 788 | "# transform boolean into integers\n", 789 | "X_test[\"ondisplay\"] = X_test[\"ondisplay\"].astype('int64')\n", 790 | "X_test[\"onpromotion\"] = X_test[\"onpromotion\"].astype('int64')\n", 791 | "X_test[\"isHoliday\"] = X_test[\"isHoliday\"].astype('int64')\n", 792 | "X_test.to_csv(\"X_test_int.csv\")" 793 | ] 794 | }, 795 | { 796 | "cell_type": "code", 797 | "execution_count": 27, 798 | "metadata": {}, 799 | "outputs": [ 800 | { 801 | "name": "stderr", 802 | "output_type": "stream", 803 | "text": [ 804 | "C:\\Users\\utilisateur\\.julia\\v0.6\\Conda\\deps\\usr\\lib\\site-packages\\ipykernel_launcher.py:4: SettingWithCopyWarning: \n", 805 | "A value is trying to be set on a copy of a slice from a DataFrame.\n", 806 | "Try using .loc[row_indexer,col_indexer] = value instead\n", 807 | "\n", 808 | "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", 809 | " after removing the cwd from sys.path.\n", 810 | "C:\\Users\\utilisateur\\.julia\\v0.6\\Conda\\deps\\usr\\lib\\site-packages\\ipykernel_launcher.py:5: 
SettingWithCopyWarning: \n", 811 | "A value is trying to be set on a copy of a slice from a DataFrame.\n", 812 | "Try using .loc[row_indexer,col_indexer] = value instead\n", 813 | "\n", 814 | "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", 815 | " \"\"\"\n", 816 | "C:\\Users\\utilisateur\\.julia\\v0.6\\Conda\\deps\\usr\\lib\\site-packages\\ipykernel_launcher.py:6: SettingWithCopyWarning: \n", 817 | "A value is trying to be set on a copy of a slice from a DataFrame.\n", 818 | "Try using .loc[row_indexer,col_indexer] = value instead\n", 819 | "\n", 820 | "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", 821 | " \n" 822 | ] 823 | } 824 | ], 825 | "source": [ 826 | "# export the baseline data for the optimization model in Julia\n", 827 | "X_baseline = X_train.iloc[-100:, :]\n", 828 | "# transform boolean into integers\n", 829 | "X_baseline[\"ondisplay\"] = X_baseline[\"ondisplay\"].astype('int64')\n", 830 | "X_baseline[\"onpromotion\"] = X_baseline[\"onpromotion\"].astype('int64')\n", 831 | "X_baseline[\"isHoliday\"] = X_baseline[\"isHoliday\"].astype('int64')\n", 832 | "X_baseline.to_csv(\"X_baseline.csv\")" 833 | ] 834 | }, 835 | { 836 | "cell_type": "code", 837 | "execution_count": 28, 838 | "metadata": {}, 839 | "outputs": [], 840 | "source": [ 841 | "# get the leaf' number where each observation belongs\n", 842 | "leaf_belong_train = DTbest.apply(X_train)\n", 843 | "leaf_belong_test = DTbest.apply(X_test)" 844 | ] 845 | }, 846 | { 847 | "cell_type": "code", 848 | "execution_count": 29, 849 | "metadata": {}, 850 | "outputs": [], 851 | "source": [ 852 | "# pull out the leaves for both data sets: each row is a pair of {unit_sales, leaf where the corresponding observation was}\n", 853 | "\n", 854 | "train_leaves = []\n", 855 | "for i in range(len(y_train)):\n", 856 | " 
train_leaves.append((list(y_train['unit_sales'])[i], leaf_belong_train[i]))\n", 857 | " \n", 858 | "test_leaves = []\n", 859 | "for i in range(len(y_test)):\n", 860 | " test_leaves.append((list(y_test['unit_sales'])[i], leaf_belong_test[i]))" 861 | ] 862 | }, 863 | { 864 | "cell_type": "code", 865 | "execution_count": 30, 866 | "metadata": {}, 867 | "outputs": [], 868 | "source": [ 869 | "# export the leaves \n", 870 | "train_leaves = np.array(train_leaves)\n", 871 | "np.savetxt(\"train_leaves.csv\", train_leaves, delimiter=\",\")\n", 872 | "\n", 873 | "test_leaves = np.array(test_leaves)\n", 874 | "np.savetxt(\"test_leaves.csv\", test_leaves, delimiter=\",\")" 875 | ] 876 | }, 877 | { 878 | "cell_type": "code", 879 | "execution_count": 32, 880 | "metadata": {}, 881 | "outputs": [], 882 | "source": [ 883 | "# function to get all unit_sales values belonging to a leaf\n", 884 | "\n", 885 | "def fetch_points_in_leaf(leaf, train_leaves):\n", 886 | " n = len(train_leaves)\n", 887 | " in_leaf = []\n", 888 | " for x in train_leaves:\n", 889 | " if x[1] == leaf:\n", 890 | " in_leaf.append(x[0])\n", 891 | " return(in_leaf)" 892 | ] 893 | }, 894 | { 895 | "cell_type": "code", 896 | "execution_count": 33, 897 | "metadata": {}, 898 | "outputs": [], 899 | "source": [ 900 | "# for the purpose to get a matrix where every row has the same length\n", 901 | "# we add -1 to complete the rows\n", 902 | "def complete_list(l, length):\n", 903 | " if len(l) < length:\n", 904 | " new_list = [-1 for i in range(length)]\n", 905 | " for i in range(len(l)):\n", 906 | " new_list[i] = l[i]\n", 907 | " return(new_list)" 908 | ] 909 | }, 910 | { 911 | "cell_type": "code", 912 | "execution_count": 34, 913 | "metadata": {}, 914 | "outputs": [], 915 | "source": [ 916 | "# gives the unit_sales from the training set which are in a particular leaf\n", 917 | "def get_leaf_points(train_leaves, test_leaves):\n", 918 | " test_leaf_data = []\n", 919 | " for x in test_leaves:\n", 920 | " leaf = x[1] # leaf 
where y_pred[i] belongs\n", 921 | " in_leaf = fetch_points_in_leaf(leaf, train_leaves)\n", 922 | " test_leaf_data.append( complete_list(in_leaf, len(train_leaves)) ) \n", 923 | " return(test_leaf_data)" 924 | ] 925 | }, 926 | { 927 | "cell_type": "code", 928 | "execution_count": 35, 929 | "metadata": {}, 930 | "outputs": [], 931 | "source": [ 932 | "# get the weights corresponding to a leaf\n", 933 | "# equals to 1/(number of points in the leaf)\n", 934 | "\n", 935 | "def get_weight(train_leaves, test_leaves):\n", 936 | " weights = []\n", 937 | " leaf_points = get_leaf_points(train_leaves, test_leaves)\n", 938 | " for x in leaf_points:\n", 939 | " count = 0\n", 940 | " for i in range(len(x)):\n", 941 | " if x[i] != -1:\n", 942 | " count +=1\n", 943 | " weights.append(1/count)\n", 944 | " return(weights)" 945 | ] 946 | }, 947 | { 948 | "cell_type": "code", 949 | "execution_count": 36, 950 | "metadata": {}, 951 | "outputs": [], 952 | "source": [ 953 | "test_leaf_data = get_leaf_points(train_leaves, test_leaves)\n", 954 | "weights = get_weight(train_leaves, test_leaves)" 955 | ] 956 | }, 957 | { 958 | "cell_type": "code", 959 | "execution_count": 37, 960 | "metadata": {}, 961 | "outputs": [], 962 | "source": [ 963 | "# export the leaves data\n", 964 | "\n", 965 | "test_leaf_data = np.array(test_leaf_data)\n", 966 | "test_leaf_data = np.transpose(test_leaf_data) # transpose because otherwise, the csv will have 22500 columns VS 100 columns\n", 967 | "np.savetxt(\"test_leaf_data_T.csv\", test_leaf_data, delimiter=\",\")\n", 968 | "weights = np.array(weights)\n", 969 | "np.savetxt(\"weights.csv\", weights, delimiter=\",\")" 970 | ] 971 | } 972 | ], 973 | "metadata": { 974 | "kernelspec": { 975 | "display_name": "Python 3", 976 | "language": "python", 977 | "name": "python3" 978 | }, 979 | "language_info": { 980 | "codemirror_mode": { 981 | "name": "ipython", 982 | "version": 3 983 | }, 984 | "file_extension": ".py", 985 | "mimetype": "text/x-python", 986 | "name": 
"python", 987 | "nbconvert_exporter": "python", 988 | "pygments_lexer": "ipython3", 989 | "version": "3.6.5" 990 | } 991 | }, 992 | "nbformat": 4, 993 | "nbformat_minor": 2 994 | } 995 | -------------------------------------------------------------------------------- /Homework4-Precriptive-approach-to-forecast-stocks-sales/hw4-Q4-prescriptive-approach-for-stocks-predictions.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "15.095. Homework 4\n", 8 | "\n", 9 | "Kim-Anh-Nhi Nguyen\n", 10 | "\n", 11 | "# Question 4" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 1, 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [ 20 | "using JuMP, Gurobi, DataFrames" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": 3, 26 | "metadata": {}, 27 | "outputs": [ 28 | { 29 | "name": "stderr", 30 | "output_type": "stream", 31 | "text": [ 32 | "\u001b[1m\u001b[33mWARNING: \u001b[39m\u001b[22m\u001b[33mreadtable is deprecated, use CSV.read from the CSV package instead\u001b[39m\n", 33 | "Stacktrace:\n", 34 | " [1] \u001b[1mdepwarn\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::String, ::Symbol\u001b[1m)\u001b[22m\u001b[22m at \u001b[1m.\\deprecated.jl:70\u001b[22m\u001b[22m\n", 35 | " [2] \u001b[1m#readtable#232\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::Bool, ::Char, ::Array{Char,1}, ::Char, ::Array{String,1}, ::Array{String,1}, ::Array{String,1}, ::Bool, ::Int64, ::Array{Symbol,1}, ::Array{Any,1}, ::Bool, ::Char, ::Bool, ::Int64, ::Array{Int64,1}, ::Bool, ::Symbol, ::Bool, ::Bool, ::DataFrames.#readtable, ::String\u001b[1m)\u001b[22m\u001b[22m at \u001b[1mC:\\Users\\utilisateur\\.julia\\v0.6\\DataFrames\\src\\deprecated.jl:1045\u001b[22m\u001b[22m\n", 36 | " [3] \u001b[1m(::DataFrames.#kw##readtable)\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::Array{Any,1}, ::DataFrames.#readtable, 
::String\u001b[1m)\u001b[22m\u001b[22m at \u001b[1m.\\:0\u001b[22m\u001b[22m\n", 37 | " [4] \u001b[1minclude_string\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::String, ::String\u001b[1m)\u001b[22m\u001b[22m at \u001b[1m.\\loading.jl:522\u001b[22m\u001b[22m\n", 38 | " [5] \u001b[1msoftscope_include_string\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::Module, ::String, ::String\u001b[1m)\u001b[22m\u001b[22m at \u001b[1mC:\\Users\\utilisateur\\.julia\\v0.6\\SoftGlobalScope\\src\\SoftGlobalScope.jl:66\u001b[22m\u001b[22m\n", 39 | " [6] \u001b[1mexecute_request\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::ZMQ.Socket, ::IJulia.Msg\u001b[1m)\u001b[22m\u001b[22m at \u001b[1mC:\\Users\\utilisateur\\.julia\\v0.6\\IJulia\\src\\execute_request.jl:193\u001b[22m\u001b[22m\n", 40 | " [7] \u001b[1m(::Compat.#inner#6{Array{Any,1},IJulia.#execute_request,Tuple{ZMQ.Socket,IJulia.Msg}})\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m\u001b[1m)\u001b[22m\u001b[22m at \u001b[1mC:\\Users\\utilisateur\\.julia\\v0.6\\Compat\\src\\Compat.jl:189\u001b[22m\u001b[22m\n", 41 | " [8] \u001b[1meventloop\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::ZMQ.Socket\u001b[1m)\u001b[22m\u001b[22m at \u001b[1mC:\\Users\\utilisateur\\.julia\\v0.6\\IJulia\\src\\eventloop.jl:8\u001b[22m\u001b[22m\n", 42 | " [9] \u001b[1m(::IJulia.##13#16)\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m\u001b[1m)\u001b[22m\u001b[22m at \u001b[1m.\\task.jl:335\u001b[22m\u001b[22m\n", 43 | "while loading In[3], in expression starting on line 1\n", 44 | "\u001b[1m\u001b[33mWARNING: \u001b[39m\u001b[22m\u001b[33mreadtable is deprecated, use CSV.read from the CSV package instead\u001b[39m\n", 45 | "Stacktrace:\n", 46 | " [1] \u001b[1mdepwarn\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::String, ::Symbol\u001b[1m)\u001b[22m\u001b[22m at \u001b[1m.\\deprecated.jl:70\u001b[22m\u001b[22m\n", 47 | " [2] \u001b[1m#readtable#232\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::Bool, ::Char, 
::Array{Char,1}, ::Char, ::Array{String,1}, ::Array{String,1}, ::Array{String,1}, ::Bool, ::Int64, ::Array{Symbol,1}, ::Array{Any,1}, ::Bool, ::Char, ::Bool, ::Int64, ::Array{Int64,1}, ::Bool, ::Symbol, ::Bool, ::Bool, ::DataFrames.#readtable, ::String\u001b[1m)\u001b[22m\u001b[22m at \u001b[1mC:\\Users\\utilisateur\\.julia\\v0.6\\DataFrames\\src\\deprecated.jl:1045\u001b[22m\u001b[22m\n", 48 | " [3] \u001b[1m(::DataFrames.#kw##readtable)\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::Array{Any,1}, ::DataFrames.#readtable, ::String\u001b[1m)\u001b[22m\u001b[22m at \u001b[1m.\\:0\u001b[22m\u001b[22m\n", 49 | " [4] \u001b[1minclude_string\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::String, ::String\u001b[1m)\u001b[22m\u001b[22m at \u001b[1m.\\loading.jl:522\u001b[22m\u001b[22m\n", 50 | " [5] \u001b[1msoftscope_include_string\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::Module, ::String, ::String\u001b[1m)\u001b[22m\u001b[22m at \u001b[1mC:\\Users\\utilisateur\\.julia\\v0.6\\SoftGlobalScope\\src\\SoftGlobalScope.jl:66\u001b[22m\u001b[22m\n", 51 | " [6] \u001b[1mexecute_request\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::ZMQ.Socket, ::IJulia.Msg\u001b[1m)\u001b[22m\u001b[22m at \u001b[1mC:\\Users\\utilisateur\\.julia\\v0.6\\IJulia\\src\\execute_request.jl:193\u001b[22m\u001b[22m\n", 52 | " [7] \u001b[1m(::Compat.#inner#6{Array{Any,1},IJulia.#execute_request,Tuple{ZMQ.Socket,IJulia.Msg}})\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m\u001b[1m)\u001b[22m\u001b[22m at \u001b[1mC:\\Users\\utilisateur\\.julia\\v0.6\\Compat\\src\\Compat.jl:189\u001b[22m\u001b[22m\n", 53 | " [8] \u001b[1meventloop\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::ZMQ.Socket\u001b[1m)\u001b[22m\u001b[22m at \u001b[1mC:\\Users\\utilisateur\\.julia\\v0.6\\IJulia\\src\\eventloop.jl:8\u001b[22m\u001b[22m\n", 54 | " [9] \u001b[1m(::IJulia.##13#16)\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m\u001b[1m)\u001b[22m\u001b[22m at 
\u001b[1m.\\task.jl:335\u001b[22m\u001b[22m\n", 55 | "while loading In[3], in expression starting on line 2\n", 56 | "\u001b[1m\u001b[33mWARNING: \u001b[39m\u001b[22m\u001b[33mreadtable is deprecated, use CSV.read from the CSV package instead\u001b[39m\n", 57 | "Stacktrace:\n", 58 | " [1] \u001b[1mdepwarn\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::String, ::Symbol\u001b[1m)\u001b[22m\u001b[22m at \u001b[1m.\\deprecated.jl:70\u001b[22m\u001b[22m\n", 59 | " [2] \u001b[1m#readtable#232\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::Bool, ::Char, ::Array{Char,1}, ::Char, ::Array{String,1}, ::Array{String,1}, ::Array{String,1}, ::Bool, ::Int64, ::Array{Symbol,1}, ::Array{Any,1}, ::Bool, ::Char, ::Bool, ::Int64, ::Array{Int64,1}, ::Bool, ::Symbol, ::Bool, ::Bool, ::DataFrames.#readtable, ::String\u001b[1m)\u001b[22m\u001b[22m at \u001b[1mC:\\Users\\utilisateur\\.julia\\v0.6\\DataFrames\\src\\deprecated.jl:1045\u001b[22m\u001b[22m\n", 60 | " [3] \u001b[1m(::DataFrames.#kw##readtable)\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::Array{Any,1}, ::DataFrames.#readtable, ::String\u001b[1m)\u001b[22m\u001b[22m at \u001b[1m.\\:0\u001b[22m\u001b[22m\n", 61 | " [4] \u001b[1minclude_string\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::String, ::String\u001b[1m)\u001b[22m\u001b[22m at \u001b[1m.\\loading.jl:522\u001b[22m\u001b[22m\n", 62 | " [5] \u001b[1msoftscope_include_string\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::Module, ::String, ::String\u001b[1m)\u001b[22m\u001b[22m at \u001b[1mC:\\Users\\utilisateur\\.julia\\v0.6\\SoftGlobalScope\\src\\SoftGlobalScope.jl:66\u001b[22m\u001b[22m\n", 63 | " [6] \u001b[1mexecute_request\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::ZMQ.Socket, ::IJulia.Msg\u001b[1m)\u001b[22m\u001b[22m at \u001b[1mC:\\Users\\utilisateur\\.julia\\v0.6\\IJulia\\src\\execute_request.jl:193\u001b[22m\u001b[22m\n", 64 | " [7] 
\u001b[1m(::Compat.#inner#6{Array{Any,1},IJulia.#execute_request,Tuple{ZMQ.Socket,IJulia.Msg}})\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m\u001b[1m)\u001b[22m\u001b[22m at \u001b[1mC:\\Users\\utilisateur\\.julia\\v0.6\\Compat\\src\\Compat.jl:189\u001b[22m\u001b[22m\n", 65 | " [8] \u001b[1meventloop\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::ZMQ.Socket\u001b[1m)\u001b[22m\u001b[22m at \u001b[1mC:\\Users\\utilisateur\\.julia\\v0.6\\IJulia\\src\\eventloop.jl:8\u001b[22m\u001b[22m\n", 66 | " [9] \u001b[1m(::IJulia.##13#16)\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m\u001b[1m)\u001b[22m\u001b[22m at \u001b[1m.\\task.jl:335\u001b[22m\u001b[22m\n", 67 | "while loading In[3], in expression starting on line 3\n", 68 | "\u001b[1m\u001b[33mWARNING: \u001b[39m\u001b[22m\u001b[33mreadtable is deprecated, use CSV.read from the CSV package instead\u001b[39m\n", 69 | "Stacktrace:\n", 70 | " [1] \u001b[1mdepwarn\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::String, ::Symbol\u001b[1m)\u001b[22m\u001b[22m at \u001b[1m.\\deprecated.jl:70\u001b[22m\u001b[22m\n", 71 | " [2] \u001b[1m#readtable#232\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::Bool, ::Char, ::Array{Char,1}, ::Char, ::Array{String,1}, ::Array{String,1}, ::Array{String,1}, ::Bool, ::Int64, ::Array{Symbol,1}, ::Array{Any,1}, ::Bool, ::Char, ::Bool, ::Int64, ::Array{Int64,1}, ::Bool, ::Symbol, ::Bool, ::Bool, ::DataFrames.#readtable, ::String\u001b[1m)\u001b[22m\u001b[22m at \u001b[1mC:\\Users\\utilisateur\\.julia\\v0.6\\DataFrames\\src\\deprecated.jl:1045\u001b[22m\u001b[22m\n", 72 | " [3] \u001b[1m(::DataFrames.#kw##readtable)\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::Array{Any,1}, ::DataFrames.#readtable, ::String\u001b[1m)\u001b[22m\u001b[22m at \u001b[1m.\\:0\u001b[22m\u001b[22m\n", 73 | " [4] \u001b[1minclude_string\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::String, ::String\u001b[1m)\u001b[22m\u001b[22m at \u001b[1m.\\loading.jl:522\u001b[22m\u001b[22m\n", 74 | " [5] 
\u001b[1msoftscope_include_string\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::Module, ::String, ::String\u001b[1m)\u001b[22m\u001b[22m at \u001b[1mC:\\Users\\utilisateur\\.julia\\v0.6\\SoftGlobalScope\\src\\SoftGlobalScope.jl:66\u001b[22m\u001b[22m\n", 75 | " [6] \u001b[1mexecute_request\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::ZMQ.Socket, ::IJulia.Msg\u001b[1m)\u001b[22m\u001b[22m at \u001b[1mC:\\Users\\utilisateur\\.julia\\v0.6\\IJulia\\src\\execute_request.jl:193\u001b[22m\u001b[22m\n", 76 | " [7] \u001b[1m(::Compat.#inner#6{Array{Any,1},IJulia.#execute_request,Tuple{ZMQ.Socket,IJulia.Msg}})\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m\u001b[1m)\u001b[22m\u001b[22m at \u001b[1mC:\\Users\\utilisateur\\.julia\\v0.6\\Compat\\src\\Compat.jl:189\u001b[22m\u001b[22m\n", 77 | " [8] \u001b[1meventloop\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::ZMQ.Socket\u001b[1m)\u001b[22m\u001b[22m at \u001b[1mC:\\Users\\utilisateur\\.julia\\v0.6\\IJulia\\src\\eventloop.jl:8\u001b[22m\u001b[22m\n", 78 | " [9] \u001b[1m(::IJulia.##13#16)\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m\u001b[1m)\u001b[22m\u001b[22m at \u001b[1m.\\task.jl:335\u001b[22m\u001b[22m\n", 79 | "while loading In[3], in expression starting on line 4\n", 80 | "\u001b[1m\u001b[33mWARNING: \u001b[39m\u001b[22m\u001b[33mreadtable is deprecated, use CSV.read from the CSV package instead\u001b[39m\n", 81 | "Stacktrace:\n", 82 | " [1] \u001b[1mdepwarn\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::String, ::Symbol\u001b[1m)\u001b[22m\u001b[22m at \u001b[1m.\\deprecated.jl:70\u001b[22m\u001b[22m\n", 83 | " [2] \u001b[1m#readtable#232\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::Bool, ::Char, ::Array{Char,1}, ::Char, ::Array{String,1}, ::Array{String,1}, ::Array{String,1}, ::Bool, ::Int64, ::Array{Symbol,1}, ::Array{Any,1}, ::Bool, ::Char, ::Bool, ::Int64, ::Array{Int64,1}, ::Bool, ::Symbol, ::Bool, ::Bool, ::DataFrames.#readtable, ::String\u001b[1m)\u001b[22m\u001b[22m at 
\u001b[1mC:\\Users\\utilisateur\\.julia\\v0.6\\DataFrames\\src\\deprecated.jl:1045\u001b[22m\u001b[22m\n", 84 | " [3] \u001b[1m(::DataFrames.#kw##readtable)\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::Array{Any,1}, ::DataFrames.#readtable, ::String\u001b[1m)\u001b[22m\u001b[22m at \u001b[1m.\\:0\u001b[22m\u001b[22m\n", 85 | " [4] \u001b[1minclude_string\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::String, ::String\u001b[1m)\u001b[22m\u001b[22m at \u001b[1m.\\loading.jl:522\u001b[22m\u001b[22m\n", 86 | " [5] \u001b[1msoftscope_include_string\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::Module, ::String, ::String\u001b[1m)\u001b[22m\u001b[22m at \u001b[1mC:\\Users\\utilisateur\\.julia\\v0.6\\SoftGlobalScope\\src\\SoftGlobalScope.jl:66\u001b[22m\u001b[22m\n", 87 | " [6] \u001b[1mexecute_request\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::ZMQ.Socket, ::IJulia.Msg\u001b[1m)\u001b[22m\u001b[22m at \u001b[1mC:\\Users\\utilisateur\\.julia\\v0.6\\IJulia\\src\\execute_request.jl:193\u001b[22m\u001b[22m\n", 88 | " [7] \u001b[1m(::Compat.#inner#6{Array{Any,1},IJulia.#execute_request,Tuple{ZMQ.Socket,IJulia.Msg}})\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m\u001b[1m)\u001b[22m\u001b[22m at \u001b[1mC:\\Users\\utilisateur\\.julia\\v0.6\\Compat\\src\\Compat.jl:189\u001b[22m\u001b[22m\n", 89 | " [8] \u001b[1meventloop\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::ZMQ.Socket\u001b[1m)\u001b[22m\u001b[22m at \u001b[1mC:\\Users\\utilisateur\\.julia\\v0.6\\IJulia\\src\\eventloop.jl:8\u001b[22m\u001b[22m\n", 90 | " [9] \u001b[1m(::IJulia.##13#16)\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m\u001b[1m)\u001b[22m\u001b[22m at \u001b[1m.\\task.jl:335\u001b[22m\u001b[22m\n", 91 | "while loading In[3], in expression starting on line 5\n" 92 | ] 93 | }, 94 | { 95 | "name": "stderr", 96 | "output_type": "stream", 97 | "text": [ 98 | "\u001b[1m\u001b[33mWARNING: \u001b[39m\u001b[22m\u001b[33mreadtable is deprecated, use CSV.read from the CSV package 
instead\u001b[39m\n", 99 | "Stacktrace:\n", 100 | " [1] \u001b[1mdepwarn\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::String, ::Symbol\u001b[1m)\u001b[22m\u001b[22m at \u001b[1m.\\deprecated.jl:70\u001b[22m\u001b[22m\n", 101 | " [2] \u001b[1m#readtable#232\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::Bool, ::Char, ::Array{Char,1}, ::Char, ::Array{String,1}, ::Array{String,1}, ::Array{String,1}, ::Bool, ::Int64, ::Array{Symbol,1}, ::Array{Any,1}, ::Bool, ::Char, ::Bool, ::Int64, ::Array{Int64,1}, ::Bool, ::Symbol, ::Bool, ::Bool, ::DataFrames.#readtable, ::String\u001b[1m)\u001b[22m\u001b[22m at \u001b[1mC:\\Users\\utilisateur\\.julia\\v0.6\\DataFrames\\src\\deprecated.jl:1045\u001b[22m\u001b[22m\n", 102 | " [3] \u001b[1m(::DataFrames.#kw##readtable)\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::Array{Any,1}, ::DataFrames.#readtable, ::String\u001b[1m)\u001b[22m\u001b[22m at \u001b[1m.\\:0\u001b[22m\u001b[22m\n", 103 | " [4] \u001b[1minclude_string\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::String, ::String\u001b[1m)\u001b[22m\u001b[22m at \u001b[1m.\\loading.jl:522\u001b[22m\u001b[22m\n", 104 | " [5] \u001b[1msoftscope_include_string\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::Module, ::String, ::String\u001b[1m)\u001b[22m\u001b[22m at \u001b[1mC:\\Users\\utilisateur\\.julia\\v0.6\\SoftGlobalScope\\src\\SoftGlobalScope.jl:66\u001b[22m\u001b[22m\n", 105 | " [6] \u001b[1mexecute_request\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::ZMQ.Socket, ::IJulia.Msg\u001b[1m)\u001b[22m\u001b[22m at \u001b[1mC:\\Users\\utilisateur\\.julia\\v0.6\\IJulia\\src\\execute_request.jl:193\u001b[22m\u001b[22m\n", 106 | " [7] \u001b[1m(::Compat.#inner#6{Array{Any,1},IJulia.#execute_request,Tuple{ZMQ.Socket,IJulia.Msg}})\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m\u001b[1m)\u001b[22m\u001b[22m at \u001b[1mC:\\Users\\utilisateur\\.julia\\v0.6\\Compat\\src\\Compat.jl:189\u001b[22m\u001b[22m\n", 107 | " [8] 
\u001b[1meventloop\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::ZMQ.Socket\u001b[1m)\u001b[22m\u001b[22m at \u001b[1mC:\\Users\\utilisateur\\.julia\\v0.6\\IJulia\\src\\eventloop.jl:8\u001b[22m\u001b[22m\n", 108 | " [9] \u001b[1m(::IJulia.##13#16)\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m\u001b[1m)\u001b[22m\u001b[22m at \u001b[1m.\\task.jl:335\u001b[22m\u001b[22m\n", 109 | "while loading In[3], in expression starting on line 6\n", 110 | "\u001b[1m\u001b[33mWARNING: \u001b[39m\u001b[22m\u001b[33mreadtable is deprecated, use CSV.read from the CSV package instead\u001b[39m\n", 111 | "Stacktrace:\n", 112 | " [1] \u001b[1mdepwarn\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::String, ::Symbol\u001b[1m)\u001b[22m\u001b[22m at \u001b[1m.\\deprecated.jl:70\u001b[22m\u001b[22m\n", 113 | " [2] \u001b[1m#readtable#232\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::Bool, ::Char, ::Array{Char,1}, ::Char, ::Array{String,1}, ::Array{String,1}, ::Array{String,1}, ::Bool, ::Int64, ::Array{Symbol,1}, ::Array{Any,1}, ::Bool, ::Char, ::Bool, ::Int64, ::Array{Int64,1}, ::Bool, ::Symbol, ::Bool, ::Bool, ::DataFrames.#readtable, ::String\u001b[1m)\u001b[22m\u001b[22m at \u001b[1mC:\\Users\\utilisateur\\.julia\\v0.6\\DataFrames\\src\\deprecated.jl:1045\u001b[22m\u001b[22m\n", 114 | " [3] \u001b[1m(::DataFrames.#kw##readtable)\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::Array{Any,1}, ::DataFrames.#readtable, ::String\u001b[1m)\u001b[22m\u001b[22m at \u001b[1m.\\:0\u001b[22m\u001b[22m\n", 115 | " [4] \u001b[1minclude_string\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::String, ::String\u001b[1m)\u001b[22m\u001b[22m at \u001b[1m.\\loading.jl:522\u001b[22m\u001b[22m\n", 116 | " [5] \u001b[1msoftscope_include_string\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::Module, ::String, ::String\u001b[1m)\u001b[22m\u001b[22m at \u001b[1mC:\\Users\\utilisateur\\.julia\\v0.6\\SoftGlobalScope\\src\\SoftGlobalScope.jl:66\u001b[22m\u001b[22m\n", 117 | " [6] 
\u001b[1mexecute_request\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::ZMQ.Socket, ::IJulia.Msg\u001b[1m)\u001b[22m\u001b[22m at \u001b[1mC:\\Users\\utilisateur\\.julia\\v0.6\\IJulia\\src\\execute_request.jl:193\u001b[22m\u001b[22m\n", 118 | " [7] \u001b[1m(::Compat.#inner#6{Array{Any,1},IJulia.#execute_request,Tuple{ZMQ.Socket,IJulia.Msg}})\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m\u001b[1m)\u001b[22m\u001b[22m at \u001b[1mC:\\Users\\utilisateur\\.julia\\v0.6\\Compat\\src\\Compat.jl:189\u001b[22m\u001b[22m\n", 119 | " [8] \u001b[1meventloop\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::ZMQ.Socket\u001b[1m)\u001b[22m\u001b[22m at \u001b[1mC:\\Users\\utilisateur\\.julia\\v0.6\\IJulia\\src\\eventloop.jl:8\u001b[22m\u001b[22m\n", 120 | " [9] \u001b[1m(::IJulia.##13#16)\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m\u001b[1m)\u001b[22m\u001b[22m at \u001b[1m.\\task.jl:335\u001b[22m\u001b[22m\n", 121 | "while loading In[3], in expression starting on line 7\n", 122 | "\u001b[1m\u001b[33mWARNING: \u001b[39m\u001b[22m\u001b[33mreadtable is deprecated, use CSV.read from the CSV package instead\u001b[39m\n", 123 | "Stacktrace:\n", 124 | " [1] \u001b[1mdepwarn\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::String, ::Symbol\u001b[1m)\u001b[22m\u001b[22m at \u001b[1m.\\deprecated.jl:70\u001b[22m\u001b[22m\n", 125 | " [2] \u001b[1m#readtable#232\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::Bool, ::Char, ::Array{Char,1}, ::Char, ::Array{String,1}, ::Array{String,1}, ::Array{String,1}, ::Bool, ::Int64, ::Array{Symbol,1}, ::Array{Any,1}, ::Bool, ::Char, ::Bool, ::Int64, ::Array{Int64,1}, ::Bool, ::Symbol, ::Bool, ::Bool, ::DataFrames.#readtable, ::String\u001b[1m)\u001b[22m\u001b[22m at \u001b[1mC:\\Users\\utilisateur\\.julia\\v0.6\\DataFrames\\src\\deprecated.jl:1045\u001b[22m\u001b[22m\n", 126 | " [3] \u001b[1m(::DataFrames.#kw##readtable)\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::Array{Any,1}, ::DataFrames.#readtable, 
::String\u001b[1m)\u001b[22m\u001b[22m at \u001b[1m.\\:0\u001b[22m\u001b[22m\n", 127 | " [4] \u001b[1minclude_string\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::String, ::String\u001b[1m)\u001b[22m\u001b[22m at \u001b[1m.\\loading.jl:522\u001b[22m\u001b[22m\n", 128 | " [5] \u001b[1msoftscope_include_string\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::Module, ::String, ::String\u001b[1m)\u001b[22m\u001b[22m at \u001b[1mC:\\Users\\utilisateur\\.julia\\v0.6\\SoftGlobalScope\\src\\SoftGlobalScope.jl:66\u001b[22m\u001b[22m\n", 129 | " [6] \u001b[1mexecute_request\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::ZMQ.Socket, ::IJulia.Msg\u001b[1m)\u001b[22m\u001b[22m at \u001b[1mC:\\Users\\utilisateur\\.julia\\v0.6\\IJulia\\src\\execute_request.jl:193\u001b[22m\u001b[22m\n", 130 | " [7] \u001b[1m(::Compat.#inner#6{Array{Any,1},IJulia.#execute_request,Tuple{ZMQ.Socket,IJulia.Msg}})\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m\u001b[1m)\u001b[22m\u001b[22m at \u001b[1mC:\\Users\\utilisateur\\.julia\\v0.6\\Compat\\src\\Compat.jl:189\u001b[22m\u001b[22m\n", 131 | " [8] \u001b[1meventloop\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::ZMQ.Socket\u001b[1m)\u001b[22m\u001b[22m at \u001b[1mC:\\Users\\utilisateur\\.julia\\v0.6\\IJulia\\src\\eventloop.jl:8\u001b[22m\u001b[22m\n", 132 | " [9] \u001b[1m(::IJulia.##13#16)\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m\u001b[1m)\u001b[22m\u001b[22m at \u001b[1m.\\task.jl:335\u001b[22m\u001b[22m\n", 133 | "while loading In[3], in expression starting on line 8\n" 134 | ] 135 | } 136 | ], 137 | "source": [ 138 | "train_leaves = readtable(\"train_leaves.csv\", header=false)\n", 139 | "test_leaves = readtable(\"test_leaves.csv\", header=false)\n", 140 | "test_leaf_data = readtable(\"test_leaf_data_T.csv\", header=false)\n", 141 | "weights = readtable(\"weights.csv\", header=false)\n", 142 | "X_test = readtable(\"X_test_int.csv\", header=true)\n", 143 | "y_test = readtable(\"y_test.csv\", header=true)\n", 144 | 
"y_baseline = readtable(\"y_baseline.csv\", header=true)\n", 145 | "X_baseline = readtable(\"X_baseline.csv\", header=true);" 146 | ] 147 | }, 148 | { 149 | "cell_type": "code", 150 | "execution_count": 4, 151 | "metadata": {}, 152 | "outputs": [], 153 | "source": [ 154 | "# target \n", 155 | "y_pred = test_leaves[:, 1];\n", 156 | "y_baseline = y_baseline[:unit_sales];" 157 | ] 158 | }, 159 | { 160 | "cell_type": "markdown", 161 | "metadata": {}, 162 | "source": [ 163 | "## Function to get elements in the leaf of y_pred[i]" 164 | ] 165 | }, 166 | { 167 | "cell_type": "code", 168 | "execution_count": 5, 169 | "metadata": {}, 170 | "outputs": [], 171 | "source": [ 172 | "function get_data_points_in_leaf(i) # i is in 1:100\n", 173 | " init = test_leaf_data[i][:]\n", 174 | " points = zeros(0)\n", 175 | " for j in 1:length(init)\n", 176 | " if init[j] != -1\n", 177 | " append!(points, init[j])\n", 178 | " end\n", 179 | " end\n", 180 | " return(points)\n", 181 | "end;" 182 | ] 183 | }, 184 | { 185 | "cell_type": "markdown", 186 | "metadata": {}, 187 | "source": [ 188 | "# Optimization Model" 189 | ] 190 | }, 191 | { 192 | "cell_type": "code", 193 | "execution_count": 6, 194 | "metadata": {}, 195 | "outputs": [ 196 | { 197 | "data": { 198 | "text/plain": [ 199 | "100" 200 | ] 201 | }, 202 | "execution_count": 6, 203 | "metadata": {}, 204 | "output_type": "execute_result" 205 | } 206 | ], 207 | "source": [ 208 | "# number of items\n", 209 | "n = 100" 210 | ] 211 | }, 212 | { 213 | "cell_type": "code", 214 | "execution_count": 7, 215 | "metadata": {}, 216 | "outputs": [], 217 | "source": [ 218 | "function optimize_profit(Q, data = X_test, target = y_pred, prescription = true)\n", 219 | " m = Model(solver = GurobiSolver(OutputFlag = 0))\n", 220 | "\n", 221 | " # decision variables\n", 222 | " if prescription == false\n", 223 | " @variable(m, s[1:n])\n", 224 | " end\n", 225 | " \n", 226 | " @variable(m, z[1:n])\n", 227 | " @constraint(m, sum(z) <= Q)\n", 228 | " \n", 229 | " 
for i = 1:n\n", 230 | " @constraint(m, z[i] <= 0.1 * Q * data[:ondisplay][i])\n", 231 | " \n", 232 | " \n", 233 | " if prescription == false\n", 234 | " @constraint(m, s[i] >= 0)\n", 235 | " @constraint(m, s[i] <= target[i])\n", 236 | " @constraint(m, s[i] <= z[i])\n", 237 | " end\n", 238 | " end\n", 239 | "\n", 240 | " @constraint(m, z[1:n] .>= 0)\n", 241 | " \n", 242 | " \n", 243 | " # objective function\n", 244 | " profit = 0\n", 245 | " \n", 246 | " if prescription == true\n", 247 | " for i = 1:n\n", 248 | " sub_profit = 0\n", 249 | " weight = weights[1][i]\n", 250 | " \n", 251 | " points = get_data_points_in_leaf(i) # points from the training set that belong to the same leaf as y_pred[i]\n", 252 | " p = length(points)\n", 253 | " x = @variable(m, [1:p])\n", 254 | " \n", 255 | " price = data[:price][i]\n", 256 | " promo = data[:onpromotion][i]\n", 257 | " cost = data[:cost][i]\n", 258 | " perish = data[:perishable][i]\n", 259 | " \n", 260 | " for j in 1:p\n", 261 | " @constraint(m, x[j] <= points[j])\n", 262 | " @constraint(m, x[j] <= z[i])\n", 263 | " \n", 264 | " sub_profit += (price *(1 - 0.2 * promo) - cost) * x[j] - (z[i] - x[j]) * cost * perish\n", 265 | " \n", 266 | " end\n", 267 | " sub_profit = sub_profit * weight\n", 268 | " profit += sub_profit\n", 269 | " end\n", 270 | "\n", 271 | " else\n", 272 | " for i = 1:n\n", 273 | " price = data[:price][i]\n", 274 | " promo = data[:onpromotion][i]\n", 275 | " cost = data[:cost][i]\n", 276 | " perish = data[:perishable][i]\n", 277 | " profit += (price *(1 - 0.2 * promo) - cost) * s[i] - (z[i] - s[i]) * cost * perish\n", 278 | " end\n", 279 | " end\n", 280 | " \n", 281 | " @objective(m, Max, profit)\n", 282 | "\n", 283 | " status = solve(m)\n", 284 | "\n", 285 | " stocks = getvalue(z)\n", 286 | " r = getobjectivevalue(m)\n", 287 | " \n", 288 | " final_profit = 0\n", 289 | " oracle_profit = 0\n", 290 | " for i in 1:n\n", 291 | " final_profit += (data[:price][i] * (1 - 0.2 * data[:onpromotion][i]) - 
data[:cost][i])* min(stocks[i], y_test[:unit_sales][i]) - \n", 292 | " (stocks[i] - min(stocks[i], y_test[:unit_sales][i])) * data[:cost][i] * data[:perishable][i]\n", 293 | " oracle_profit += (data[:price][i] * (1 - 0.2 * data[:onpromotion][i]) - data[:cost][i])* y_test[:unit_sales][i]\n", 294 | " end\n", 295 | " \n", 296 | " return(stocks, final_profit)\n", 297 | "end;" 298 | ] 299 | }, 300 | { 301 | "cell_type": "markdown", 302 | "metadata": {}, 303 | "source": [ 304 | "# Get the decisions: what stocks levels do we need for each item?" 305 | ] 306 | }, 307 | { 308 | "cell_type": "code", 309 | "execution_count": 8, 310 | "metadata": {}, 311 | "outputs": [ 312 | { 313 | "data": { 314 | "text/plain": [ 315 | "615.998" 316 | ] 317 | }, 318 | "execution_count": 8, 319 | "metadata": {}, 320 | "output_type": "execute_result" 321 | } 322 | ], 323 | "source": [ 324 | "oracle_profit = 0\n", 325 | "for i in 1:n\n", 326 | " oracle_profit += (X_test[:price][i]*(1 - 0.2 * X_test[:onpromotion][i]) - X_test[:cost][i])* y_test[:unit_sales][i]\n", 327 | "end\n", 328 | "oracle_profit" 329 | ] 330 | }, 331 | { 332 | "cell_type": "code", 333 | "execution_count": 9, 334 | "metadata": {}, 335 | "outputs": [ 336 | { 337 | "name": "stdout", 338 | "output_type": "stream", 339 | "text": [ 340 | "----- RESULTS for Q = 1 -----\n", 341 | "Academic license - for non-commercial use only\n", 342 | "* Baseline profit: 3.4350000000000005\n", 343 | "* Optimality gap: 612.5630000000001\n", 344 | "\n", 345 | "Academic license - for non-commercial use only\n", 346 | "* Our profit: 3.0112\n", 347 | "* Optimality gap: 612.9868\n", 348 | "* Percentage gain in profit: -12.337700145560419 % \n", 349 | "\n", 350 | "\n", 351 | "----- RESULTS for Q = 10 -----\n", 352 | "Academic license - for non-commercial use only\n", 353 | "* Baseline profit: 34.35\n", 354 | "* Optimality gap: 581.648\n", 355 | "\n", 356 | "Academic license - for non-commercial use only\n", 357 | "* Our profit: 30.112\n", 358 | "* 
Optimality gap: 585.8860000000001\n", 359 | "* Percentage gain in profit: -12.337700145560417 % \n", 360 | "\n", 361 | "\n", 362 | "----- RESULTS for Q = 50 -----\n", 363 | "Academic license - for non-commercial use only\n", 364 | "* Baseline profit: 129.84\n", 365 | "* Optimality gap: 486.158\n", 366 | "\n", 367 | "Academic license - for non-commercial use only\n", 368 | "* Our profit: 163.682\n", 369 | "* Optimality gap: 452.31600000000003\n", 370 | "* Percentage gain in profit: 26.06438693776955 % \n", 371 | "\n", 372 | "\n", 373 | "----- RESULTS for Q = 100 -----\n", 374 | "Academic license - for non-commercial use only\n", 375 | "* Baseline profit: 226.56200000000004\n", 376 | "* Optimality gap: 389.43600000000004\n", 377 | "\n", 378 | "Academic license - for non-commercial use only\n", 379 | "* Our profit: 280.682\n", 380 | "* Optimality gap: 335.31600000000003\n", 381 | "* Percentage gain in profit: 23.887500993105625 % \n", 382 | "\n", 383 | "\n", 384 | "----- RESULTS for Q = 500 -----\n", 385 | "Academic license - for non-commercial use only\n", 386 | "* Baseline profit: 412.096\n", 387 | "* Optimality gap: 203.90200000000004\n", 388 | "\n", 389 | "Academic license - for non-commercial use only\n", 390 | "* Our profit: 539.772\n", 391 | "* Optimality gap: 76.226\n", 392 | "* Percentage gain in profit: 30.98210125795932 % \n", 393 | "\n", 394 | "\n", 395 | "----- RESULTS for Q = 1000 -----\n", 396 | "Academic license - for non-commercial use only\n", 397 | "* Baseline profit: 412.096\n", 398 | "* Optimality gap: 203.90200000000004\n", 399 | "\n", 400 | "Academic license - for non-commercial use only\n", 401 | "* Our profit: 575.0260000000001\n", 402 | "* Optimality gap: 40.97199999999998\n", 403 | "* Percentage gain in profit: 39.536904022363736 % \n", 404 | "\n", 405 | "\n", 406 | "----- RESULTS for Q = 10000 -----\n", 407 | "Academic license - for non-commercial use only\n", 408 | "* Baseline profit: 412.096\n", 409 | "* Optimality gap: 
203.90200000000004\n", 410 | "\n", 411 | "Academic license - for non-commercial use only\n", 412 | "* Our profit: 575.0260000000001\n", 413 | "* Optimality gap: 40.97199999999998\n", 414 | "* Percentage gain in profit: 39.536904022363736 % \n", 415 | "\n", 416 | "\n", 417 | "----- RESULTS for Q = 100000 -----\n", 418 | "Academic license - for non-commercial use only\n", 419 | "* Baseline profit: 412.096\n", 420 | "* Optimality gap: 203.90200000000004\n", 421 | "\n", 422 | "Academic license - for non-commercial use only\n", 423 | "* Our profit: 575.0260000000001\n", 424 | "* Optimality gap: 40.97199999999998\n", 425 | "* Percentage gain in profit: 39.536904022363736 % \n", 426 | "\n", 427 | "\n" 428 | ] 429 | } 430 | ], 431 | "source": [ 432 | "# implement the prescriptive method for different values of Q\n", 433 | "Qs = [1, 10, 50, 100, 500, 1000, 10000, 100000]\n", 434 | "\n", 435 | "for Q in Qs\n", 436 | " println(\"----- RESULTS for Q = \", Q, \" -----\")\n", 437 | " \n", 438 | " # baseline model\n", 439 | " stocks_baseline, profit_base = optimize_profit(Q, X_test, y_baseline, false)\n", 440 | " #println(\"----- The baseline suggest we stock the following volumns for each item: -----\\n\", stocks_baseline)\n", 441 | " println(\"* Baseline profit: \", profit_base)\n", 442 | " println(\"* Optimality gap: \", oracle_profit - profit_base, \"\\n\")\n", 443 | " \n", 444 | " # prescriptions\n", 445 | " stocks, profit = optimize_profit(Q, X_test, y_pred, true);\n", 446 | " #println(\"----- We should stock the following volumns for each item: -----\\n\", stocks)\n", 447 | " println(\"* Our profit: \", profit)\n", 448 | " println(\"* Optimality gap: \", oracle_profit - profit)\n", 449 | " # percentage gain in profit\n", 450 | " gain = (profit-profit_base)/profit_base\n", 451 | " println(\"* Percentage gain in profit: \", gain*100, \" % \\n\\n\")\n", 452 | " \n", 453 | "end" 454 | ] 455 | } 456 | ], 457 | "metadata": { 458 | "kernelspec": { 459 | "display_name": "Julia 
0.6.4", 460 | "language": "julia", 461 | "name": "julia-0.6" 462 | }, 463 | "language_info": { 464 | "file_extension": ".jl", 465 | "mimetype": "application/julia", 466 | "name": "julia", 467 | "version": "0.6.4" 468 | } 469 | }, 470 | "nbformat": 4, 471 | "nbformat_minor": 2 472 | } 473 | -------------------------------------------------------------------------------- /Homework4-Precriptive-approach-to-forecast-stocks-sales/hw4-answers-Nhi.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kanguyn/Machine-Learning-Under-Optimization-Lens-Fall2018/8e7ecc1eb36b21cf5f1f10f6c8299bc14a6ba766/Homework4-Precriptive-approach-to-forecast-stocks-sales/hw4-answers-Nhi.pdf -------------------------------------------------------------------------------- /Homework4-Precriptive-approach-to-forecast-stocks-sales/hw4-subject.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kanguyn/Machine-Learning-Under-Optimization-Lens-Fall2018/8e7ecc1eb36b21cf5f1f10f6c8299bc14a6ba766/Homework4-Precriptive-approach-to-forecast-stocks-sales/hw4-subject.pdf -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Machine-Learning-Under-Optimization-Lens-Fall2018 2 | Work from class 15.095 Machine Learning Under a Modern Optimization Lens at MIT taught by Dimitris Bertsimas and Martin Copenhaver 3 | 4 | ## Course Content and Objectives: 5 | 6 | The majority of the central problems of regression, classification, and estimation have been addressed using heuristic methods even though they can be formulated as formal optimization problems. 
While continuous optimization approaches have had a significant impact in Machine Learning (ML)/Statistics (S), mixed integer optimization (MIO) has played a very limited role, primarily based on the belief that MIO models are computationally intractable. The last three decades have witnessed (a) algorithmic advances in MIO, which coupled 7 | with hardware improvements have resulted in an astonishing over 2 trillion factor speedup in solving MIO problems, (b) significant advances in our ability to model and solve very high dimensional robust and convex optimization models. The objective in this course is to revisit some of the classical problems in ML/S and demonstrate that they can greatly benefit from a modern optimization treatment. The optimization lenses we use in this course include convex, robust, and mixed integer optimization. In all cases we demonstrate that optimal solutions to large scale instances (a) can be found in seconds, (b) can be certified to be optimal/near-optimal in minutes and (c) outperform classical heuristic approaches in out of sample experiments involving real and synthetic data. 8 | 9 | The problems we address in this course include: 10 | * variable selection in linear and logistic regression, 11 | * convex, robust, and median regression, 12 | * an algorithmic framework to construct linear and logistic regression models that satisfy properties of sparsity, robustness, significance, absence of multi-collinearity in an optimal way, 13 | * clustering, 14 | * deep learning, 15 | * how to transform predictive algorithms to prescriptive algorithms, 16 | * optimal prescriptive trees, 17 | * the design of experiments via optimization, 18 | * missing data imputation 19 | 20 | ## Homework / assignments 21 | The course had 4 assignments and one final project. 
The 4 assignments were: 22 | * 1: Solve a linear optimization problem and build a robust linear regression model 23 | * 2: Build an algorithmic framework for linear regression that satisfies properties of sparsity, robustness, significance, absence of multi-collinearity in an optimal way 24 | * 3: Construct state-of-the-art optimal tree models 25 | * 4: Perform prescriptive analysis to decide how much stock to have in a store to optimize revenue 26 | 27 | ## Details 28 | * _Language_: Python and Julia/JuMP over Jupyter Notebooks. 29 | * _Libraries_ in Python: numpy, pandas, matplotlib, scikit-learn 30 | * _Libraries_ in Julia: JuMP, DataFrames, Gurobi, Plots, MLDataUtils, OptimalTrees, OptImpute 31 | 32 | ## Authors 33 | * Kim-Anh-Nhi Nguyen @kanguyn 34 | 35 | ## Sources and acknowledgments 36 | * Prof.: [Dimitris Bertsimas](https://web-cert.mit.edu/dbertsim/www/) and [Martin Copenhaver](http://www.mit.edu/~mcopen/) 37 | * Teaching assistants: [Colin Pawlowski](cpawlows@mit.edu) and [Yuchen Wang](yuchenw@mit.edu) 38 | --------------------------------------------------------------------------------