├── README.md ├── zind-airqo-final-blend.ipynb ├── darius-model-1.ipynb ├── darius-model-2.ipynb └── zindi-airqo-cnn-quick.ipynb /README.md: -------------------------------------------------------------------------------- 1 | # zindi-airqo 2 | 3 | 4 | 5 | 1. Run the 4 notebooks 'darius-model-1.ipynb', 'darius-model-2.ipynb', 'model-3.ipynb' and 6 | 'zindi-airqo-cnn-quick.ipynb' in any order. The outputs generated by the notebooks are already placed in the current folder. 7 | 2. Run the final notebook 'zind-airqo-final-blend.ipynb' only after running the above 4 notebooks in any order to get the final output 'zindi_airqo_final_sub.csv'. 8 | -------------------------------------------------------------------------------- /zind-airqo-final-blend.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import numpy as np\n", 12 | "import pandas as pd" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 2, 18 | "metadata": {}, 19 | "outputs": [ 20 | { 21 | "name": "stdout", 22 | "output_type": "stream", 23 | "text": [ 24 | "airqo-ugandan-air-quality-forecast-challenge #2 3 STRoNG !!!.zip\n", 25 | "cnn_preds.csv\n", 26 | "darius-model-1.ipynb\n", 27 | "darius-model-2.ipynb\n", 28 | "input\n", 29 | "model-3.ipynb\n", 30 | "model_12_blend.csv\n", 31 | "model_2.csv\n", 32 | "model_3.csv\n", 33 | "readme.txt\n", 34 | "zind-airqo-final-blend.ipynb\n", 35 | "zindi-airqo-cnn-quick.ipynb\n", 36 | "zindi_airqo_final_sub.csv\n" 37 | ] 38 | } 39 | ], 40 | "source": [ 41 | "!ls" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": 3, 47 | "metadata": { 48 | "_cell_guid": "79c7e3d0-c299-4dcb-8224-4455121ee9b0", 49 | "_uuid": "d629ff2d2480ee46fbb7e2d37f6b5fab8052498a", 50 | "collapsed": true 51 | }, 52 | "outputs": [], 53 | "source": [ 54 | "df = 
pd.read_csv('model_2.csv')[['ID']]\n", 55 | "df = pd.merge(df, pd.read_csv('model_12_blend.csv').rename({'target': 'A'}, axis=1), on = 'ID', how='left')\n", 56 | "df = pd.merge(df, pd.read_csv('model_2.csv').rename({'target': 'B'}, axis=1), on = 'ID', how='left')\n", 57 | "df = pd.merge(df, pd.read_csv('model_3.csv').rename({'target': 'C'}, axis=1), on = 'ID', how='left')\n", 58 | "df = pd.merge(df, pd.read_csv('cnn_preds.csv').rename({'target': 'D'}, axis=1), on = 'ID', how='left')" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": 4, 64 | "metadata": {}, 65 | "outputs": [ 66 | { 67 | "data": { 68 | "text/html": [ 69 | "
\n", 70 | "\n", 83 | "\n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | "
ABCD
A1.0000000.9724630.9819050.937631
B0.9724631.0000000.9668360.932886
C0.9819050.9668361.0000000.941585
D0.9376310.9328860.9415851.000000
\n", 124 | "
" 125 | ], 126 | "text/plain": [ 127 | " A B C D\n", 128 | "A 1.000000 0.972463 0.981905 0.937631\n", 129 | "B 0.972463 1.000000 0.966836 0.932886\n", 130 | "C 0.981905 0.966836 1.000000 0.941585\n", 131 | "D 0.937631 0.932886 0.941585 1.000000" 132 | ] 133 | }, 134 | "execution_count": 4, 135 | "metadata": {}, 136 | "output_type": "execute_result" 137 | } 138 | ], 139 | "source": [ 140 | "df[['A', 'B', 'C', 'D']].corr()" 141 | ] 142 | }, 143 | { 144 | "cell_type": "code", 145 | "execution_count": 5, 146 | "metadata": {}, 147 | "outputs": [ 148 | { 149 | "name": "stderr", 150 | "output_type": "stream", 151 | "text": [ 152 | "C:\\Anaconda5\\lib\\site-packages\\ipykernel_launcher.py:2: SettingWithCopyWarning: \n", 153 | "A value is trying to be set on a copy of a slice from a DataFrame.\n", 154 | "Try using .loc[row_indexer,col_indexer] = value instead\n", 155 | "\n", 156 | "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", 157 | " \n" 158 | ] 159 | } 160 | ], 161 | "source": [ 162 | "final_sub_df = df[['ID']]\n", 163 | "final_sub_df['target'] = ((df['A']*0.6 + df['B']*0.4)*0.35 + df['C']*0.65)*0.85 + df['D']*0.15\n", 164 | "final_sub_df.to_csv('zindi_airqo_final_sub.csv', index=False)" 165 | ] 166 | }, 167 | { 168 | "cell_type": "code", 169 | "execution_count": 6, 170 | "metadata": {}, 171 | "outputs": [ 172 | { 173 | "name": "stderr", 174 | "output_type": "stream", 175 | "text": [ 176 | "C:\\Anaconda5\\lib\\site-packages\\ipykernel_launcher.py:2: SettingWithCopyWarning: \n", 177 | "A value is trying to be set on a copy of a slice from a DataFrame.\n", 178 | "Try using .loc[row_indexer,col_indexer] = value instead\n", 179 | "\n", 180 | "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", 181 | " \n" 182 | ] 183 | } 184 | ], 185 | "source": [ 186 | "final_sub_df = df[['ID']]\n", 187 | "final_sub_df['target'] = 
(df['A']*0.6 + df['B']*0.4)\n", 188 | "final_sub_df.to_csv('zindi_airqo_model_1_and_2_only.csv', index=False)" 189 | ] 190 | }, 191 | { 192 | "cell_type": "code", 193 | "execution_count": null, 194 | "metadata": { 195 | "collapsed": true 196 | }, 197 | "outputs": [], 198 | "source": [] 199 | } 200 | ], 201 | "metadata": { 202 | "kernelspec": { 203 | "display_name": "Python 3", 204 | "language": "python", 205 | "name": "python3" 206 | }, 207 | "language_info": { 208 | "codemirror_mode": { 209 | "name": "ipython", 210 | "version": 3 211 | }, 212 | "file_extension": ".py", 213 | "mimetype": "text/x-python", 214 | "name": "python", 215 | "nbconvert_exporter": "python", 216 | "pygments_lexer": "ipython3", 217 | "version": "3.6.3" 218 | } 219 | }, 220 | "nbformat": 4, 221 | "nbformat_minor": 4 222 | } 223 | -------------------------------------------------------------------------------- /darius-model-1.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": true, 8 | "id": "pk5WiopT9oJw", 9 | "outputId": "446b6db5-1cbe-4beb-e1e2-4bcdd512bab6" 10 | }, 11 | "outputs": [], 12 | "source": [ 13 | "# installing catboost\n", 14 | "# Catboost == 0.22 was the version of catboost at the start of this competition\n", 15 | "!pip install catboost==0.22 --quiet" 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": null, 21 | "metadata": { 22 | "collapsed": true, 23 | "id": "jVXG4QlTuVYr" 24 | }, 25 | "outputs": [], 26 | "source": [ 27 | "# Importing libraries\n", 28 | "import pandas as pd\n", 29 | "import numpy as np\n", 30 | "import warnings\n", 31 | "import joblib\n", 32 | "\n", 33 | "from tqdm import tqdm, tqdm_notebook\n", 34 | "from functools import reduce\n", 35 | "from time import time\n", 36 | "\n", 37 | "from catboost import CatBoostRegressor, CatBoostClassifier\n", 38 | "from sklearn.utils import shuffle\n", 39 
| "\n", 40 | "pd.set_option('display.max_rows', 1000) \n", 41 | "warnings.filterwarnings('ignore')" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": null, 47 | "metadata": { 48 | "collapsed": true, 49 | "id": "VJjtuM3kvCAT" 50 | }, 51 | "outputs": [], 52 | "source": [ 53 | "# Loading data\n", 54 | "train = pd.read_csv('./input/Train.csv')\n", 55 | "test = pd.read_csv('./input/Test.csv')" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": null, 61 | "metadata": { 62 | "collapsed": true 63 | }, 64 | "outputs": [], 65 | "source": [ 66 | "start = time()" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": null, 72 | "metadata": { 73 | "collapsed": true, 74 | "id": "4MhDKOpYF9dm" 75 | }, 76 | "outputs": [], 77 | "source": [ 78 | "# Separating the target variable from the training dataframe\n", 79 | "#\n", 80 | "target = train.target\n", 81 | "\n", 82 | "# Aligning the train and test dataframes\n", 83 | "#\n", 84 | "train, test = train.align(test, join='inner', axis=1)\n", 85 | "\n", 86 | "# Creating a separator column to both train and test,\n", 87 | "# This is to be used in separation\n", 88 | "#\n", 89 | "train['separator'] = 0\n", 90 | "test['separator'] = 1\n", 91 | "\n", 92 | "# Combing the train and test dataframes together\n", 93 | "#\n", 94 | "comb = pd.concat([train, test])\n", 95 | "\n", 96 | "# Creating a function to replace all spaces in the dataframe with np.nan\n", 97 | "#\n", 98 | "def replace_nan(x):\n", 99 | " if x == \" \":\n", 100 | " return np.nan\n", 101 | " else:\n", 102 | " return float(x)\n", 103 | "\n", 104 | "# Creating a list of the main columns\n", 105 | "#\n", 106 | "main_cols = [\"temp\", \"precip\", \"rel_humidity\", \"wind_dir\", \"wind_spd\", \"atmos_press\"]\n", 107 | "\n", 108 | "# Replacing spaces with np.nan\n", 109 | "#\n", 110 | "for col in main_cols: \n", 111 | " comb[col] = comb[col].apply(lambda x: [replace_nan(X) for X in x.replace(\"nan\", \" 
\").split(\",\")])\n", 112 | "\n", 113 | "def make_columns(feature):\n", 114 | " return [f\"{feature}_{i}\" for i in range(1, 122)]\n", 115 | " \n", 116 | "# Generating dataframes of hours for each main column\n", 117 | "#\n", 118 | "comb_temp = pd.DataFrame([x for x in comb.temp], columns=make_columns('temp'))\n", 119 | "comb_precip = pd.DataFrame([x for x in comb.precip], columns=make_columns('precip'))\n", 120 | "comb_rel_humidity = pd.DataFrame([x for x in comb.rel_humidity], columns=make_columns('rel_humidity'))\n", 121 | "comb_wind_dir = pd.DataFrame([x for x in comb.wind_dir], columns=make_columns('wind_dir'))\n", 122 | "comb_wind_spd = pd.DataFrame([x for x in comb.wind_spd], columns=make_columns('wind_spd'))\n", 123 | "comb_atmos_press = pd.DataFrame([x for x in comb.atmos_press], columns=make_columns('atmos_press'))\n", 124 | "\n", 125 | "comb_temp['ID'], comb_precip['ID'], comb_rel_humidity['ID'], comb_wind_dir['ID'], comb_wind_spd['ID'], comb_atmos_press['ID'] = [list(comb.ID)] * 6\n", 126 | "\n", 127 | "# Combining the generated dataframes together\n", 128 | "#\n", 129 | "comb_dfs = [comb, comb_temp, comb_precip, comb_rel_humidity, comb_wind_dir, comb_wind_spd, comb_atmos_press]\n", 130 | "comb = reduce(lambda left, right: pd.merge(left, right, on=['ID'], how='outer'), comb_dfs)\n", 131 | "comb.drop(main_cols, axis=1, inplace=True)\n", 132 | "df = comb.copy()" 133 | ] 134 | }, 135 | { 136 | "cell_type": "code", 137 | "execution_count": null, 138 | "metadata": { 139 | "code_folding": [], 140 | "collapsed": true, 141 | "id": "3FLjxrAXYbRV", 142 | "outputId": "5d01c7d2-cf39-49ce-d234-6c735a3971d7" 143 | }, 144 | "outputs": [], 145 | "source": [ 146 | "# Creating original series for each feature\n", 147 | "orig_cols_dict = {}\n", 148 | "weather_cols = ['temp', 'precip', 'rel_humidity', 'wind_dir','wind_spd', 'atmos_press']\n", 149 | "\n", 150 | "for w in tqdm_notebook(weather_cols):\n", 151 | " selected_cols = [c for c in df.columns if w in c]\n", 152 | " 
orig_cols_dict[w] = pd.Series(selected_cols)" 153 | ] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "execution_count": null, 158 | "metadata": { 159 | "collapsed": true, 160 | "id": "oAGDsJ92YS2Z", 161 | "outputId": "3c4077e0-d77c-4feb-e744-4ef904531c04" 162 | }, 163 | "outputs": [], 164 | "source": [ 165 | "# Aggregating features per hour\n", 166 | "\n", 167 | "for w in tqdm_notebook(weather_cols):\n", 168 | " tmp_df = pd.DataFrame()\n", 169 | " tmp_df['weather_col_orig'] = orig_cols_dict[w]\n", 170 | " tmp_df['hours_since_start'] = tmp_df['weather_col_orig'].apply(lambda x: x.split('_')[-1]).astype('int')\n", 171 | " tmp_df['hour_of_day'] = tmp_df['hours_since_start'] % 24\n", 172 | "\n", 173 | " for hour in range(1, 25):\n", 174 | " selected_cols = tmp_df[tmp_df['hour_of_day'] == hour]['weather_col_orig'].tolist()\n", 175 | " df_cols = df[selected_cols] # factorizing this part\n", 176 | " \n", 177 | " df[f'{w}_hour_{hour}_mean'] = df_cols.mean(axis=1)\n", 178 | " df[f'{w}_hour_{hour}_min'] = df_cols.min(axis=1)\n", 179 | " df[f'{w}_hour_{hour}_max'] = df_cols.max(axis=1)\n", 180 | " df[f'{w}_hour_{hour}_range'] = df[f'{w}_hour_{hour}_max'] - df[f'{w}_hour_{hour}_min']\n", 181 | " df[f'{w}_hour_{hour}_skew'] = df_cols.skew()\n", 182 | " df[f'{w}_hour_{hour}_kurt'] = df_cols.kurt()\n", 183 | "\n", 184 | " if hour - 3 > 0 and hour % 3 == 0:\n", 185 | " df[f'{w}_hour_{hour}_prev_hour_mean_diff'] = df[f'{w}_hour_{hour}_mean'] - df[f'{w}_hour_{hour - 3}_mean']\n", 186 | " if hour - 5 > 0 and hour % 3 == 0:\n", 187 | " df[f'{w}_hour_{hour}_prev_hour_mean_diff_5'] = df[f'{w}_hour_{hour}_mean'] - df[f'{w}_hour_{hour - 5}_mean']\n" 188 | ] 189 | }, 190 | { 191 | "cell_type": "code", 192 | "execution_count": null, 193 | "metadata": { 194 | "collapsed": true, 195 | "id": "pEyFVf1lqInY" 196 | }, 197 | "outputs": [], 198 | "source": [ 199 | "comb = df.copy()" 200 | ] 201 | }, 202 | { 203 | "cell_type": "code", 204 | "execution_count": null, 205 | "metadata": { 206 | 
"collapsed": true, 207 | "id": "WCp_Ukh-NTso", 208 | "outputId": "104e385d-04c6-47af-8bec-582bc0fdfbac" 209 | }, 210 | "outputs": [], 211 | "source": [ 212 | "comb.head()" 213 | ] 214 | }, 215 | { 216 | "cell_type": "code", 217 | "execution_count": null, 218 | "metadata": { 219 | "collapsed": true, 220 | "id": "xzn_w6FYftB8", 221 | "outputId": "33af157e-0c05-4be9-fd8f-3290f64fcd47" 222 | }, 223 | "outputs": [], 224 | "source": [ 225 | "# Creating aggregation features for each variable\n", 226 | "aggs = ['mean', 'std', 'var', 'kurt', 'skew', 'max', 'median', 'sum', 'mode', 'sem', 'min']\n", 227 | "\n", 228 | "for col in tqdm_notebook(main_cols):\n", 229 | " for ag in tqdm(aggs):\n", 230 | " selected_cols = [x for x in comb.columns if x.startswith(col)]\n", 231 | "\n", 232 | " if ag == 'mode':\n", 233 | " aggregate = comb[selected_cols].agg(ag, axis=1)[0]\n", 234 | " else:\n", 235 | " aggregate = comb[selected_cols].agg(ag, axis=1)\n", 236 | " \n", 237 | " comb[col[0] + col[-1] + '_' + ag] = aggregate\n", 238 | "\n", 239 | "# Creating separate dataframes for each variable\n", 240 | "# Creating a list of columns for each separate dataframe\n", 241 | "temp_cols = [x for x in comb.columns if x.startswith('temp')]\n", 242 | "temp = comb[temp_cols]\n", 243 | "\n", 244 | "precip_cols = [x for x in comb.columns if x.startswith('precip')]\n", 245 | "precip = comb[precip_cols]\n", 246 | "\n", 247 | "humid_cols = [x for x in comb.columns if x.startswith('rel_humidity')]\n", 248 | "humid = comb[humid_cols]\n", 249 | "\n", 250 | "wind_dir_cols = [x for x in comb.columns if x.startswith('wind_dir')]\n", 251 | "wind_dir = comb[wind_dir_cols]\n", 252 | "\n", 253 | "wind_spd_cols = [x for x in comb.columns if x.startswith('wind_spd')]\n", 254 | "wind_spd = comb[wind_spd_cols]\n", 255 | "\n", 256 | "atmp_cols = [x for x in comb.columns if x.startswith('atmos_press')]\n", 257 | "atmp = comb[atmp_cols]\n", 258 | "\n", 259 | "fill_cols = comb.columns" 260 | ] 261 | }, 262 | { 263 | 
"cell_type": "code", 264 | "execution_count": null, 265 | "metadata": { 266 | "collapsed": true, 267 | "id": "qdVrf3ZmcFZn" 268 | }, 269 | "outputs": [], 270 | "source": [ 271 | "# Generating new features, by adding each variable per hour\n", 272 | "for x, y, z, a, b in zip(temp.columns, precip.columns, humid.columns, wind_spd.columns, atmp.columns):\n", 273 | " comb['add_tp' + y[-4:]] = temp[x] + precip[y] + humid[z] + wind_spd[a] + atmp[b]" 274 | ] 275 | }, 276 | { 277 | "cell_type": "code", 278 | "execution_count": null, 279 | "metadata": { 280 | "collapsed": true, 281 | "id": "IU2Z7uDigrgW" 282 | }, 283 | "outputs": [], 284 | "source": [ 285 | "# Filling missing values using forward fill\n", 286 | "comb = comb.ffill(axis=1)" 287 | ] 288 | }, 289 | { 290 | "cell_type": "code", 291 | "execution_count": null, 292 | "metadata": { 293 | "collapsed": true, 294 | "id": "aQTxY80kZ6rH", 295 | "outputId": "a2e1dfe2-2032-4666-9921-e523a4e566e6" 296 | }, 297 | "outputs": [], 298 | "source": [ 299 | "comb.head()" 300 | ] 301 | }, 302 | { 303 | "cell_type": "code", 304 | "execution_count": null, 305 | "metadata": { 306 | "collapsed": true 307 | }, 308 | "outputs": [], 309 | "source": [ 310 | "def apply_qcut(feat):\n", 311 | " return pd.qcut(comb[feat], 24, labels=False, duplicates='drop')" 312 | ] 313 | }, 314 | { 315 | "cell_type": "code", 316 | "execution_count": null, 317 | "metadata": { 318 | "collapsed": true 319 | }, 320 | "outputs": [], 321 | "source": [ 322 | "other_features = [x for x in comb.columns if x not in ['separator', 'ID', 'location']]\n", 323 | "\n", 324 | "# Multiprocessing trick: 15 seconds instead of 7 minutes !\n", 325 | "binned_data = joblib.Parallel(n_jobs=-1, backend='multiprocessing')(\n", 326 | " joblib.delayed(apply_qcut)(feat) for feat in tqdm_notebook(other_features))\n", 327 | "\n", 328 | "comb_binned_data = pd.concat(binned_data, axis=1)\n", 329 | "comb = pd.concat([comb[['separator', 'ID', 'location']], comb_binned_data], axis=1)" 330 | ] 
331 | }, 332 | { 333 | "cell_type": "code", 334 | "execution_count": null, 335 | "metadata": { 336 | "collapsed": true 337 | }, 338 | "outputs": [], 339 | "source": [ 340 | "comb.head()" 341 | ] 342 | }, 343 | { 344 | "cell_type": "code", 345 | "execution_count": null, 346 | "metadata": { 347 | "collapsed": true, 348 | "id": "w-60osCYdgjT", 349 | "outputId": "7dc3c3bd-ef95-4d34-f090-3c7efb05081a" 350 | }, 351 | "outputs": [], 352 | "source": [ 353 | "# Separating train and test from the combined dataframe\n", 354 | "train = comb[comb.separator == 0]\n", 355 | "test = comb[comb.separator == 1]\n", 356 | "train.drop('separator', axis=1, inplace=True)\n", 357 | "test.drop('separator', axis=1, inplace=True)\n", 358 | "\n", 359 | "# Creating a list of test ids in the order that they will be trained\n", 360 | "testA = test[test.location == 'A']\n", 361 | "testB = test[test.location == 'B']\n", 362 | "testC = test[test.location == 'C']\n", 363 | "testD = test[test.location == 'D']\n", 364 | "testE = test[test.location == 'E']\n", 365 | "\n", 366 | "tA, tD, tE, tBC = testA.ID, testD.ID, testE.ID, test[(test.location == 'B') | (test.location == 'C')].ID\n", 367 | "test_id = pd.concat([tA, tD, tE, tBC])\n", 368 | "\n", 369 | "# Adding back target to the train set\n", 370 | "train['target'] = target" 371 | ] 372 | }, 373 | { 374 | "cell_type": "code", 375 | "execution_count": null, 376 | "metadata": { 377 | "collapsed": true 378 | }, 379 | "outputs": [], 380 | "source": [ 381 | "end = time()\n", 382 | "print(f\"Total preprocessing time = {end - start:.1f}\")" 383 | ] 384 | }, 385 | { 386 | "cell_type": "code", 387 | "execution_count": null, 388 | "metadata": { 389 | "collapsed": true, 390 | "id": "130zceoDarsp" 391 | }, 392 | "outputs": [], 393 | "source": [ 394 | "%%time\n", 395 | "# Creating X and y values\n", 396 | "X = train.drop(['ID', 'location', 'target'], axis=1)\n", 397 | "y = target.values\n", 398 | "\n", 399 | "# Shuffling the X, y values\n", 400 | "X, y = 
shuffle(X, y, random_state=0)\n", 401 | "tes = test.drop(['ID', 'location'], axis=1)\n", 402 | "\n", 403 | "# Training the model across multiple seeds\n", 404 | "predictions = []\n", 405 | "for i in tqdm_notebook(range(25)):\n", 406 | " cat = CatBoostRegressor(verbose=False, random_seed=i)\n", 407 | " cat.fit(X, y)\n", 408 | " \n", 409 | " preds = cat.predict(tes)\n", 410 | " predictions.append(preds)\n", 411 | "\n", 412 | "# Averaging the predictions\n", 413 | "avg_preds = np.mean(predictions, axis=0)\n", 414 | "\n", 415 | "# Post processing of the predictions\n", 416 | "# This post processing was done with the help of a validation set.\n", 417 | "# The validation set was adversarial, i.e. we chose the examples from the training set closest to the test set, and applied post processing to it.\n", 418 | "post_proc = [((((((((((x-0.85)*1.015)-0.85)*1.012)-0.75)*1.0095)-0.55)*1.0065)-0.8)*1.007) for x in avg_preds]\n", 419 | "post_proc = predzz = [((x-0.85)*1.015) for x in post_proc]\n", 420 | "\n", 421 | "# Creating a submission file\n", 422 | "sub_df = pd.DataFrame({'ID': test.ID, 'target': post_proc})\n", 423 | "sub_df.to_csv('model_1_1.csv', index=False)" 424 | ] 425 | }, 426 | { 427 | "cell_type": "code", 428 | "execution_count": null, 429 | "metadata": { 430 | "collapsed": true, 431 | "id": "nrK5eVysjIna", 432 | "outputId": "8846d85a-7616-49b3-b0db-d87fc502c993" 433 | }, 434 | "outputs": [], 435 | "source": [ 436 | "%%time\n", 437 | "# Creating a list to hold predictions per seed\n", 438 | "predzz = []\n", 439 | "for i in tqdm_notebook(range(25), leave=False):\n", 440 | " # Creating a list to hold predictions per location\n", 441 | " # Training model per location per seed\n", 442 | " predictions = []\n", 443 | " for area in tqdm_notebook(['A', 'D', 'E'], leave=False):\n", 444 | " # Separating training data per location\n", 445 | " X = train[train.location == area]\n", 446 | " y = X.target\n", 447 | " X = X.drop(['ID', 'location', 'target'], axis=1)\n", 448 | "\n", 
449 | " # Shuffling data\n", 450 | " X, y = shuffle(X, y, random_state=0)\n", 451 | "\n", 452 | " # Separating testing data per location\n", 453 | " tes = test[test.location == area]\n", 454 | " tes = tes.drop(['ID', 'location'], axis=1)\n", 455 | "\n", 456 | " # Training the model and making predictions per seed, per location\n", 457 | " preds = CatBoostRegressor(verbose=False, random_seed=i).fit(X, y).predict(tes)\n", 458 | " predictions.extend(preds)\n", 459 | "\n", 460 | " X = train[(train.location == 'B') | (train.location == 'C')]\n", 461 | " y = X.target\n", 462 | " X = X.drop(['ID', 'location', 'target'], axis=1)\n", 463 | " X, y = shuffle(X, y, random_state=0)\n", 464 | "\n", 465 | " tes = test[(test.location == 'B') | (test.location == 'C')]\n", 466 | " tes = tes.drop(['ID', 'location'], axis=1)\n", 467 | " preds = CatBoostRegressor(verbose=False, random_seed=i).fit(X, y).predict(tes)\n", 468 | " predictions.extend(preds)\n", 469 | "\n", 470 | " predzz.append(predictions)\n", 471 | "\n", 472 | "# Averaging the predictions\n", 473 | "preds_av = np.mean(predzz, axis=0)\n", 474 | "\n", 475 | "# Post processing of the predictions\n", 476 | "# This post processing was done with the help of a validation set.\n", 477 | "# The validation set was adversarial, i.e. we chose the examples from the training set closest to the test set, and applied post processing to it.\n", 478 | "predz = [((((((((((x-0.85)*1.015)-0.85)*1.012)-0.75)*1.0095)-0.55)*1.0065)-0.8)*1.007) for x in preds_av]\n", 479 | "predzz = [((x-0.85)*1.015) for x in predz]\n", 480 | "\n", 481 | "# Creating a submission file\n", 482 | "sub_df = pd.DataFrame({'ID': test_id, 'target': predzz})\n", 483 | "sub_df.to_csv('model_1_2.csv', index = False)" 484 | ] 485 | }, 486 | { 487 | "cell_type": "code", 488 | "execution_count": 48, 489 | "metadata": {}, 490 | "outputs": [ 491 | { 492 | "data": { 493 | "text/html": [ 494 | "
\n", 495 | "\n", 508 | "\n", 509 | " \n", 510 | " \n", 511 | " \n", 512 | " \n", 513 | " \n", 514 | " \n", 515 | " \n", 516 | " \n", 517 | " \n", 518 | " \n", 519 | " \n", 520 | " \n", 521 | " \n", 522 | " \n", 523 | " \n", 524 | " \n", 525 | " \n", 526 | " \n", 527 | " \n", 528 | "
AB
A1.0000000.979202
B0.9792021.000000
\n", 529 | "
" 530 | ], 531 | "text/plain": [ 532 | " A B\n", 533 | "A 1.000000 0.979202\n", 534 | "B 0.979202 1.000000" 535 | ] 536 | }, 537 | "execution_count": 48, 538 | "metadata": {}, 539 | "output_type": "execute_result" 540 | } 541 | ], 542 | "source": [ 543 | "blend_df = pd.read_csv('model_1_1.csv')[['ID']]\n", 544 | "blend_df['A'] = pd.read_csv('model_1_1.csv')['target']\n", 545 | "blend_df = pd.merge(blend_df, pd.read_csv('model_1_2.csv').rename({'target': 'B'}, axis=1), on = 'ID', how = 'left')\n", 546 | "blend_df.corr()" 547 | ] 548 | }, 549 | { 550 | "cell_type": "code", 551 | "execution_count": 53, 552 | "metadata": { 553 | "collapsed": true 554 | }, 555 | "outputs": [], 556 | "source": [ 557 | "blend_df['target'] = blend_df['A']*0.5 + blend_df['B']*0.5\n", 558 | "blend_df[['ID', 'target']].to_csv('model_12_blend.csv', index=False)" 559 | ] 560 | }, 561 | { 562 | "cell_type": "code", 563 | "execution_count": 54, 564 | "metadata": {}, 565 | "outputs": [ 566 | { 567 | "data": { 568 | "text/html": [ 569 | "
\n", 570 | "\n", 583 | "\n", 584 | " \n", 585 | " \n", 586 | " \n", 587 | " \n", 588 | " \n", 589 | " \n", 590 | " \n", 591 | " \n", 592 | " \n", 593 | " \n", 594 | " \n", 595 | " \n", 596 | " \n", 597 | " \n", 598 | " \n", 599 | " \n", 600 | " \n", 601 | " \n", 602 | " \n", 603 | " \n", 604 | " \n", 605 | " \n", 606 | " \n", 607 | " \n", 608 | " \n", 609 | " \n", 610 | " \n", 611 | " \n", 612 | "
ABtarget
A1.0000000.9792020.994761
B0.9792021.0000000.994813
target0.9947610.9948131.000000
\n", 613 | "
" 614 | ], 615 | "text/plain": [ 616 | " A B target\n", 617 | "A 1.000000 0.979202 0.994761\n", 618 | "B 0.979202 1.000000 0.994813\n", 619 | "target 0.994761 0.994813 1.000000" 620 | ] 621 | }, 622 | "execution_count": 54, 623 | "metadata": {}, 624 | "output_type": "execute_result" 625 | } 626 | ], 627 | "source": [ 628 | "blend_df.corr()" 629 | ] 630 | }, 631 | { 632 | "cell_type": "code", 633 | "execution_count": 55, 634 | "metadata": { 635 | "collapsed": true 636 | }, 637 | "outputs": [], 638 | "source": [ 639 | "SUB_FILE_NAME = 'model_12_blend.csv'" 640 | ] 641 | }, 642 | { 643 | "cell_type": "code", 644 | "execution_count": 57, 645 | "metadata": {}, 646 | "outputs": [ 647 | { 648 | "data": { 649 | "text/html": [ 650 | "
\n", 651 | "\n", 664 | "\n", 665 | " \n", 666 | " \n", 667 | " \n", 668 | " \n", 669 | " \n", 670 | " \n", 671 | " \n", 672 | " \n", 673 | " \n", 674 | " \n", 675 | " \n", 676 | " \n", 677 | " \n", 678 | " \n", 679 | " \n", 680 | " \n", 681 | " \n", 682 | " \n", 683 | " \n", 684 | " \n", 685 | " \n", 686 | " \n", 687 | " \n", 688 | " \n", 689 | " \n", 690 | " \n", 691 | " \n", 692 | " \n", 693 | " \n", 694 | " \n", 695 | " \n", 696 | " \n", 697 | " \n", 698 | " \n", 699 | " \n", 700 | " \n", 701 | " \n", 702 | " \n", 703 | " \n", 704 | " \n", 705 | " \n", 706 | " \n", 707 | " \n", 708 | " \n", 709 | " \n", 710 | " \n", 711 | " \n", 712 | " \n", 713 | " \n", 714 | " \n", 715 | " \n", 716 | " \n", 717 | " \n", 718 | " \n", 719 | " \n", 720 | " \n", 721 | " \n", 722 | " \n", 723 | " \n", 724 | "
IDtarget
0ID_test_0158.123774
1ID_test_197.217908
2ID_test_1021.393733
3ID_test_10063.222891
4ID_test_100092.046200
5ID_test_100144.955298
6ID_test_100283.270765
7ID_test_100336.458014
8ID_test_100434.101068
9ID_test_100547.728921
\n", 725 | "
" 726 | ], 727 | "text/plain": [ 728 | " ID target\n", 729 | "0 ID_test_0 158.123774\n", 730 | "1 ID_test_1 97.217908\n", 731 | "2 ID_test_10 21.393733\n", 732 | "3 ID_test_100 63.222891\n", 733 | "4 ID_test_1000 92.046200\n", 734 | "5 ID_test_1001 44.955298\n", 735 | "6 ID_test_1002 83.270765\n", 736 | "7 ID_test_1003 36.458014\n", 737 | "8 ID_test_1004 34.101068\n", 738 | "9 ID_test_1005 47.728921" 739 | ] 740 | }, 741 | "execution_count": 57, 742 | "metadata": {}, 743 | "output_type": "execute_result" 744 | } 745 | ], 746 | "source": [ 747 | "blend_df[['ID', 'target']].head(10)" 748 | ] 749 | }, 750 | { 751 | "cell_type": "code", 752 | "execution_count": 56, 753 | "metadata": {}, 754 | "outputs": [ 755 | { 756 | "data": { 757 | "text/html": [ 758 | "Download CSV file" 759 | ], 760 | "text/plain": [ 761 | "" 762 | ] 763 | }, 764 | "execution_count": 56, 765 | "metadata": {}, 766 | "output_type": "execute_result" 767 | } 768 | ], 769 | "source": [ 770 | "from IPython.display import HTML\n", 771 | "def create_download_link(title = \"Download CSV file\", filename = \"data.csv\"): \n", 772 | " html = '{title}'\n", 773 | " html = html.format(title=title,filename=filename)\n", 774 | " return HTML(html)\n", 775 | "create_download_link(filename = SUB_FILE_NAME)" 776 | ] 777 | } 778 | ], 779 | "metadata": { 780 | "kernelspec": { 781 | "display_name": "Python 3", 782 | "language": "python", 783 | "name": "python3" 784 | }, 785 | "language_info": { 786 | "codemirror_mode": { 787 | "name": "ipython", 788 | "version": 3 789 | }, 790 | "file_extension": ".py", 791 | "mimetype": "text/x-python", 792 | "name": "python", 793 | "nbconvert_exporter": "python", 794 | "pygments_lexer": "ipython3", 795 | "version": "3.6.3" 796 | } 797 | }, 798 | "nbformat": 4, 799 | "nbformat_minor": 4 800 | } 801 | -------------------------------------------------------------------------------- /darius-model-2.ipynb: -------------------------------------------------------------------------------- 1 | 
{ 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 25, 6 | "metadata": { 7 | "id": "pk5WiopT9oJw", 8 | "outputId": "f81ff94a-3399-4a56-9365-6c5e54d80782" 9 | }, 10 | "outputs": [ 11 | { 12 | "name": "stdout", 13 | "output_type": "stream", 14 | "text": [ 15 | "\u001b[33mWARNING: You are using pip version 20.1; however, version 20.1.1 is available.\r\n", 16 | "You should consider upgrading via the '/opt/conda/bin/python3.7 -m pip install --upgrade pip' command.\u001b[0m\r\n" 17 | ] 18 | } 19 | ], 20 | "source": [ 21 | "# installing catboost\n", 22 | "# Catboost == 0.22 was the version of catboost at the start of this competition\n", 23 | "!pip install catboost==0.22 --quiet" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": 27, 29 | "metadata": { 30 | "collapsed": true, 31 | "id": "jVXG4QlTuVYr" 32 | }, 33 | "outputs": [], 34 | "source": [ 35 | "# Importing libraries\n", 36 | "import pandas as pd\n", 37 | "import numpy as np\n", 38 | "from lightgbm import LGBMRegressor\n", 39 | "from xgboost import XGBRegressor, XGBRFRegressor\n", 40 | "from sklearn.model_selection import KFold, cross_val_score, train_test_split\n", 41 | "from sklearn.metrics import mean_squared_error\n", 42 | "from sklearn.utils import shuffle\n", 43 | "from tqdm import tqdm, tqdm_notebook\n", 44 | "from functools import reduce\n", 45 | "from catboost import CatBoostRegressor, CatBoostClassifier\n", 46 | "import joblib\n", 47 | "\n", 48 | "\n", 49 | "import warnings\n", 50 | "warnings.filterwarnings('ignore')\n" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": 28, 56 | "metadata": { 57 | "collapsed": true, 58 | "id": "agGrUcYwQ7on", 59 | "outputId": "5d4d760c-135f-4e6e-ae14-bc275b174734" 60 | }, 61 | "outputs": [], 62 | "source": [ 63 | "# Loading data\n", 64 | "train = pd.read_csv('./input/Train.csv')\n", 65 | "test = pd.read_csv('./input/Test.csv')" 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": 29, 71 | 
"metadata": { 72 | "collapsed": true, 73 | "id": "jNB_PDTNpxu4" 74 | }, 75 | "outputs": [], 76 | "source": [ 77 | "# Feature interaction functions\n", 78 | "# There are 4 types of interactions: product interactions, division interactions, sum interactions and difference interactions\n", 79 | "\n", 80 | "def add_prod_interacts(df, inter_cols): \n", 81 | " def apply_interacts(x, inter_cols):\n", 82 | " cols = [x + '_prod_' + c for c in inter_cols[inter_cols.index(x)+1:]]\n", 83 | " interacts_df[cols] = pd.concat([df[x] * df[c] for c in inter_cols[inter_cols.index(x)+1:]], axis=1)\n", 84 | " \n", 85 | " interacts_df = pd.DataFrame()\n", 86 | " _ = df[inter_cols[:-1]].apply(lambda x: apply_interacts(x.name, inter_cols))\n", 87 | " df = pd.concat([df, interacts_df], axis=1)\n", 88 | " return df\n", 89 | "\n", 90 | "\n", 91 | "def add_div_interacts(df, inter_cols): \n", 92 | " def apply_interacts(x, inter_cols):\n", 93 | " cols = [x + '_div_' + c for c in inter_cols[inter_cols.index(x)+1:]]\n", 94 | " interacts_df[cols] = pd.concat([df[x] / df[c] for c in inter_cols[inter_cols.index(x)+1:]], axis=1)\n", 95 | " \n", 96 | " interacts_df = pd.DataFrame()\n", 97 | " _ = df[inter_cols[:-1]].apply(lambda x: apply_interacts(x.name, inter_cols))\n", 98 | " df = pd.concat([df, interacts_df], axis=1)\n", 99 | "\n", 100 | " return df\n", 101 | "\n", 102 | "def add_sum_interacts(df, inter_cols): \n", 103 | " def apply_interacts(x, inter_cols):\n", 104 | " cols = [x + '_sum_' + c for c in inter_cols[inter_cols.index(x)+1:]]\n", 105 | " interacts_df[cols] = pd.concat([df[x] + df[c] for c in inter_cols[inter_cols.index(x)+1:]], axis=1)\n", 106 | " \n", 107 | " interacts_df = pd.DataFrame()\n", 108 | " _ = df[inter_cols[:-1]].apply(lambda x: apply_interacts(x.name, inter_cols))\n", 109 | " df = pd.concat([df, interacts_df], axis=1)\n", 110 | "\n", 111 | " return df\n", 112 | "\n", 113 | "def add_diff_interacts(df, inter_cols): \n", 114 | " def apply_interacts(x, inter_cols):\n", 115 | " 
cols = [x + '_diff_' + c for c in inter_cols[inter_cols.index(x)+1:]]\n", 116 | " interacts_df[cols] = pd.concat([df[x] - df[c] for c in inter_cols[inter_cols.index(x)+1:]], axis=1)\n", 117 | " \n", 118 | " interacts_df = pd.DataFrame()\n", 119 | " _ = df[inter_cols[:-1]].apply(lambda x: apply_interacts(x.name, inter_cols))\n", 120 | " df = pd.concat([df, interacts_df], axis=1)\n", 121 | "\n", 122 | " return df" 123 | ] 124 | }, 125 | { 126 | "cell_type": "code", 127 | "execution_count": 30, 128 | "metadata": { 129 | "collapsed": true, 130 | "id": "VJjtuM3kvCAT" 131 | }, 132 | "outputs": [], 133 | "source": [ 134 | "# Loading data\n", 135 | "train = pd.read_csv('./input/Train.csv')\n", 136 | "test = pd.read_csv('./input/Test.csv')" 137 | ] 138 | }, 139 | { 140 | "cell_type": "code", 141 | "execution_count": 31, 142 | "metadata": { 143 | "collapsed": true, 144 | "id": "4MhDKOpYF9dm" 145 | }, 146 | "outputs": [], 147 | "source": [ 148 | "# Separating the target variable from the training dataframe\n", 149 | "#\n", 150 | "target = train.target\n", 151 | "\n", 152 | "# Aligning the train and test dataframes\n", 153 | "#\n", 154 | "train, test = train.align(test, join = 'inner',axis = 1)\n", 155 | "\n", 156 | "# Creating a separator column to both train and test,\n", 157 | "# This is to be used in separation\n", 158 | "#\n", 159 | "train['separator'] = 0\n", 160 | "test['separator'] = 1\n", 161 | "# Combining the train and test dataframes together\n", 162 | "#\n", 163 | "comb = pd.concat([train, test])\n", 164 | "\n", 165 | "# Creating a function to replace all spaces in the dataframe with np.nan\n", 166 | "#\n", 167 | "def replace_nan(x):\n", 168 | " if x==\" \":\n", 169 | " return np.nan\n", 170 | " else :\n", 171 | " return float(x)\n", 172 | "\n", 173 | "# Creating a list of the main columns\n", 174 | "#\n", 175 | "main_cols = [\"temp\",\"precip\",\"rel_humidity\",\"wind_dir\",\"wind_spd\",\"atmos_press\"]\n", 176 | "\n", 177 | "# Replacing spaces with np.nan\n", 178 
| "#\n", 179 | "for col in main_cols: \n", 180 | " comb[col]=comb[col].apply(lambda x: [ replace_nan(X) for X in x.replace(\"nan\",\" \").split(\",\")])\n", 181 | "\n", 182 | "def make_columns(feature):\n", 183 | " return [f\"{feature}_{i}\" for i in range(1, 122)]\n", 184 | " \n", 185 | "# Generating dataframes of hours for each main column\n", 186 | "#\n", 187 | "comb_temp = pd.DataFrame([x for x in comb.temp], columns=make_columns('temp'))\n", 188 | "comb_precip = pd.DataFrame([x for x in comb.precip], columns=make_columns('precip'))\n", 189 | "comb_rel_humidity = pd.DataFrame([x for x in comb.rel_humidity], columns=make_columns('rel_humidity'))\n", 190 | "comb_wind_dir = pd.DataFrame([x for x in comb.wind_dir], columns=make_columns('wind_dir'))\n", 191 | "comb_wind_spd = pd.DataFrame([x for x in comb.wind_spd], columns=make_columns('wind_spd'))\n", 192 | "comb_atmos_press = pd.DataFrame([x for x in comb.atmos_press], columns=make_columns('atmos_press'))\n", 193 | "\n", 194 | "comb_temp['ID'], comb_precip['ID'], comb_rel_humidity['ID'], comb_wind_dir['ID'], comb_wind_spd['ID'], comb_atmos_press['ID'] = [list(comb.ID)] * 6\n", 195 | "\n", 196 | "# Combining the generated dataframes together\n", 197 | "#\n", 198 | "comb_dfs = [comb, comb_temp, comb_precip, comb_rel_humidity, comb_wind_dir, comb_wind_spd, comb_atmos_press]\n", 199 | "comb = reduce(lambda left,right: pd.merge(left,right,on=['ID'], how='outer'), comb_dfs)\n", 200 | "comb.drop(main_cols, axis = 1, inplace = True)\n", 201 | "df = comb.copy()" 202 | ] 203 | }, 204 | { 205 | "cell_type": "code", 206 | "execution_count": 32, 207 | "metadata": { 208 | "collapsed": true, 209 | "id": "pEyFVf1lqInY" 210 | }, 211 | "outputs": [], 212 | "source": [ 213 | "comb = df.copy()" 214 | ] 215 | }, 216 | { 217 | "cell_type": "code", 218 | "execution_count": 33, 219 | "metadata": { 220 | "id": "xzn_w6FYftB8", 221 | "outputId": "9c6b5360-766f-4c1b-c223-c00ceaa834f9" 222 | }, 223 | "outputs": [ 224 | { 225 | "data": { 226 
| "application/vnd.jupyter.widget-view+json": { 227 | "model_id": "999f9adfcfd444af978f061a15a74ea8", 228 | "version_major": 2, 229 | "version_minor": 0 230 | }, 231 | "text/plain": [ 232 | "HBox(children=(FloatProgress(value=0.0, max=6.0), HTML(value='')))" 233 | ] 234 | }, 235 | "metadata": {}, 236 | "output_type": "display_data" 237 | }, 238 | { 239 | "data": { 240 | "application/vnd.jupyter.widget-view+json": { 241 | "model_id": "", 242 | "version_major": 2, 243 | "version_minor": 0 244 | }, 245 | "text/plain": [ 246 | "HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))" 247 | ] 248 | }, 249 | "metadata": {}, 250 | "output_type": "display_data" 251 | }, 252 | { 253 | "data": { 254 | "application/vnd.jupyter.widget-view+json": { 255 | "model_id": "", 256 | "version_major": 2, 257 | "version_minor": 0 258 | }, 259 | "text/plain": [ 260 | "HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))" 261 | ] 262 | }, 263 | "metadata": {}, 264 | "output_type": "display_data" 265 | }, 266 | { 267 | "data": { 268 | "application/vnd.jupyter.widget-view+json": { 269 | "model_id": "", 270 | "version_major": 2, 271 | "version_minor": 0 272 | }, 273 | "text/plain": [ 274 | "HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))" 275 | ] 276 | }, 277 | "metadata": {}, 278 | "output_type": "display_data" 279 | }, 280 | { 281 | "data": { 282 | "application/vnd.jupyter.widget-view+json": { 283 | "model_id": "", 284 | "version_major": 2, 285 | "version_minor": 0 286 | }, 287 | "text/plain": [ 288 | "HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))" 289 | ] 290 | }, 291 | "metadata": {}, 292 | "output_type": "display_data" 293 | }, 294 | { 295 | "data": { 296 | "application/vnd.jupyter.widget-view+json": { 297 | "model_id": "", 298 | "version_major": 2, 299 | "version_minor": 0 300 | }, 301 | "text/plain": [ 302 | "HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))" 303 | ] 304 | }, 305 | "metadata": {}, 
306 | "output_type": "display_data" 307 | }, 308 | { 309 | "data": { 310 | "application/vnd.jupyter.widget-view+json": { 311 | "model_id": "", 312 | "version_major": 2, 313 | "version_minor": 0 314 | }, 315 | "text/plain": [ 316 | "HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))" 317 | ] 318 | }, 319 | "metadata": {}, 320 | "output_type": "display_data" 321 | }, 322 | { 323 | "name": "stdout", 324 | "output_type": "stream", 325 | "text": [ 326 | "\n" 327 | ] 328 | } 329 | ], 330 | "source": [ 331 | "# Adding aggregation features for each variable\n", 332 | "#\n", 333 | "aggs = ['mean', 'std', 'var', 'kurt', 'skew', 'max', 'median', 'sum', 'mode', 'sem', 'min']\n", 334 | "for col in tqdm_notebook(main_cols):\n", 335 | " for ag in tqdm_notebook(aggs, leave = False):\n", 336 | " if ag == 'mode':\n", 337 | " comb[col[0] + col[-1] + '_'+ag] = comb[[x for x in comb.columns if x.startswith(col)]].agg(ag, axis = 1)[0]\n", 338 | " else:\n", 339 | " comb[col[0] + col[-1] + '_'+ag] = comb[[x for x in comb.columns if x.startswith(col)]].agg(ag, axis = 1)\n", 340 | "\n", 341 | "# Creating separate dataframes for each variable\n", 342 | "# Creating a list of columns for each separate dataframe\n", 343 | "#\n", 344 | "temp, temp_cols = comb[[x for x in comb.columns if x.startswith('temp')]], [x for x in comb.columns if x.startswith('temp')]\n", 345 | "precip, precip_cols = comb[[x for x in comb.columns if x.startswith('precip')]], [x for x in comb.columns if x.startswith('precip')]\n", 346 | "humid, humid_cols = comb[[x for x in comb.columns if x.startswith('rel_humidity')]], [x for x in comb.columns if x.startswith('rel_humidity')]\n", 347 | "wind_dir, wind_dir_cols = comb[[x for x in comb.columns if x.startswith('wind_dir')]], [x for x in comb.columns if x.startswith('wind_dir')]\n", 348 | "wind_spd, wind_spd_cols = comb[[x for x in comb.columns if x.startswith('wind_spd')]], [x for x in comb.columns if x.startswith('wind_spd')]\n", 349 | "atmp, atmp_cols = 
comb[[x for x in comb.columns if x.startswith('atmos_press')]], [x for x in comb.columns if x.startswith('atmos_press')]\n", 350 | "fill_cols = comb.columns" 351 | ] 352 | }, 353 | { 354 | "cell_type": "code", 355 | "execution_count": 34, 356 | "metadata": { 357 | "id": "XHCE-yW1r3q8", 358 | "outputId": "dd3d6870-4a06-4794-c94c-eba320fa9335" 359 | }, 360 | "outputs": [ 361 | { 362 | "data": { 363 | "text/html": [ 364 | "
\n", 365 | "\n", 378 | "\n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | " \n", 467 | " \n", 468 | " \n", 469 | " \n", 470 | " \n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | " \n", 490 | " \n", 491 | " \n", 492 | " \n", 493 | " \n", 494 | " \n", 495 | " \n", 496 | " \n", 497 | " \n", 498 | " \n", 499 | " \n", 500 | " \n", 501 | " \n", 502 | " \n", 503 | " \n", 504 | " \n", 505 | " \n", 506 | " \n", 507 | " \n", 508 | " \n", 509 | " \n", 510 | " \n", 511 | " \n", 512 | " \n", 513 | " \n", 514 | " \n", 515 | " \n", 516 | " \n", 517 | " \n", 518 | " \n", 519 | " \n", 520 | " \n", 521 | " \n", 522 | " \n", 523 | " \n", 524 | " \n", 525 | " \n", 526 | " \n", 527 | "
IDlocationseparatortemp_1temp_2temp_3temp_4temp_5temp_6temp_7...as_stdas_varas_kurtas_skewas_maxas_medianas_sumas_modeas_semas_min
0ID_train_0C0NaNNaNNaNNaNNaNNaNNaN...0.0726820.005283-0.158696-0.38314487.87166787.7620831404.03893987.6141670.01817087.614167
1ID_train_1D022.53333321.71666720.83333320.98333320.87500020.14166719.375000...0.1566480.024539-0.446340-0.17335690.72500090.42916710942.02083390.2191670.01424190.056667
2ID_train_10A028.97500027.95000029.60000026.42500022.09166721.77500022.333333...0.1802330.032484-0.227481-0.24356188.81333388.42500010610.51166788.2875000.01645387.982500
3ID_train_100A022.96666724.26666725.27500025.62500025.86666725.09166724.025000...0.1624300.026384-0.462889-0.34761088.68500088.40000010693.60666788.2716670.01476687.965000
4ID_train_1000A021.87500021.57500021.52500021.43333320.50833319.91666718.991667...0.1203930.014494-0.062557-0.70566788.71916788.5525002656.14310688.2683330.02198188.268333
\n", 528 | "

5 rows × 795 columns

\n", 529 | "
" 530 | ], 531 | "text/plain": [ 532 | " ID location separator temp_1 temp_2 temp_3 \\\n", 533 | "0 ID_train_0 C 0 NaN NaN NaN \n", 534 | "1 ID_train_1 D 0 22.533333 21.716667 20.833333 \n", 535 | "2 ID_train_10 A 0 28.975000 27.950000 29.600000 \n", 536 | "3 ID_train_100 A 0 22.966667 24.266667 25.275000 \n", 537 | "4 ID_train_1000 A 0 21.875000 21.575000 21.525000 \n", 538 | "\n", 539 | " temp_4 temp_5 temp_6 temp_7 ... as_std as_var \\\n", 540 | "0 NaN NaN NaN NaN ... 0.072682 0.005283 \n", 541 | "1 20.983333 20.875000 20.141667 19.375000 ... 0.156648 0.024539 \n", 542 | "2 26.425000 22.091667 21.775000 22.333333 ... 0.180233 0.032484 \n", 543 | "3 25.625000 25.866667 25.091667 24.025000 ... 0.162430 0.026384 \n", 544 | "4 21.433333 20.508333 19.916667 18.991667 ... 0.120393 0.014494 \n", 545 | "\n", 546 | " as_kurt as_skew as_max as_median as_sum as_mode \\\n", 547 | "0 -0.158696 -0.383144 87.871667 87.762083 1404.038939 87.614167 \n", 548 | "1 -0.446340 -0.173356 90.725000 90.429167 10942.020833 90.219167 \n", 549 | "2 -0.227481 -0.243561 88.813333 88.425000 10610.511667 88.287500 \n", 550 | "3 -0.462889 -0.347610 88.685000 88.400000 10693.606667 88.271667 \n", 551 | "4 -0.062557 -0.705667 88.719167 88.552500 2656.143106 88.268333 \n", 552 | "\n", 553 | " as_sem as_min \n", 554 | "0 0.018170 87.614167 \n", 555 | "1 0.014241 90.056667 \n", 556 | "2 0.016453 87.982500 \n", 557 | "3 0.014766 87.965000 \n", 558 | "4 0.021981 88.268333 \n", 559 | "\n", 560 | "[5 rows x 795 columns]" 561 | ] 562 | }, 563 | "execution_count": 34, 564 | "metadata": {}, 565 | "output_type": "execute_result" 566 | } 567 | ], 568 | "source": [ 569 | "# Previewing the head of the generated dataframe\n", 570 | "#\n", 571 | "comb.head()" 572 | ] 573 | }, 574 | { 575 | "cell_type": "code", 576 | "execution_count": 35, 577 | "metadata": { 578 | "id": "v4DlbAzKp_c8", 579 | "outputId": "26c4e9d8-68ae-4ed1-8e9a-e9a009f7343c" 580 | }, 581 | "outputs": [ 582 | { 583 | "data": { 584 | 
"application/vnd.jupyter.widget-view+json": { 585 | "model_id": "27712189b04b46b68969a0482c5243a5", 586 | "version_major": 2, 587 | "version_minor": 0 588 | }, 589 | "text/plain": [ 590 | "HBox(children=(FloatProgress(value=0.0, max=795.0), HTML(value='')))" 591 | ] 592 | }, 593 | "metadata": {}, 594 | "output_type": "display_data" 595 | }, 596 | { 597 | "name": "stdout", 598 | "output_type": "stream", 599 | "text": [ 600 | "\n" 601 | ] 602 | }, 603 | { 604 | "data": { 605 | "application/vnd.jupyter.widget-view+json": { 606 | "model_id": "", 607 | "version_major": 2, 608 | "version_minor": 0 609 | }, 610 | "text/plain": [ 611 | "HBox(children=(FloatProgress(value=0.0, max=3.0), HTML(value='')))" 612 | ] 613 | }, 614 | "metadata": {}, 615 | "output_type": "display_data" 616 | } 617 | ], 618 | "source": [ 619 | "# Creating a list of columns containing aggregates only\n", 620 | "cols_mean = []\n", 621 | "cols_max = []\n", 622 | "cols_min = []\n", 623 | "for x in tqdm_notebook(comb.columns):\n", 624 | " if 'mean' in x:\n", 625 | " cols_mean.append(x)\n", 626 | " elif 'max' in x:\n", 627 | " cols_max.append(x)\n", 628 | " elif 'min' in x:\n", 629 | " cols_min.append(x)\n", 630 | " else:\n", 631 | " pass\n", 632 | "\n", 633 | "# Generating feature interactions between aggregates only\n", 634 | "for num_cols in tqdm_notebook([cols_mean, cols_max, cols_min], leave = False):\n", 635 | " comb = add_prod_interacts(comb, num_cols)\n", 636 | " comb = add_div_interacts(comb, num_cols)\n", 637 | " comb = add_diff_interacts(comb, num_cols)" 638 | ] 639 | }, 640 | { 641 | "cell_type": "code", 642 | "execution_count": 36, 643 | "metadata": { 644 | "collapsed": true, 645 | "id": "qdVrf3ZmcFZn" 646 | }, 647 | "outputs": [], 648 | "source": [ 649 | "# Generating new features, by adding each variable per hour\n", 650 | "for x, y, z, a, b in zip(temp.columns, precip.columns, humid.columns, wind_spd.columns, atmp.columns):\n", 651 | " comb['add_tp' +y[-4:]] = temp[x] + precip[y] + 
humid[z] + wind_spd[a] + atmp[b]" 652 | ] 653 | }, 654 | { 655 | "cell_type": "code", 656 | "execution_count": 37, 657 | "metadata": { 658 | "collapsed": true, 659 | "id": "IU2Z7uDigrgW" 660 | }, 661 | "outputs": [], 662 | "source": [ 663 | "# Filling missing values using forward fill\n", 664 | "comb = comb.ffill(axis = 1)" 665 | ] 666 | }, 667 | { 668 | "cell_type": "code", 669 | "execution_count": 38, 670 | "metadata": { 671 | "collapsed": true, 672 | "id": "YnsfoPv_R5yN" 673 | }, 674 | "outputs": [], 675 | "source": [ 676 | "# Calculating the difference between features per each dataframe\n", 677 | "\n", 678 | "dfs = [temp, precip, humid, wind_spd, atmp]\n", 679 | "\n", 680 | "diff_dfs = []\n", 681 | "for i in range(5):\n", 682 | " i = dfs[i]\n", 683 | " name = str(i.columns[0].split('_')[0])\n", 684 | " temp_df = i.diff(axis = 1).values\n", 685 | " temp_df = pd.DataFrame(temp_df, columns=['diff_' +name + '_' + str(i) for i in range(1, 122)])\n", 686 | " diff_dfs.append(temp_df)\n", 687 | "\n", 688 | "diff_dfs.append(comb)\n", 689 | "comb = reduce(lambda left,right: pd.merge(left,right, right_index=True, left_index=True, how='outer'), diff_dfs)" 690 | ] 691 | }, 692 | { 693 | "cell_type": "code", 694 | "execution_count": 39, 695 | "metadata": { 696 | "id": "TUeOD_AMUMyQ", 697 | "outputId": "ebd84e15-b073-45aa-d096-c2f270ce2e10" 698 | }, 699 | "outputs": [ 700 | { 701 | "data": { 702 | "text/html": [ 703 | "
\n", 704 | "\n", 717 | "\n", 718 | " \n", 719 | " \n", 720 | " \n", 721 | " \n", 722 | " \n", 723 | " \n", 724 | " \n", 725 | " \n", 726 | " \n", 727 | " \n", 728 | " \n", 729 | " \n", 730 | " \n", 731 | " \n", 732 | " \n", 733 | " \n", 734 | " \n", 735 | " \n", 736 | " \n", 737 | " \n", 738 | " \n", 739 | " \n", 740 | " \n", 741 | " \n", 742 | " \n", 743 | " \n", 744 | " \n", 745 | " \n", 746 | " \n", 747 | " \n", 748 | " \n", 749 | " \n", 750 | " \n", 751 | " \n", 752 | " \n", 753 | " \n", 754 | " \n", 755 | " \n", 756 | " \n", 757 | " \n", 758 | " \n", 759 | " \n", 760 | " \n", 761 | " \n", 762 | " \n", 763 | " \n", 764 | " \n", 765 | " \n", 766 | " \n", 767 | " \n", 768 | " \n", 769 | " \n", 770 | " \n", 771 | " \n", 772 | " \n", 773 | " \n", 774 | " \n", 775 | " \n", 776 | " \n", 777 | " \n", 778 | " \n", 779 | " \n", 780 | " \n", 781 | " \n", 782 | " \n", 783 | " \n", 784 | " \n", 785 | " \n", 786 | " \n", 787 | " \n", 788 | " \n", 789 | " \n", 790 | " \n", 791 | " \n", 792 | " \n", 793 | " \n", 794 | " \n", 795 | " \n", 796 | " \n", 797 | " \n", 798 | " \n", 799 | " \n", 800 | " \n", 801 | " \n", 802 | " \n", 803 | " \n", 804 | " \n", 805 | " \n", 806 | " \n", 807 | " \n", 808 | " \n", 809 | " \n", 810 | " \n", 811 | " \n", 812 | " \n", 813 | " \n", 814 | " \n", 815 | " \n", 816 | " \n", 817 | " \n", 818 | " \n", 819 | " \n", 820 | " \n", 821 | " \n", 822 | " \n", 823 | " \n", 824 | " \n", 825 | " \n", 826 | " \n", 827 | " \n", 828 | " \n", 829 | " \n", 830 | " \n", 831 | " \n", 832 | " \n", 833 | " \n", 834 | " \n", 835 | " \n", 836 | " \n", 837 | " \n", 838 | " \n", 839 | " \n", 840 | " \n", 841 | " \n", 842 | " \n", 843 | " \n", 844 | " \n", 845 | " \n", 846 | " \n", 847 | " \n", 848 | " \n", 849 | " \n", 850 | " \n", 851 | " \n", 852 | " \n", 853 | " \n", 854 | " \n", 855 | " \n", 856 | " \n", 857 | " \n", 858 | " \n", 859 | " \n", 860 | " \n", 861 | " \n", 862 | " \n", 863 | " \n", 864 | " \n", 865 | " \n", 866 | "
diff_temp_1diff_temp_2diff_temp_3diff_temp_4diff_temp_5diff_temp_6diff_temp_7diff_temp_8diff_temp_9diff_temp_10...add_tp_112add_tp_113add_tp_114add_tp_115add_tp_116add_tp_117add_tp_118add_tp_119add_tp_120add_tp_121
0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN...111.009110.396109.991110.031110.085109.618109.161108.627108.839109.081
1NaN-0.816667-0.8833330.150000-0.108333-0.733333-0.766667-0.583333-0.016667-0.133333...118.268119.334120.191122.078122.734123.072121.885119.338118.589114.357
2NaN-1.0250001.650000-3.175000-4.333333-0.3166670.558333-0.383333-1.5083330.000000...111.332110.389110.174110.092110.746113.401116.527118.449119.659120.692
3NaN1.3000001.0083330.3500000.241667-0.775000-1.066667-1.600000-2.191667-2.816667...109.088108.214108.176107.477107.332108.305110.405112.255114.158115.68
4NaN-0.300000-0.050000-0.091667-0.925000-0.591667-0.925000-0.400000-0.541667-0.133333...110.649110.649110.649110.649110.649110.649110.649110.649110.649110.649
\n", 867 | "

5 rows × 1656 columns

\n", 868 | "
" 869 | ], 870 | "text/plain": [ 871 | " diff_temp_1 diff_temp_2 diff_temp_3 diff_temp_4 diff_temp_5 \\\n", 872 | "0 NaN NaN NaN NaN NaN \n", 873 | "1 NaN -0.816667 -0.883333 0.150000 -0.108333 \n", 874 | "2 NaN -1.025000 1.650000 -3.175000 -4.333333 \n", 875 | "3 NaN 1.300000 1.008333 0.350000 0.241667 \n", 876 | "4 NaN -0.300000 -0.050000 -0.091667 -0.925000 \n", 877 | "\n", 878 | " diff_temp_6 diff_temp_7 diff_temp_8 diff_temp_9 diff_temp_10 ... \\\n", 879 | "0 NaN NaN NaN NaN NaN ... \n", 880 | "1 -0.733333 -0.766667 -0.583333 -0.016667 -0.133333 ... \n", 881 | "2 -0.316667 0.558333 -0.383333 -1.508333 0.000000 ... \n", 882 | "3 -0.775000 -1.066667 -1.600000 -2.191667 -2.816667 ... \n", 883 | "4 -0.591667 -0.925000 -0.400000 -0.541667 -0.133333 ... \n", 884 | "\n", 885 | " add_tp_112 add_tp_113 add_tp_114 add_tp_115 add_tp_116 add_tp_117 \\\n", 886 | "0 111.009 110.396 109.991 110.031 110.085 109.618 \n", 887 | "1 118.268 119.334 120.191 122.078 122.734 123.072 \n", 888 | "2 111.332 110.389 110.174 110.092 110.746 113.401 \n", 889 | "3 109.088 108.214 108.176 107.477 107.332 108.305 \n", 890 | "4 110.649 110.649 110.649 110.649 110.649 110.649 \n", 891 | "\n", 892 | " add_tp_118 add_tp_119 add_tp_120 add_tp_121 \n", 893 | "0 109.161 108.627 108.839 109.081 \n", 894 | "1 121.885 119.338 118.589 114.357 \n", 895 | "2 116.527 118.449 119.659 120.692 \n", 896 | "3 110.405 112.255 114.158 115.68 \n", 897 | "4 110.649 110.649 110.649 110.649 \n", 898 | "\n", 899 | "[5 rows x 1656 columns]" 900 | ] 901 | }, 902 | "execution_count": 39, 903 | "metadata": {}, 904 | "output_type": "execute_result" 905 | } 906 | ], 907 | "source": [ 908 | "comb.head()" 909 | ] 910 | }, 911 | { 912 | "cell_type": "code", 913 | "execution_count": 40, 914 | "metadata": { 915 | "collapsed": true 916 | }, 917 | "outputs": [], 918 | "source": [ 919 | "def apply_qcut(feat):\n", 920 | " return pd.qcut(comb[feat], 24, labels=False, duplicates='drop')" 921 | ] 922 | }, 923 | { 924 | "cell_type": 
"code", 925 | "execution_count": 41, 926 | "metadata": { 927 | "id": "Z1eJlRcDUdrq", 928 | "outputId": "4b5cf990-88bb-4147-b6cc-27f8f9af991a" 929 | }, 930 | "outputs": [ 931 | { 932 | "data": { 933 | "application/vnd.jupyter.widget-view+json": { 934 | "model_id": "3567c0b91c18424a881e581f27602992", 935 | "version_major": 2, 936 | "version_minor": 0 937 | }, 938 | "text/plain": [ 939 | "HBox(children=(FloatProgress(value=0.0, max=1653.0), HTML(value='')))" 940 | ] 941 | }, 942 | "metadata": {}, 943 | "output_type": "display_data" 944 | }, 945 | { 946 | "name": "stdout", 947 | "output_type": "stream", 948 | "text": [ 949 | "\n", 950 | "CPU times: user 2.11 s, sys: 1.36 s, total: 3.48 s\n", 951 | "Wall time: 18.5 s\n" 952 | ] 953 | } 954 | ], 955 | "source": [ 956 | "%%time\n", 957 | "# Binning feaures\n", 958 | "other_features = [x for x in comb.columns if x not in ['separator', 'ID', 'location']]\n", 959 | "\n", 960 | "# Multiprocessing trick: 15 seconds instead of 7 minutes !\n", 961 | "binned_data = joblib.Parallel(n_jobs=-1, backend='multiprocessing')(\n", 962 | " joblib.delayed(apply_qcut)(feat) for feat in tqdm_notebook(other_features))\n", 963 | "\n", 964 | "comb_binned_data = pd.concat(binned_data, axis=1)\n", 965 | "comb = pd.concat([comb[['separator', 'ID', 'location']], comb_binned_data], axis=1)\n", 966 | "\n", 967 | "# Separating train and test from the combined dataframe\n", 968 | "train = comb[comb.separator == 0]\n", 969 | "test = comb[comb.separator == 1]\n", 970 | "train.drop('separator', axis = 1, inplace = True)\n", 971 | "test.drop('separator', axis = 1, inplace = True)\n", 972 | "\n", 973 | "# Creating a list of test ids in the order that they will be trained\n", 974 | "testA = test[test.location == 'A']\n", 975 | "testB = test[test.location == 'B']\n", 976 | "testC = test[test.location == 'C']\n", 977 | "testD = test[test.location == 'D']\n", 978 | "testE = test[test.location == 'E']\n", 979 | "\n", 980 | "tA, tD, tE, tBC = testA.ID, testD.ID, 
testE.ID, test[(test.location == 'B') | (test.location == 'C')].ID\n", 981 | "test_id = pd.concat([tA, tD, tE, tBC])\n", 982 | "\n", 983 | "# Adding back target to the train set\n", 984 | "train['target'] = target" 985 | ] 986 | }, 987 | { 988 | "cell_type": "code", 989 | "execution_count": 42, 990 | "metadata": { 991 | "id": "95P-K-ZljXYv", 992 | "outputId": "0e8d98eb-fbcb-4100-a4a8-fc434398a399" 993 | }, 994 | "outputs": [ 995 | { 996 | "data": { 997 | "application/vnd.jupyter.widget-view+json": { 998 | "model_id": "fd94cfe7d8b84087bd429024bb637184", 999 | "version_major": 2, 1000 | "version_minor": 0 1001 | }, 1002 | "text/plain": [ 1003 | "HBox(children=(FloatProgress(value=0.0, max=25.0), HTML(value='')))" 1004 | ] 1005 | }, 1006 | "metadata": {}, 1007 | "output_type": "display_data" 1008 | }, 1009 | { 1010 | "name": "stdout", 1011 | "output_type": "stream", 1012 | "text": [ 1013 | "\n", 1014 | "CPU times: user 2h 35min 4s, sys: 5min 2s, total: 2h 40min 7s\n", 1015 | "Wall time: 42min 26s\n" 1016 | ] 1017 | } 1018 | ], 1019 | "source": [ 1020 | "%%time\n", 1021 | "# Creating X and y values\n", 1022 | "X = train.drop(['ID', 'location', 'target'], axis = 1)\n", 1023 | "y = target.values\n", 1024 | "\n", 1025 | "# Shuffling the X, y values\n", 1026 | "X, y = shuffle(X, y, random_state = 0)\n", 1027 | "tes = test.drop(['ID', 'location'], axis = 1)\n", 1028 | "\n", 1029 | "# Training the model across multiple seeds\n", 1030 | "predictions = []\n", 1031 | "for i in tqdm_notebook(range(25)):\n", 1032 | " cat = CatBoostRegressor(verbose = False, random_seed=i)\n", 1033 | " cat.fit(X, y)\n", 1034 | "\n", 1035 | " preds = cat.predict(tes)\n", 1036 | " predictions.append(preds)\n", 1037 | "\n", 1038 | "# Averaging the predictions\n", 1039 | "avg_preds = np.mean(predictions, axis = 0)\n", 1040 | "\n", 1041 | "# Post processing of the predictions\n", 1042 | "post_proc = [((((((((((x-0.85)*1.015)-0.85)*1.012)-0.75)*1.0095)-0.55)*1.0065)-0.8)*1.007) for x in avg_preds]\n", 
1043 | "post_proc = predzz = [((x-0.85)*1.015) for x in post_proc]\n", 1044 | "\n", 1045 | "# Creating a submission file\n", 1046 | "sub_df = pd.DataFrame({'ID': test.ID, 'target':post_proc})\n", 1047 | "sub_df.to_csv('model_2.csv', index = False)" 1048 | ] 1049 | }, 1050 | { 1051 | "cell_type": "code", 1052 | "execution_count": 43, 1053 | "metadata": {}, 1054 | "outputs": [ 1055 | { 1056 | "data": { 1057 | "text/html": [ 1058 | "
\n", 1059 | "\n", 1072 | "\n", 1073 | " \n", 1074 | " \n", 1075 | " \n", 1076 | " \n", 1077 | " \n", 1078 | " \n", 1079 | " \n", 1080 | " \n", 1081 | " \n", 1082 | " \n", 1083 | " \n", 1084 | " \n", 1085 | " \n", 1086 | " \n", 1087 | " \n", 1088 | " \n", 1089 | " \n", 1090 | " \n", 1091 | " \n", 1092 | " \n", 1093 | " \n", 1094 | " \n", 1095 | " \n", 1096 | " \n", 1097 | " \n", 1098 | " \n", 1099 | " \n", 1100 | " \n", 1101 | " \n", 1102 | " \n", 1103 | " \n", 1104 | " \n", 1105 | " \n", 1106 | " \n", 1107 | " \n", 1108 | " \n", 1109 | " \n", 1110 | " \n", 1111 | " \n", 1112 | " \n", 1113 | " \n", 1114 | " \n", 1115 | " \n", 1116 | " \n", 1117 | " \n", 1118 | " \n", 1119 | " \n", 1120 | " \n", 1121 | " \n", 1122 | " \n", 1123 | " \n", 1124 | " \n", 1125 | " \n", 1126 | " \n", 1127 | " \n", 1128 | " \n", 1129 | " \n", 1130 | " \n", 1131 | " \n", 1132 | "
IDtarget
15539ID_test_0154.512605
15540ID_test_1117.225585
15541ID_test_1026.247779
15542ID_test_10063.167372
15543ID_test_100092.044408
15544ID_test_100141.451966
15545ID_test_100284.857269
15546ID_test_100337.807807
15547ID_test_100430.820292
15548ID_test_100545.990773
\n", 1133 | "
" 1134 | ], 1135 | "text/plain": [ 1136 | " ID target\n", 1137 | "15539 ID_test_0 154.512605\n", 1138 | "15540 ID_test_1 117.225585\n", 1139 | "15541 ID_test_10 26.247779\n", 1140 | "15542 ID_test_100 63.167372\n", 1141 | "15543 ID_test_1000 92.044408\n", 1142 | "15544 ID_test_1001 41.451966\n", 1143 | "15545 ID_test_1002 84.857269\n", 1144 | "15546 ID_test_1003 37.807807\n", 1145 | "15547 ID_test_1004 30.820292\n", 1146 | "15548 ID_test_1005 45.990773" 1147 | ] 1148 | }, 1149 | "execution_count": 43, 1150 | "metadata": {}, 1151 | "output_type": "execute_result" 1152 | } 1153 | ], 1154 | "source": [ 1155 | "sub_df.head(10)" 1156 | ] 1157 | }, 1158 | { 1159 | "cell_type": "code", 1160 | "execution_count": null, 1161 | "metadata": { 1162 | "collapsed": true 1163 | }, 1164 | "outputs": [], 1165 | "source": [] 1166 | } 1167 | ], 1168 | "metadata": { 1169 | "kernelspec": { 1170 | "display_name": "Python 3", 1171 | "language": "python", 1172 | "name": "python3" 1173 | }, 1174 | "language_info": { 1175 | "codemirror_mode": { 1176 | "name": "ipython", 1177 | "version": 3 1178 | }, 1179 | "file_extension": ".py", 1180 | "mimetype": "text/x-python", 1181 | "name": "python", 1182 | "nbconvert_exporter": "python", 1183 | "pygments_lexer": "ipython3", 1184 | "version": "3.6.3" 1185 | } 1186 | }, 1187 | "nbformat": 4, 1188 | "nbformat_minor": 4 1189 | } 1190 | -------------------------------------------------------------------------------- /zindi-airqo-cnn-quick.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 28, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "### CNN output may vary everytime due to reproducibility issues\n", 12 | "### Using Kaggle GPU it takes only 22 minutes to run.\n", 13 | "### CNN is the most feasible option for implementation as the data size grows.\n", 14 | "### It does not require any kind of feature 
engineering - CNN does auto feature engineering.\n", 15 | "### GPU makes crunching a big dataset easier and faster.\n", 16 | "### CNN performance improves as the data increases - Also CNN has a really good CV.\n", 17 | "\n", 18 | "# -> Our CNN can easily handle different features. So even if new meteorological features are added, no worries!!!" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 1, 24 | "metadata": { 25 | "_cell_guid": "b1076dfc-b9ad-4769-8c92-a6c4dae69d19", 26 | "_uuid": "8f2839f25d086af736a60e9eeb907d3b93b6e0e5", 27 | "collapsed": true 28 | }, 29 | "outputs": [], 30 | "source": [ 31 | "import pandas as pd \n", 32 | "import numpy as np \n", 33 | "from tqdm.notebook import tqdm\n", 34 | "import math\n", 35 | "import gc\n", 36 | "from sklearn.preprocessing import LabelEncoder\n", 37 | "import matplotlib.pyplot as plt\n", 38 | "from sklearn.model_selection import KFold, train_test_split\n", 39 | "\n", 40 | "import os\n", 41 | "import shutil\n", 42 | "import datetime\n", 43 | "from tqdm import tqdm, tqdm_notebook\n", 44 | "\n", 45 | "%matplotlib inline\n", 46 | "\n", 47 | "pd.set_option(\"display.max_rows\", 200)\n", 48 | "pd.set_option(\"display.max_columns\", 200)" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": 2, 54 | "metadata": { 55 | "collapsed": true 56 | }, 57 | "outputs": [], 58 | "source": [ 59 | "from datetime import datetime\n", 60 | "\n", 61 | "from sklearn.model_selection import train_test_split\n", 62 | "from sklearn.metrics import mean_squared_error\n", 63 | "from sklearn.preprocessing import StandardScaler\n", 64 | "\n", 65 | "# import keras\n", 66 | "import tensorflow as tf\n", 67 | "from tensorflow.keras.models import Sequential, Model\n", 68 | "from tensorflow.keras.layers import Dense, Conv1D, Flatten, Dropout, Input, BatchNormalization\n", 69 | "from tensorflow.keras.layers import *\n", 70 | "from tensorflow.keras import callbacks, optimizers\n", 71 | "from tensorflow.keras.optimizers 
import Adam\n", 72 | "import tensorflow.keras.backend as K\n", 73 | "from joblib import Parallel, delayed\n", 74 | "from functools import partial\n", 75 | "import gc\n", 76 | "from tensorflow.keras.layers import MaxPooling1D" 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": 1, 82 | "metadata": { 83 | "_cell_guid": "79c7e3d0-c299-4dcb-8224-4455121ee9b0", 84 | "_uuid": "d629ff2d2480ee46fbb7e2d37f6b5fab8052498a", 85 | "collapsed": true 86 | }, 87 | "outputs": [], 88 | "source": [ 89 | "# !mkdir input\n", 90 | "# !cp /kaggle/input/zindi-airqo/* ./input" 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": 4, 96 | "metadata": { 97 | "collapsed": true 98 | }, 99 | "outputs": [], 100 | "source": [ 101 | "# Loading data\n", 102 | "train = pd.read_csv('./input/Train.csv')\n", 103 | "test = pd.read_csv('./input/Test.csv')" 104 | ] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "execution_count": 8, 109 | "metadata": {}, 110 | "outputs": [ 111 | { 112 | "name": "stderr", 113 | "output_type": "stream", 114 | "text": [ 115 | "/opt/conda/lib/python3.7/site-packages/ipykernel_launcher.py:5: TqdmDeprecationWarning: This function will be removed in tqdm==5.0.0\n", 116 | "Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`\n", 117 | " \"\"\"\n" 118 | ] 119 | }, 120 | { 121 | "data": { 122 | "application/vnd.jupyter.widget-view+json": { 123 | "model_id": "6e4983244c3c4439be3e2b17ec9374ab", 124 | "version_major": 2, 125 | "version_minor": 0 126 | }, 127 | "text/plain": [ 128 | "HBox(children=(FloatProgress(value=0.0, max=6.0), HTML(value='')))" 129 | ] 130 | }, 131 | "metadata": {}, 132 | "output_type": "display_data" 133 | }, 134 | { 135 | "name": "stdout", 136 | "output_type": "stream", 137 | "text": [ 138 | "\n" 139 | ] 140 | }, 141 | { 142 | "name": "stderr", 143 | "output_type": "stream", 144 | "text": [ 145 | "/opt/conda/lib/python3.7/site-packages/ipykernel_launcher.py:16: TqdmDeprecationWarning: This function will be 
removed in tqdm==5.0.0\n", 146 | "Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`\n", 147 | " app.launch_new_instance()\n" 148 | ] 149 | }, 150 | { 151 | "data": { 152 | "application/vnd.jupyter.widget-view+json": { 153 | "model_id": "266970b55163443f9c9029b908024b24", 154 | "version_major": 2, 155 | "version_minor": 0 156 | }, 157 | "text/plain": [ 158 | "HBox(children=(FloatProgress(value=0.0, max=6.0), HTML(value='')))" 159 | ] 160 | }, 161 | "metadata": {}, 162 | "output_type": "display_data" 163 | }, 164 | { 165 | "name": "stdout", 166 | "output_type": "stream", 167 | "text": [ 168 | "\n", 169 | "(20574, 857)\n", 170 | "144018\n", 171 | "CPU times: user 22 s, sys: 2.7 s, total: 24.7 s\n", 172 | "Wall time: 33.5 s\n" 173 | ] 174 | }, 175 | { 176 | "data": { 177 | "text/html": [ 178 | "
\n", 179 | "\n", 192 | "\n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " 
\n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | " \n", 467 | " \n", 468 | " \n", 469 | " \n", 470 | " \n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | " \n", 490 | " \n", 491 | " \n", 492 | " \n", 493 | " \n", 494 | " \n", 495 | " \n", 496 | " \n", 497 | " \n", 498 | 
" \n", 499 | " \n", 500 | " \n", 501 | " \n", 502 | " \n", 503 | " \n", 504 | " \n", 505 | " \n", 506 | " \n", 507 | " \n", 508 | " \n", 509 | " \n", 510 | " \n", 511 | " \n", 512 | " \n", 513 | " \n", 514 | " \n", 515 | " \n", 516 | " \n", 517 | " \n", 518 | " \n", 519 | " \n", 520 | " \n", 521 | " \n", 522 | " \n", 523 | " \n", 524 | " \n", 525 | " \n", 526 | " \n", 527 | " \n", 528 | " \n", 529 | " \n", 530 | " \n", 531 | " \n", 532 | " \n", 533 | " \n", 534 | " \n", 535 | " \n", 536 | " \n", 537 | " \n", 538 | " \n", 539 | " \n", 540 | " \n", 541 | " \n", 542 | " \n", 543 | " \n", 544 | " \n", 545 | " \n", 546 | " \n", 547 | " \n", 548 | " \n", 549 | " \n", 550 | " \n", 551 | " \n", 552 | " \n", 553 | " \n", 554 | " \n", 555 | " \n", 556 | " \n", 557 | " \n", 558 | " \n", 559 | " \n", 560 | " \n", 561 | " \n", 562 | " \n", 563 | " \n", 564 | " \n", 565 | " \n", 566 | " \n", 567 | " \n", 568 | " \n", 569 | " \n", 570 | " \n", 571 | " \n", 572 | " \n", 573 | " \n", 574 | " \n", 575 | " \n", 576 | " \n", 577 | " \n", 578 | " \n", 579 | " \n", 580 | " \n", 581 | " \n", 582 | " \n", 583 | " \n", 584 | " \n", 585 | " \n", 586 | " \n", 587 | " \n", 588 | " \n", 589 | " \n", 590 | " \n", 591 | " \n", 592 | " \n", 593 | " \n", 594 | " \n", 595 | " \n", 596 | " \n", 597 | " \n", 598 | " \n", 599 | " \n", 600 | " \n", 601 | " \n", 602 | " \n", 603 | " \n", 604 | " \n", 605 | " \n", 606 | " \n", 607 | " \n", 608 | " \n", 609 | " \n", 610 | " \n", 611 | " \n", 612 | " \n", 613 | " \n", 614 | " \n", 615 | " \n", 616 | " \n", 617 | " \n", 618 | " \n", 619 | " \n", 620 | " \n", 621 | " \n", 622 | " \n", 623 | " \n", 624 | " \n", 625 | " \n", 626 | " \n", 627 | " \n", 628 | " \n", 629 | " \n", 630 | " \n", 631 | " \n", 632 | " \n", 633 | " \n", 634 | " \n", 635 | " \n", 636 | " \n", 637 | " \n", 638 | " \n", 639 | " \n", 640 | " \n", 641 | " \n", 642 | " \n", 643 | " \n", 644 | " \n", 645 | " \n", 646 | " \n", 647 | " \n", 648 | " \n", 649 | " \n", 650 | " \n", 651 | " \n", 652 
| " \n", 653 | " \n", 654 | " \n", 655 | " \n", 656 | " \n", 657 | " \n", 658 | " \n", 659 | " \n", 660 | " \n", 661 | " \n", 662 | " \n", 663 | " \n", 664 | " \n", 665 | " \n", 666 | " \n", 667 | " \n", 668 | " \n", 669 | " \n", 670 | " \n", 671 | " \n", 672 | " \n", 673 | " \n", 674 | " \n", 675 | " \n", 676 | " \n", 677 | " \n", 678 | " \n", 679 | " \n", 680 | " \n", 681 | " \n", 682 | " \n", 683 | " \n", 684 | " \n", 685 | " \n", 686 | " \n", 687 | " \n", 688 | " \n", 689 | " \n", 690 | " \n", 691 | " \n", 692 | " \n", 693 | " \n", 694 | " \n", 695 | " \n", 696 | " \n", 697 | " \n", 698 | " \n", 699 | " \n", 700 | " \n", 701 | " \n", 702 | " \n", 703 | " \n", 704 | " \n", 705 | " \n", 706 | " \n", 707 | " \n", 708 | " \n", 709 | " \n", 710 | " \n", 711 | " \n", 712 | " \n", 713 | " \n", 714 | " \n", 715 | " \n", 716 | " \n", 717 | " \n", 718 | " \n", 719 | " \n", 720 | " \n", 721 | " \n", 722 | " \n", 723 | " \n", 724 | " \n", 725 | " \n", 726 | " \n", 727 | " \n", 728 | " \n", 729 | " \n", 730 | " \n", 731 | " \n", 732 | " \n", 733 | " \n", 734 | " \n", 735 | " \n", 736 | " \n", 737 | " \n", 738 | " \n", 739 | " \n", 740 | " \n", 741 | " \n", 742 | " \n", 743 | " \n", 744 | " \n", 745 | " \n", 746 | " \n", 747 | " \n", 748 | " \n", 749 | " \n", 750 | " \n", 751 | " \n", 752 | " \n", 753 | " \n", 754 | " \n", 755 | " \n", 756 | " \n", 757 | " \n", 758 | " \n", 759 | " \n", 760 | " \n", 761 | " \n", 762 | " \n", 763 | " \n", 764 | " \n", 765 | " \n", 766 | " \n", 767 | " \n", 768 | " \n", 769 | " \n", 770 | " \n", 771 | " \n", 772 | " \n", 773 | " \n", 774 | " \n", 775 | " \n", 776 | " \n", 777 | " \n", 778 | " \n", 779 | " \n", 780 | " \n", 781 | " \n", 782 | " \n", 783 | " \n", 784 | " \n", 785 | " \n", 786 | " \n", 787 | " \n", 788 | " \n", 789 | " \n", 790 | " \n", 791 | " \n", 792 | " \n", 793 | " \n", 794 | " \n", 795 | " \n", 796 | " \n", 797 | " \n", 798 | " \n", 799 | " \n", 800 | " \n", 801 | " \n", 802 | " \n", 803 | " \n", 804 | " \n", 805 | " \n", 
806 | " \n", 807 | " \n", 808 | " \n", 809 | " \n", 810 | " \n", 811 | " \n", 812 | " \n", 813 | " \n", 814 | " \n", 815 | " \n", 816 | " \n", 817 | " \n", 818 | " \n", 819 | " \n", 820 | " \n", 821 | " \n", 822 | " \n", 823 | " \n", 824 | " \n", 825 | " \n", 826 | " \n", 827 | " \n", 828 | " \n", 829 | " \n", 830 | " \n", 831 | " \n", 832 | " \n", 833 | " \n", 834 | " \n", 835 | " \n", 836 | " \n", 837 | " \n", 838 | " \n", 839 | " \n", 840 | " \n", 841 | " \n", 842 | " \n", 843 | " \n", 844 | " \n", 845 | " \n", 846 | " \n", 847 | " \n", 848 | " \n", 849 | " \n", 850 | " \n", 851 | " \n", 852 | " \n", 853 | " \n", 854 | " \n", 855 | " \n", 856 | " \n", 857 | " \n", 858 | " \n", 859 | " \n", 860 | " \n", 861 | " \n", 862 | " \n", 863 | " \n", 864 | " \n", 865 | " \n", 866 | " \n", 867 | " \n", 868 | " \n", 869 | " \n", 870 | " \n", 871 | " \n", 872 | " \n", 873 | " \n", 874 | " \n", 875 | " \n", 876 | " \n", 877 | " \n", 878 | " \n", 879 | " \n", 880 | " \n", 881 | " \n", 882 | " \n", 883 | " \n", 884 | " \n", 885 | " \n", 886 | " \n", 887 | " \n", 888 | " \n", 889 | " \n", 890 | " \n", 891 | " \n", 892 | " \n", 893 | " \n", 894 | " \n", 895 | " \n", 896 | " \n", 897 | " \n", 898 | " \n", 899 | " \n", 900 | " \n", 901 | " \n", 902 | " \n", 903 | " \n", 904 | " \n", 905 | " \n", 906 | " \n", 907 | " \n", 908 | " \n", 909 | " \n", 910 | " \n", 911 | " \n", 912 | " \n", 913 | " \n", 914 | " \n", 915 | " \n", 916 | " \n", 917 | " \n", 918 | " \n", 919 | " \n", 920 | " \n", 921 | " \n", 922 | " \n", 923 | " \n", 924 | " \n", 925 | " \n", 926 | " \n", 927 | " \n", 928 | " \n", 929 | " \n", 930 | " \n", 931 | " \n", 932 | " \n", 933 | " \n", 934 | " \n", 935 | " \n", 936 | " \n", 937 | " \n", 938 | " \n", 939 | " \n", 940 | " \n", 941 | " \n", 942 | " \n", 943 | " \n", 944 | " \n", 945 | " \n", 946 | " \n", 947 | " \n", 948 | " \n", 949 | " \n", 950 | " \n", 951 | " \n", 952 | " \n", 953 | " \n", 954 | " \n", 955 | " \n", 956 | " \n", 957 | " \n", 958 | " \n", 959 | " 
\n", 960 | " \n", 961 | " \n", 962 | " \n", 963 | " \n", 964 | " \n", 965 | " \n", 966 | " \n", 967 | " \n", 968 | " \n", 969 | " \n", 970 | " \n", 971 | " \n", 972 | " \n", 973 | " \n", 974 | " \n", 975 | " \n", 976 | " \n", 977 | " \n", 978 | " \n", 979 | " \n", 980 | " \n", 981 | " \n", 982 | " \n", 983 | " \n", 984 | " \n", 985 | " \n", 986 | " \n", 987 | " \n", 988 | " \n", 989 | " \n", 990 | " \n", 991 | " \n", 992 | " \n", 993 | " \n", 994 | " \n", 995 | " \n", 996 | " \n", 997 | " \n", 998 | " \n", 999 | " \n", 1000 | " \n", 1001 | " \n", 1002 | " \n", 1003 | " \n", 1004 | " \n", 1005 | " \n", 1006 | " \n", 1007 | " \n", 1008 | " \n", 1009 | " \n", 1010 | " \n", 1011 | " \n", 1012 | " \n", 1013 | " \n", 1014 | " \n", 1015 | " \n", 1016 | " \n", 1017 | " \n", 1018 | " \n", 1019 | " \n", 1020 | " \n", 1021 | " \n", 1022 | " \n", 1023 | " \n", 1024 | " \n", 1025 | " \n", 1026 | " \n", 1027 | " \n", 1028 | " \n", 1029 | " \n", 1030 | " \n", 1031 | " \n", 1032 | " \n", 1033 | " \n", 1034 | " \n", 1035 | " \n", 1036 | " \n", 1037 | " \n", 1038 | " \n", 1039 | " \n", 1040 | " \n", 1041 | " \n", 1042 | " \n", 1043 | " \n", 1044 | " \n", 1045 | " \n", 1046 | " \n", 1047 | " \n", 1048 | " \n", 1049 | " \n", 1050 | " \n", 1051 | " \n", 1052 | " \n", 1053 | " \n", 1054 | " \n", 1055 | " \n", 1056 | " \n", 1057 | " \n", 1058 | " \n", 1059 | " \n", 1060 | " \n", 1061 | " \n", 1062 | " \n", 1063 | " \n", 1064 | " \n", 1065 | " \n", 1066 | " \n", 1067 | " \n", 1068 | " \n", 1069 | " \n", 1070 | " \n", 1071 | " \n", 1072 | " \n", 1073 | " \n", 1074 | " \n", 1075 | " \n", 1076 | " \n", 1077 | " \n", 1078 | " \n", 1079 | " \n", 1080 | " \n", 1081 | " \n", 1082 | " \n", 1083 | " \n", 1084 | " \n", 1085 | " \n", 1086 | " \n", 1087 | " \n", 1088 | " \n", 1089 | " \n", 1090 | " \n", 1091 | " \n", 1092 | " \n", 1093 | " \n", 1094 | " \n", 1095 | " \n", 1096 | " \n", 1097 | " \n", 1098 | " \n", 1099 | " \n", 1100 | " \n", 1101 | " \n", 1102 | " \n", 1103 | " \n", 1104 | " \n", 1105 
| " \n", 1106 | " \n", 1107 | " \n", 1108 | " \n", 1109 | " \n", 1110 | " \n", 1111 | " \n", 1112 | " \n", 1113 | " \n", 1114 | " \n", 1115 | " \n", 1116 | " \n", 1117 | " \n", 1118 | " \n", 1119 | " \n", 1120 | " \n", 1121 | " \n", 1122 | " \n", 1123 | " \n", 1124 | " \n", 1125 | " \n", 1126 | " \n", 1127 | " \n", 1128 | " \n", 1129 | " \n", 1130 | " \n", 1131 | " \n", 1132 | " \n", 1133 | " \n", 1134 | " \n", 1135 | " \n", 1136 | " \n", 1137 | " \n", 1138 | " \n", 1139 | " \n", 1140 | " \n", 1141 | " \n", 1142 | " \n", 1143 | " \n", 1144 | " \n", 1145 | " \n", 1146 | " \n", 1147 | " \n", 1148 | " \n", 1149 | " \n", 1150 | " \n", 1151 | " \n", 1152 | " \n", 1153 | " \n", 1154 | " \n", 1155 | " \n", 1156 | " \n", 1157 | " \n", 1158 | " \n", 1159 | " \n", 1160 | " \n", 1161 | " \n", 1162 | " \n", 1163 | " \n", 1164 | " \n", 1165 | " \n", 1166 | " \n", 1167 | " \n", 1168 | " \n", 1169 | " \n", 1170 | " \n", 1171 | " \n", 1172 | " \n", 1173 | " \n", 1174 | " \n", 1175 | " \n", 1176 | " \n", 1177 | " \n", 1178 | " \n", 1179 | " \n", 1180 | " \n", 1181 | " \n", 1182 | " \n", 1183 | " \n", 1184 | " \n", 1185 | " \n", 1186 | " \n", 1187 | " \n", 1188 | " \n", 1189 | " \n", 1190 | " \n", 1191 | " \n", 1192 | " \n", 1193 | " \n", 1194 | " \n", 1195 | " \n", 1196 | " \n", 1197 | " \n", 1198 | " \n", 1199 | " \n", 1200 | " \n", 1201 | " \n", 1202 | " \n", 1203 | " \n", 1204 | " \n", 1205 | " \n", 1206 | " \n", 1207 | " \n", 1208 | " \n", 1209 | " \n", 1210 | " \n", 1211 | " \n", 1212 | " \n", 1213 | " \n", 1214 | " \n", 1215 | " \n", 1216 | " \n", 1217 | " \n", 1218 | " \n", 1219 | " \n", 1220 | " \n", 1221 | " \n", 1222 | " \n", 1223 | " \n", 1224 | " \n", 1225 | " \n", 1226 | " \n", 1227 | " \n", 1228 | " \n", 1229 | " \n", 1230 | " \n", 1231 | " \n", 1232 | " \n", 1233 | " \n", 1234 | " \n", 1235 | " \n", 1236 | " \n", 1237 | " \n", 1238 | " \n", 1239 | " \n", 1240 | " \n", 1241 | " \n", 1242 | " \n", 1243 | " \n", 1244 | " \n", 1245 | " \n", 1246 | " \n", 1247 | " \n", 
1248 | " \n", 1249 | " \n", 1250 | " \n", 1251 | " \n", 1252 | " \n", 1253 | " \n", 1254 | " \n", 1255 | " \n", 1256 | " \n", 1257 | " \n", 1258 | " \n", 1259 | " \n", 1260 | " \n", 1261 | " \n", 1262 | " \n", 1263 | " \n", 1264 | " \n", 1265 | " \n", 1266 | " \n", 1267 | " \n", 1268 | " \n", 1269 | " \n", 1270 | " \n", 1271 | " \n", 1272 | " \n", 1273 | " \n", 1274 | " \n", 1275 | " \n", 1276 | " \n", 1277 | " \n", 1278 | " \n", 1279 | " \n", 1280 | " \n", 1281 | " \n", 1282 | " \n", 1283 | " \n", 1284 | " \n", 1285 | " \n", 1286 | " \n", 1287 | " \n", 1288 | " \n", 1289 | " \n", 1290 | " \n", 1291 | " \n", 1292 | " \n", 1293 | " \n", 1294 | " \n", 1295 | " \n", 1296 | " \n", 1297 | " \n", 1298 | " \n", 1299 | " \n", 1300 | " \n", 1301 | " \n", 1302 | " \n", 1303 | " \n", 1304 | " \n", 1305 | " \n", 1306 | " \n", 1307 | " \n", 1308 | " \n", 1309 | " \n", 1310 | " \n", 1311 | " \n", 1312 | " \n", 1313 | " \n", 1314 | " \n", 1315 | " \n", 1316 | " \n", 1317 | " \n", 1318 | " \n", 1319 | " \n", 1320 | " \n", 1321 | " \n", 1322 | " \n", 1323 | " \n", 1324 | " \n", 1325 | " \n", 1326 | " \n", 1327 | " \n", 1328 | " \n", 1329 | " \n", 1330 | " \n", 1331 | " \n", 1332 | " \n", 1333 | " \n", 1334 | " \n", 1335 | " \n", 1336 | " \n", 1337 | " \n", 1338 | " \n", 1339 | " \n", 1340 | " \n", 1341 | " \n", 1342 | " \n", 1343 | " \n", 1344 | " \n", 1345 | " \n", 1346 | " \n", 1347 | " \n", 1348 | " \n", 1349 | " \n", 1350 | " \n", 1351 | " \n", 1352 | " \n", 1353 | " \n", 1354 | " \n", 1355 | " \n", 1356 | " \n", 1357 | " \n", 1358 | " \n", 1359 | " \n", 1360 | " \n", 1361 | " \n", 1362 | " \n", 1363 | " \n", 1364 | " \n", 1365 | " \n", 1366 | " \n", 1367 | " \n", 1368 | " \n", 1369 | " \n", 1370 | " \n", 1371 | " \n", 1372 | " \n", 1373 | " \n", 1374 | " \n", 1375 | " \n", 1376 | " \n", 1377 | " \n", 1378 | " \n", 1379 | " \n", 1380 | " \n", 1381 | " \n", 1382 | " \n", 1383 | " \n", 1384 | " \n", 1385 | " \n", 1386 | " \n", 1387 | " \n", 1388 | " \n", 1389 | " \n", 1390 | " 
\n", 1391 | " \n", 1392 | " \n", 1393 | " \n", 1394 | " \n", 1395 | " \n", 1396 | " \n", 1397 | " \n", 1398 | " \n", 1399 | " \n", 1400 | " \n", 1401 | " \n", 1402 | " \n", 1403 | " \n", 1404 | " \n", 1405 | " \n", 1406 | " \n", 1407 | " \n", 1408 | " \n", 1409 | " \n", 1410 | " \n", 1411 | " \n", 1412 | " \n", 1413 | " \n", 1414 | " \n", 1415 | " \n", 1416 | " \n", 1417 | " \n", 1418 | " \n", 1419 | " \n", 1420 | " \n", 1421 | "
IDlocationtargettemppreciprel_humiditywind_spdatmos_presstemp_0temp_1temp_2temp_3temp_4temp_5temp_6temp_7temp_8temp_9temp_10temp_11temp_12temp_13temp_14temp_15temp_16temp_17temp_18temp_19temp_20temp_21temp_22temp_23temp_24temp_25temp_26temp_27temp_28temp_29temp_30temp_31temp_32temp_33temp_34temp_35temp_36temp_37temp_38temp_39temp_40temp_41temp_42temp_43temp_44temp_45temp_46temp_47temp_48temp_49temp_50temp_51temp_52temp_53temp_54temp_55temp_56temp_57temp_58temp_59temp_60temp_61temp_62temp_63temp_64temp_65temp_66temp_67temp_68temp_69temp_70temp_71temp_72temp_73temp_74temp_75temp_76temp_77temp_78temp_79temp_80temp_81temp_82temp_83temp_84temp_85temp_86temp_87temp_88temp_89temp_90temp_91...wind_dir_sin_21wind_dir_sin_22wind_dir_sin_23wind_dir_sin_24wind_dir_sin_25wind_dir_sin_26wind_dir_sin_27wind_dir_sin_28wind_dir_sin_29wind_dir_sin_30wind_dir_sin_31wind_dir_sin_32wind_dir_sin_33wind_dir_sin_34wind_dir_sin_35wind_dir_sin_36wind_dir_sin_37wind_dir_sin_38wind_dir_sin_39wind_dir_sin_40wind_dir_sin_41wind_dir_sin_42wind_dir_sin_43wind_dir_sin_44wind_dir_sin_45wind_dir_sin_46wind_dir_sin_47wind_dir_sin_48wind_dir_sin_49wind_dir_sin_50wind_dir_sin_51wind_dir_sin_52wind_dir_sin_53wind_dir_sin_54wind_dir_sin_55wind_dir_sin_56wind_dir_sin_57wind_dir_sin_58wind_dir_sin_59wind_dir_sin_60wind_dir_sin_61wind_dir_sin_62wind_dir_sin_63wind_dir_sin_64wind_dir_sin_65wind_dir_sin_66wind_dir_sin_67wind_dir_sin_68wind_dir_sin_69wind_dir_sin_70wind_dir_sin_71wind_dir_sin_72wind_dir_sin_73wind_dir_sin_74wind_dir_sin_75wind_dir_sin_76wind_dir_sin_77wind_dir_sin_78wind_dir_sin_79wind_dir_sin_80wind_dir_sin_81wind_dir_sin_82wind_dir_sin_83wind_dir_sin_84wind_dir_sin_85wind_dir_sin_86wind_dir_sin_87wind_dir_sin_88wind_dir_sin_89wind_dir_sin_90wind_dir_sin_91wind_dir_sin_92wind_dir_sin_93wind_dir_sin_94wind_dir_sin_95wind_dir_sin_96wind_dir_sin_97wind_dir_sin_98wind_dir_sin_99wind_dir_sin_100wind_dir_sin_101wind_dir_sin_102wind_dir_sin_103wind_dir_sin_104wind_dir_sin_105wind_dir_sin_106wind_dir_
sin_107wind_dir_sin_108wind_dir_sin_109wind_dir_sin_110wind_dir_sin_111wind_dir_sin_112wind_dir_sin_113wind_dir_sin_114wind_dir_sin_115wind_dir_sin_116wind_dir_sin_117wind_dir_sin_118wind_dir_sin_119wind_dir_sin_120
0ID_train_0C45.126304NaNNaNNaNNaNNaN22.53333321.71666720.83333320.98333320.87500020.14166719.37500018.79166718.77500018.64166718.55833318.53333319.60833321.91666724.71666726.65833329.17500030.70000031.43333332.33333333.00833333.39166733.61666731.09166727.55000026.65833325.67500026.42500023.78333322.41666722.04166721.00833321.47500021.82500021.15833322.30833323.11666722.33333323.85000023.82500024.98333328.05000030.84166731.99166732.04166732.00000031.66666729.15833325.86666724.30000023.68333323.75833322.99166721.89166721.15833320.41666719.53333319.12500018.76666717.98333318.45833321.93333323.79166725.55833327.75833329.65833331.39166731.88333332.35833332.70833331.98333330.85000028.80026.49166724.86666724.36666723.02500022.32500021.65000020.75000020.47500019.64166719.51666719.57500020.00000023.35833325.60833326.88333326.35833327.32500029.00833328.433333...-0.1416200.677692-0.691042-0.570434-0.6215530.054916-0.068411-0.427471-0.7481190.7958070.997073-0.105681-0.1702960.8505610.971100-0.002620-0.8978810.8555720.5622760.6060490.902810-0.853162-0.186615-0.0327980.937824-0.317277-0.451505-0.797416-0.159037-0.8427040.5900000.2494850.6437850.369674-0.910274-0.3908970.531411-0.1176710.6820720.997411-0.4450400.6415540.9179980.6479000.8434000.831011-0.780758-0.402907-0.970055-0.775132-0.737531-0.8770860.706791-0.9912530.8703720.235040-0.399780-0.890181-0.4290680.5365050.849812-0.9217840.8900760.325722-0.9689460.422955-0.908680-0.2385310.994556-0.784712-0.3273690.7932150.9211650.900036-0.244630-0.858170-0.9808760.890335-0.992381-0.861434-0.629305-0.2905130.999024-0.9856270.403930-0.778758-0.6231890.921356-0.949976-0.9386220.5399890.833571-0.5664730.2494510.703657-0.7755810.1505660.1173690.635717-0.947955
1ID_train_1D79.131702NaNNaNNaNNaNNaN22.53333321.71666720.83333320.98333320.87500020.14166719.37500018.79166718.77500018.64166718.55833318.53333319.60833321.91666724.71666726.65833329.17500030.70000031.43333332.33333333.00833333.39166733.61666731.09166727.55000026.65833325.67500026.42500023.78333322.41666722.04166721.00833321.47500021.82500021.15833322.30833323.11666722.33333323.85000023.82500024.98333328.05000030.84166731.99166732.04166732.00000031.66666729.15833325.86666724.30000023.68333323.75833322.99166721.89166721.15833320.41666719.53333319.12500018.76666717.98333318.45833321.93333323.79166725.55833327.75833329.65833331.39166731.88333332.35833332.70833331.98333330.85000028.80026.49166724.86666724.36666723.02500022.32500021.65000020.75000020.47500019.64166719.51666719.57500020.00000023.35833325.60833326.88333326.35833327.32500029.00833328.433333...-0.1416200.677692-0.691042-0.570434-0.6215530.054916-0.068411-0.427471-0.7481190.7958070.997073-0.105681-0.1702960.8505610.971100-0.002620-0.8978810.8555720.5622760.6060490.902810-0.853162-0.186615-0.0327980.937824-0.317277-0.451505-0.797416-0.159037-0.8427040.5900000.2494850.6437850.369674-0.910274-0.3908970.531411-0.1176710.6820720.997411-0.4450400.6415540.9179980.6479000.8434000.831011-0.780758-0.402907-0.970055-0.775132-0.737531-0.8770860.706791-0.9912530.8703720.235040-0.399780-0.890181-0.4290680.5365050.849812-0.9217840.8900760.325722-0.9689460.422955-0.908680-0.2385310.994556-0.784712-0.3273690.7932150.9211650.900036-0.244630-0.858170-0.9808760.890335-0.992381-0.861434-0.629305-0.2905130.999024-0.9856270.923642-0.5537730.455011-0.9890010.211568-0.2308570.9715380.669779-0.839579-0.212372-0.0294920.9032390.8816710.304360-0.9557220.996240
2ID_train_10A32.661304NaNNaNNaNNaNNaN28.97500027.95000029.60000026.42500022.09166721.77500022.33333321.95000020.44166720.44166720.95000019.80000019.59166719.57500019.51666719.55000019.78333319.90833319.51666719.66666720.68333322.49166723.70833324.90000026.05833327.32500027.86666728.29166722.13636419.14166719.99166720.21666720.43333320.46666720.80000020.95833320.64166720.03333319.82500019.61666719.22500018.71666718.45833319.00000020.75833327.35833324.49166726.35000027.95000029.16666730.06666730.55000030.40000030.00833328.90833326.70000025.71666724.85833324.29166723.30833323.00833322.04166721.37500021.13333321.15833320.65833320.76666721.63333322.70833324.80833327.10833328.77500029.47529.76666729.87500029.15000027.71666727.49166726.48333325.47500025.00833324.60000024.03333323.35833322.36666722.60833322.74166721.90833321.55000021.75833320.31666720.650000...0.992270-0.7448360.312328-0.613390-0.691339-0.9962730.877801-0.619629-0.8526530.9522940.2728200.8872480.1617530.7105640.4933200.8310380.922995-0.206542-0.9051400.261427-0.8345610.9995850.969601-0.304580-0.1343510.926241-0.050912-0.9893470.8259940.9495670.105761-0.2350780.9862500.526325-0.7416610.8195010.090216-0.5375160.846942-0.385990-0.5786500.6164610.490440-0.002268-0.768049-0.7454490.9415080.847960-0.947888-0.4212780.855621-0.841248-0.738750-0.6160050.000840-0.962296-0.9134480.914193-0.680750-0.764314-0.6687800.553494-0.927593-0.6471220.3264780.9680750.946287-0.4265790.6746570.955783-0.9788360.5128570.356463-0.8674950.154614-0.9580780.4853470.984303-0.659389-0.9675700.280092-0.011816-0.862490-0.7850800.2125330.0992500.9709140.8412500.7727920.999893-0.620373-0.999964-0.3287100.549286-0.995617-0.6547510.0897680.9552920.772715-0.939837
3ID_train_100A53.850238NaNNaNNaNNaNNaN22.96666724.26666725.27500025.62500025.86666725.09166724.02500022.42500020.23333317.41666717.39166717.08333317.51666717.82500018.25833317.95000017.57500017.42500017.47500016.94166718.23333321.16666723.70833324.94166725.82500026.69166727.27500027.49166721.13636419.65000019.39166719.11666719.07500019.54166719.45000019.17500018.89166719.05833319.32500019.40000019.45833319.46666719.44166719.40833319.85000022.71666723.09166723.84166725.16666726.87500027.12500027.18333326.50000026.07500023.39166721.55833320.52500018.01666718.33333318.53333318.64166718.80833318.74166718.73333318.57500018.49166718.22500018.25000018.45833319.26666721.02500023.16666724.00024.70833325.55833326.50000025.71666720.75833317.35833318.11666718.13333318.05833318.35000018.26666718.06666718.13333318.01666717.45000017.06666717.32500017.34166717.375000...0.106538-0.6566700.1272970.936974-0.7551990.225715-0.876023-0.780112-0.9308970.999544-0.888784-0.874682-0.1531980.534190-0.2936040.923470-0.319239-0.991811-0.277354-0.121781-0.040652-0.881798-0.6304390.606618-0.811544-0.991091-0.9991070.9259440.9979520.8859990.684460-0.4141670.989378-0.2268630.045114-0.1048140.307146-0.383696-0.4259370.4297350.996446-0.8756550.925057-0.823499-0.2082590.8604310.047207-0.3577160.947255-0.9895170.777529-0.2008490.8478530.269371-0.584174-0.6571060.458151-0.9332490.8240320.7221340.991245-0.613111-0.1683750.2098870.3496800.4156320.8900220.9929490.8756580.9605880.202772-0.4158000.8811970.178436-0.713055-0.977995-0.8684490.082654-0.9363020.559380-0.9265640.4857551.000000-0.202556-0.498867-0.267156-0.9017800.6056420.994990-0.5524900.5804660.9456950.669056-0.0742190.8163250.0495290.3578270.2362270.301397-0.949435
4ID_train_1000A177.418750NaNNaNNaNNaNNaN21.87500021.57500021.52500021.43333320.50833319.91666718.99166718.59166718.05000017.91666718.16666717.52500019.19166722.45833325.01666725.85833326.85000027.62500028.35833328.75000027.85000023.76666720.89166721.00833320.60833320.19166720.10833319.63333324.24734823.53750023.41666722.90000022.10000021.55416721.11666720.97083320.69166720.58750020.82083321.10833320.82916720.87916720.62500018.60833318.51666720.02083320.22500020.61250021.67500022.92500023.85795525.77916721.26363620.61666724.31666723.00833322.14583320.09583319.97083320.08333320.06250019.93333319.69166719.81666719.43333319.44166719.15416719.01666719.01666719.52500021.48750022.93750023.40024.31666725.30416726.20833326.14583323.67083321.72500021.83750021.27916720.57083320.38750019.99166719.83333319.24583318.69166718.38333318.32500018.43333318.43750018.541667...-0.9960370.7633700.632792-0.1424430.8836810.9679570.9782570.1767070.9895230.881470-0.9287330.166814-0.959935-0.9366130.054542-0.414697-0.8664270.1735000.620190-0.906068-0.0354120.999959-0.830783-0.2693910.1144150.054568-0.071428-0.999393-0.9168620.9669950.827238-0.550644-0.426123-0.532556-0.587063-0.920966-0.655409-0.218329-0.1298440.959347-0.707344-0.3370410.961337-0.9351770.440540-0.942829-0.969093-0.985078-0.946619-0.662913-0.327247-0.9792340.7384700.5581700.496131-0.0450620.8614340.4065060.440201-0.677080-0.8400710.6694220.7509880.2772060.372669-0.8231200.290495-0.201678-0.2643230.9651210.817075-0.9847890.9755690.637579-0.985222-0.9915920.728332-0.008841-0.2059110.9674860.379974-0.9491850.983956-0.404907-0.988736-0.2936430.981650-0.8914770.962301-0.9410720.4611600.970801-0.165447-0.791261-0.901593-0.4911410.996079-0.391008-0.302821-0.618876
\n", 1422 | "

5 rows × 857 columns

\n", 1423 | "
" 1424 | ], 1425 | "text/plain": [ 1426 | " ID location target temp precip rel_humidity wind_spd \\\n", 1427 | "0 ID_train_0 C 45.126304 NaN NaN NaN NaN \n", 1428 | "1 ID_train_1 D 79.131702 NaN NaN NaN NaN \n", 1429 | "2 ID_train_10 A 32.661304 NaN NaN NaN NaN \n", 1430 | "3 ID_train_100 A 53.850238 NaN NaN NaN NaN \n", 1431 | "4 ID_train_1000 A 177.418750 NaN NaN NaN NaN \n", 1432 | "\n", 1433 | " atmos_press temp_0 temp_1 temp_2 temp_3 temp_4 \\\n", 1434 | "0 NaN 22.533333 21.716667 20.833333 20.983333 20.875000 \n", 1435 | "1 NaN 22.533333 21.716667 20.833333 20.983333 20.875000 \n", 1436 | "2 NaN 28.975000 27.950000 29.600000 26.425000 22.091667 \n", 1437 | "3 NaN 22.966667 24.266667 25.275000 25.625000 25.866667 \n", 1438 | "4 NaN 21.875000 21.575000 21.525000 21.433333 20.508333 \n", 1439 | "\n", 1440 | " temp_5 temp_6 temp_7 temp_8 temp_9 temp_10 \\\n", 1441 | "0 20.141667 19.375000 18.791667 18.775000 18.641667 18.558333 \n", 1442 | "1 20.141667 19.375000 18.791667 18.775000 18.641667 18.558333 \n", 1443 | "2 21.775000 22.333333 21.950000 20.441667 20.441667 20.950000 \n", 1444 | "3 25.091667 24.025000 22.425000 20.233333 17.416667 17.391667 \n", 1445 | "4 19.916667 18.991667 18.591667 18.050000 17.916667 18.166667 \n", 1446 | "\n", 1447 | " temp_11 temp_12 temp_13 temp_14 temp_15 temp_16 \\\n", 1448 | "0 18.533333 19.608333 21.916667 24.716667 26.658333 29.175000 \n", 1449 | "1 18.533333 19.608333 21.916667 24.716667 26.658333 29.175000 \n", 1450 | "2 19.800000 19.591667 19.575000 19.516667 19.550000 19.783333 \n", 1451 | "3 17.083333 17.516667 17.825000 18.258333 17.950000 17.575000 \n", 1452 | "4 17.525000 19.191667 22.458333 25.016667 25.858333 26.850000 \n", 1453 | "\n", 1454 | " temp_17 temp_18 temp_19 temp_20 temp_21 temp_22 \\\n", 1455 | "0 30.700000 31.433333 32.333333 33.008333 33.391667 33.616667 \n", 1456 | "1 30.700000 31.433333 32.333333 33.008333 33.391667 33.616667 \n", 1457 | "2 19.908333 19.516667 19.666667 20.683333 22.491667 23.708333 
\n", 1458 | "3 17.425000 17.475000 16.941667 18.233333 21.166667 23.708333 \n", 1459 | "4 27.625000 28.358333 28.750000 27.850000 23.766667 20.891667 \n", 1460 | "\n", 1461 | " temp_23 temp_24 temp_25 temp_26 temp_27 temp_28 \\\n", 1462 | "0 31.091667 27.550000 26.658333 25.675000 26.425000 23.783333 \n", 1463 | "1 31.091667 27.550000 26.658333 25.675000 26.425000 23.783333 \n", 1464 | "2 24.900000 26.058333 27.325000 27.866667 28.291667 22.136364 \n", 1465 | "3 24.941667 25.825000 26.691667 27.275000 27.491667 21.136364 \n", 1466 | "4 21.008333 20.608333 20.191667 20.108333 19.633333 24.247348 \n", 1467 | "\n", 1468 | " temp_29 temp_30 temp_31 temp_32 temp_33 temp_34 \\\n", 1469 | "0 22.416667 22.041667 21.008333 21.475000 21.825000 21.158333 \n", 1470 | "1 22.416667 22.041667 21.008333 21.475000 21.825000 21.158333 \n", 1471 | "2 19.141667 19.991667 20.216667 20.433333 20.466667 20.800000 \n", 1472 | "3 19.650000 19.391667 19.116667 19.075000 19.541667 19.450000 \n", 1473 | "4 23.537500 23.416667 22.900000 22.100000 21.554167 21.116667 \n", 1474 | "\n", 1475 | " temp_35 temp_36 temp_37 temp_38 temp_39 temp_40 \\\n", 1476 | "0 22.308333 23.116667 22.333333 23.850000 23.825000 24.983333 \n", 1477 | "1 22.308333 23.116667 22.333333 23.850000 23.825000 24.983333 \n", 1478 | "2 20.958333 20.641667 20.033333 19.825000 19.616667 19.225000 \n", 1479 | "3 19.175000 18.891667 19.058333 19.325000 19.400000 19.458333 \n", 1480 | "4 20.970833 20.691667 20.587500 20.820833 21.108333 20.829167 \n", 1481 | "\n", 1482 | " temp_41 temp_42 temp_43 temp_44 temp_45 temp_46 \\\n", 1483 | "0 28.050000 30.841667 31.991667 32.041667 32.000000 31.666667 \n", 1484 | "1 28.050000 30.841667 31.991667 32.041667 32.000000 31.666667 \n", 1485 | "2 18.716667 18.458333 19.000000 20.758333 27.358333 24.491667 \n", 1486 | "3 19.466667 19.441667 19.408333 19.850000 22.716667 23.091667 \n", 1487 | "4 20.879167 20.625000 18.608333 18.516667 20.020833 20.225000 \n", 1488 | "\n", 1489 | " temp_47 
temp_48 temp_49 temp_50 temp_51 temp_52 \\\n", 1490 | "0 29.158333 25.866667 24.300000 23.683333 23.758333 22.991667 \n", 1491 | "1 29.158333 25.866667 24.300000 23.683333 23.758333 22.991667 \n", 1492 | "2 26.350000 27.950000 29.166667 30.066667 30.550000 30.400000 \n", 1493 | "3 23.841667 25.166667 26.875000 27.125000 27.183333 26.500000 \n", 1494 | "4 20.612500 21.675000 22.925000 23.857955 25.779167 21.263636 \n", 1495 | "\n", 1496 | " temp_53 temp_54 temp_55 temp_56 temp_57 temp_58 \\\n", 1497 | "0 21.891667 21.158333 20.416667 19.533333 19.125000 18.766667 \n", 1498 | "1 21.891667 21.158333 20.416667 19.533333 19.125000 18.766667 \n", 1499 | "2 30.008333 28.908333 26.700000 25.716667 24.858333 24.291667 \n", 1500 | "3 26.075000 23.391667 21.558333 20.525000 18.016667 18.333333 \n", 1501 | "4 20.616667 24.316667 23.008333 22.145833 20.095833 19.970833 \n", 1502 | "\n", 1503 | " temp_59 temp_60 temp_61 temp_62 temp_63 temp_64 \\\n", 1504 | "0 17.983333 18.458333 21.933333 23.791667 25.558333 27.758333 \n", 1505 | "1 17.983333 18.458333 21.933333 23.791667 25.558333 27.758333 \n", 1506 | "2 23.308333 23.008333 22.041667 21.375000 21.133333 21.158333 \n", 1507 | "3 18.533333 18.641667 18.808333 18.741667 18.733333 18.575000 \n", 1508 | "4 20.083333 20.062500 19.933333 19.691667 19.816667 19.433333 \n", 1509 | "\n", 1510 | " temp_65 temp_66 temp_67 temp_68 temp_69 temp_70 \\\n", 1511 | "0 29.658333 31.391667 31.883333 32.358333 32.708333 31.983333 \n", 1512 | "1 29.658333 31.391667 31.883333 32.358333 32.708333 31.983333 \n", 1513 | "2 20.658333 20.766667 21.633333 22.708333 24.808333 27.108333 \n", 1514 | "3 18.491667 18.225000 18.250000 18.458333 19.266667 21.025000 \n", 1515 | "4 19.441667 19.154167 19.016667 19.016667 19.525000 21.487500 \n", 1516 | "\n", 1517 | " temp_71 temp_72 temp_73 temp_74 temp_75 temp_76 temp_77 \\\n", 1518 | "0 30.850000 28.800 26.491667 24.866667 24.366667 23.025000 22.325000 \n", 1519 | "1 30.850000 28.800 26.491667 24.866667 
24.366667 23.025000 22.325000 \n", 1520 | "2 28.775000 29.475 29.766667 29.875000 29.150000 27.716667 27.491667 \n", 1521 | "3 23.166667 24.000 24.708333 25.558333 26.500000 25.716667 20.758333 \n", 1522 | "4 22.937500 23.400 24.316667 25.304167 26.208333 26.145833 23.670833 \n", 1523 | "\n", 1524 | " temp_78 temp_79 temp_80 temp_81 temp_82 temp_83 \\\n", 1525 | "0 21.650000 20.750000 20.475000 19.641667 19.516667 19.575000 \n", 1526 | "1 21.650000 20.750000 20.475000 19.641667 19.516667 19.575000 \n", 1527 | "2 26.483333 25.475000 25.008333 24.600000 24.033333 23.358333 \n", 1528 | "3 17.358333 18.116667 18.133333 18.058333 18.350000 18.266667 \n", 1529 | "4 21.725000 21.837500 21.279167 20.570833 20.387500 19.991667 \n", 1530 | "\n", 1531 | " temp_84 temp_85 temp_86 temp_87 temp_88 temp_89 \\\n", 1532 | "0 20.000000 23.358333 25.608333 26.883333 26.358333 27.325000 \n", 1533 | "1 20.000000 23.358333 25.608333 26.883333 26.358333 27.325000 \n", 1534 | "2 22.366667 22.608333 22.741667 21.908333 21.550000 21.758333 \n", 1535 | "3 18.066667 18.133333 18.016667 17.450000 17.066667 17.325000 \n", 1536 | "4 19.833333 19.245833 18.691667 18.383333 18.325000 18.433333 \n", 1537 | "\n", 1538 | " temp_90 temp_91 ... wind_dir_sin_21 wind_dir_sin_22 \\\n", 1539 | "0 29.008333 28.433333 ... -0.141620 0.677692 \n", 1540 | "1 29.008333 28.433333 ... -0.141620 0.677692 \n", 1541 | "2 20.316667 20.650000 ... 0.992270 -0.744836 \n", 1542 | "3 17.341667 17.375000 ... 0.106538 -0.656670 \n", 1543 | "4 18.437500 18.541667 ... 
-0.996037 0.763370 \n", 1544 | "\n", 1545 | " wind_dir_sin_23 wind_dir_sin_24 wind_dir_sin_25 wind_dir_sin_26 \\\n", 1546 | "0 -0.691042 -0.570434 -0.621553 0.054916 \n", 1547 | "1 -0.691042 -0.570434 -0.621553 0.054916 \n", 1548 | "2 0.312328 -0.613390 -0.691339 -0.996273 \n", 1549 | "3 0.127297 0.936974 -0.755199 0.225715 \n", 1550 | "4 0.632792 -0.142443 0.883681 0.967957 \n", 1551 | "\n", 1552 | " wind_dir_sin_27 wind_dir_sin_28 wind_dir_sin_29 wind_dir_sin_30 \\\n", 1553 | "0 -0.068411 -0.427471 -0.748119 0.795807 \n", 1554 | "1 -0.068411 -0.427471 -0.748119 0.795807 \n", 1555 | "2 0.877801 -0.619629 -0.852653 0.952294 \n", 1556 | "3 -0.876023 -0.780112 -0.930897 0.999544 \n", 1557 | "4 0.978257 0.176707 0.989523 0.881470 \n", 1558 | "\n", 1559 | " wind_dir_sin_31 wind_dir_sin_32 wind_dir_sin_33 wind_dir_sin_34 \\\n", 1560 | "0 0.997073 -0.105681 -0.170296 0.850561 \n", 1561 | "1 0.997073 -0.105681 -0.170296 0.850561 \n", 1562 | "2 0.272820 0.887248 0.161753 0.710564 \n", 1563 | "3 -0.888784 -0.874682 -0.153198 0.534190 \n", 1564 | "4 -0.928733 0.166814 -0.959935 -0.936613 \n", 1565 | "\n", 1566 | " wind_dir_sin_35 wind_dir_sin_36 wind_dir_sin_37 wind_dir_sin_38 \\\n", 1567 | "0 0.971100 -0.002620 -0.897881 0.855572 \n", 1568 | "1 0.971100 -0.002620 -0.897881 0.855572 \n", 1569 | "2 0.493320 0.831038 0.922995 -0.206542 \n", 1570 | "3 -0.293604 0.923470 -0.319239 -0.991811 \n", 1571 | "4 0.054542 -0.414697 -0.866427 0.173500 \n", 1572 | "\n", 1573 | " wind_dir_sin_39 wind_dir_sin_40 wind_dir_sin_41 wind_dir_sin_42 \\\n", 1574 | "0 0.562276 0.606049 0.902810 -0.853162 \n", 1575 | "1 0.562276 0.606049 0.902810 -0.853162 \n", 1576 | "2 -0.905140 0.261427 -0.834561 0.999585 \n", 1577 | "3 -0.277354 -0.121781 -0.040652 -0.881798 \n", 1578 | "4 0.620190 -0.906068 -0.035412 0.999959 \n", 1579 | "\n", 1580 | " wind_dir_sin_43 wind_dir_sin_44 wind_dir_sin_45 wind_dir_sin_46 \\\n", 1581 | "0 -0.186615 -0.032798 0.937824 -0.317277 \n", 1582 | "1 -0.186615 -0.032798 
0.937824 -0.317277 \n", 1583 | "2 0.969601 -0.304580 -0.134351 0.926241 \n", 1584 | "3 -0.630439 0.606618 -0.811544 -0.991091 \n", 1585 | "4 -0.830783 -0.269391 0.114415 0.054568 \n", 1586 | "\n", 1587 | " wind_dir_sin_47 wind_dir_sin_48 wind_dir_sin_49 wind_dir_sin_50 \\\n", 1588 | "0 -0.451505 -0.797416 -0.159037 -0.842704 \n", 1589 | "1 -0.451505 -0.797416 -0.159037 -0.842704 \n", 1590 | "2 -0.050912 -0.989347 0.825994 0.949567 \n", 1591 | "3 -0.999107 0.925944 0.997952 0.885999 \n", 1592 | "4 -0.071428 -0.999393 -0.916862 0.966995 \n", 1593 | "\n", 1594 | " wind_dir_sin_51 wind_dir_sin_52 wind_dir_sin_53 wind_dir_sin_54 \\\n", 1595 | "0 0.590000 0.249485 0.643785 0.369674 \n", 1596 | "1 0.590000 0.249485 0.643785 0.369674 \n", 1597 | "2 0.105761 -0.235078 0.986250 0.526325 \n", 1598 | "3 0.684460 -0.414167 0.989378 -0.226863 \n", 1599 | "4 0.827238 -0.550644 -0.426123 -0.532556 \n", 1600 | "\n", 1601 | " wind_dir_sin_55 wind_dir_sin_56 wind_dir_sin_57 wind_dir_sin_58 \\\n", 1602 | "0 -0.910274 -0.390897 0.531411 -0.117671 \n", 1603 | "1 -0.910274 -0.390897 0.531411 -0.117671 \n", 1604 | "2 -0.741661 0.819501 0.090216 -0.537516 \n", 1605 | "3 0.045114 -0.104814 0.307146 -0.383696 \n", 1606 | "4 -0.587063 -0.920966 -0.655409 -0.218329 \n", 1607 | "\n", 1608 | " wind_dir_sin_59 wind_dir_sin_60 wind_dir_sin_61 wind_dir_sin_62 \\\n", 1609 | "0 0.682072 0.997411 -0.445040 0.641554 \n", 1610 | "1 0.682072 0.997411 -0.445040 0.641554 \n", 1611 | "2 0.846942 -0.385990 -0.578650 0.616461 \n", 1612 | "3 -0.425937 0.429735 0.996446 -0.875655 \n", 1613 | "4 -0.129844 0.959347 -0.707344 -0.337041 \n", 1614 | "\n", 1615 | " wind_dir_sin_63 wind_dir_sin_64 wind_dir_sin_65 wind_dir_sin_66 \\\n", 1616 | "0 0.917998 0.647900 0.843400 0.831011 \n", 1617 | "1 0.917998 0.647900 0.843400 0.831011 \n", 1618 | "2 0.490440 -0.002268 -0.768049 -0.745449 \n", 1619 | "3 0.925057 -0.823499 -0.208259 0.860431 \n", 1620 | "4 0.961337 -0.935177 0.440540 -0.942829 \n", 1621 | "\n", 1622 | " 
wind_dir_sin_67 wind_dir_sin_68 wind_dir_sin_69 wind_dir_sin_70 \\\n", 1623 | "0 -0.780758 -0.402907 -0.970055 -0.775132 \n", 1624 | "1 -0.780758 -0.402907 -0.970055 -0.775132 \n", 1625 | "2 0.941508 0.847960 -0.947888 -0.421278 \n", 1626 | "3 0.047207 -0.357716 0.947255 -0.989517 \n", 1627 | "4 -0.969093 -0.985078 -0.946619 -0.662913 \n", 1628 | "\n", 1629 | " wind_dir_sin_71 wind_dir_sin_72 wind_dir_sin_73 wind_dir_sin_74 \\\n", 1630 | "0 -0.737531 -0.877086 0.706791 -0.991253 \n", 1631 | "1 -0.737531 -0.877086 0.706791 -0.991253 \n", 1632 | "2 0.855621 -0.841248 -0.738750 -0.616005 \n", 1633 | "3 0.777529 -0.200849 0.847853 0.269371 \n", 1634 | "4 -0.327247 -0.979234 0.738470 0.558170 \n", 1635 | "\n", 1636 | " wind_dir_sin_75 wind_dir_sin_76 wind_dir_sin_77 wind_dir_sin_78 \\\n", 1637 | "0 0.870372 0.235040 -0.399780 -0.890181 \n", 1638 | "1 0.870372 0.235040 -0.399780 -0.890181 \n", 1639 | "2 0.000840 -0.962296 -0.913448 0.914193 \n", 1640 | "3 -0.584174 -0.657106 0.458151 -0.933249 \n", 1641 | "4 0.496131 -0.045062 0.861434 0.406506 \n", 1642 | "\n", 1643 | " wind_dir_sin_79 wind_dir_sin_80 wind_dir_sin_81 wind_dir_sin_82 \\\n", 1644 | "0 -0.429068 0.536505 0.849812 -0.921784 \n", 1645 | "1 -0.429068 0.536505 0.849812 -0.921784 \n", 1646 | "2 -0.680750 -0.764314 -0.668780 0.553494 \n", 1647 | "3 0.824032 0.722134 0.991245 -0.613111 \n", 1648 | "4 0.440201 -0.677080 -0.840071 0.669422 \n", 1649 | "\n", 1650 | " wind_dir_sin_83 wind_dir_sin_84 wind_dir_sin_85 wind_dir_sin_86 \\\n", 1651 | "0 0.890076 0.325722 -0.968946 0.422955 \n", 1652 | "1 0.890076 0.325722 -0.968946 0.422955 \n", 1653 | "2 -0.927593 -0.647122 0.326478 0.968075 \n", 1654 | "3 -0.168375 0.209887 0.349680 0.415632 \n", 1655 | "4 0.750988 0.277206 0.372669 -0.823120 \n", 1656 | "\n", 1657 | " wind_dir_sin_87 wind_dir_sin_88 wind_dir_sin_89 wind_dir_sin_90 \\\n", 1658 | "0 -0.908680 -0.238531 0.994556 -0.784712 \n", 1659 | "1 -0.908680 -0.238531 0.994556 -0.784712 \n", 1660 | "2 0.946287 
-0.426579 0.674657 0.955783 \n", 1661 | "3 0.890022 0.992949 0.875658 0.960588 \n", 1662 | "4 0.290495 -0.201678 -0.264323 0.965121 \n", 1663 | "\n", 1664 | " wind_dir_sin_91 wind_dir_sin_92 wind_dir_sin_93 wind_dir_sin_94 \\\n", 1665 | "0 -0.327369 0.793215 0.921165 0.900036 \n", 1666 | "1 -0.327369 0.793215 0.921165 0.900036 \n", 1667 | "2 -0.978836 0.512857 0.356463 -0.867495 \n", 1668 | "3 0.202772 -0.415800 0.881197 0.178436 \n", 1669 | "4 0.817075 -0.984789 0.975569 0.637579 \n", 1670 | "\n", 1671 | " wind_dir_sin_95 wind_dir_sin_96 wind_dir_sin_97 wind_dir_sin_98 \\\n", 1672 | "0 -0.244630 -0.858170 -0.980876 0.890335 \n", 1673 | "1 -0.244630 -0.858170 -0.980876 0.890335 \n", 1674 | "2 0.154614 -0.958078 0.485347 0.984303 \n", 1675 | "3 -0.713055 -0.977995 -0.868449 0.082654 \n", 1676 | "4 -0.985222 -0.991592 0.728332 -0.008841 \n", 1677 | "\n", 1678 | " wind_dir_sin_99 wind_dir_sin_100 wind_dir_sin_101 wind_dir_sin_102 \\\n", 1679 | "0 -0.992381 -0.861434 -0.629305 -0.290513 \n", 1680 | "1 -0.992381 -0.861434 -0.629305 -0.290513 \n", 1681 | "2 -0.659389 -0.967570 0.280092 -0.011816 \n", 1682 | "3 -0.936302 0.559380 -0.926564 0.485755 \n", 1683 | "4 -0.205911 0.967486 0.379974 -0.949185 \n", 1684 | "\n", 1685 | " wind_dir_sin_103 wind_dir_sin_104 wind_dir_sin_105 wind_dir_sin_106 \\\n", 1686 | "0 0.999024 -0.985627 0.403930 -0.778758 \n", 1687 | "1 0.999024 -0.985627 0.923642 -0.553773 \n", 1688 | "2 -0.862490 -0.785080 0.212533 0.099250 \n", 1689 | "3 1.000000 -0.202556 -0.498867 -0.267156 \n", 1690 | "4 0.983956 -0.404907 -0.988736 -0.293643 \n", 1691 | "\n", 1692 | " wind_dir_sin_107 wind_dir_sin_108 wind_dir_sin_109 wind_dir_sin_110 \\\n", 1693 | "0 -0.623189 0.921356 -0.949976 -0.938622 \n", 1694 | "1 0.455011 -0.989001 0.211568 -0.230857 \n", 1695 | "2 0.970914 0.841250 0.772792 0.999893 \n", 1696 | "3 -0.901780 0.605642 0.994990 -0.552490 \n", 1697 | "4 0.981650 -0.891477 0.962301 -0.941072 \n", 1698 | "\n", 1699 | " wind_dir_sin_111 wind_dir_sin_112 
wind_dir_sin_113 wind_dir_sin_114 \\\n", 1700 | "0 0.539989 0.833571 -0.566473 0.249451 \n", 1701 | "1 0.971538 0.669779 -0.839579 -0.212372 \n", 1702 | "2 -0.620373 -0.999964 -0.328710 0.549286 \n", 1703 | "3 0.580466 0.945695 0.669056 -0.074219 \n", 1704 | "4 0.461160 0.970801 -0.165447 -0.791261 \n", 1705 | "\n", 1706 | " wind_dir_sin_115 wind_dir_sin_116 wind_dir_sin_117 wind_dir_sin_118 \\\n", 1707 | "0 0.703657 -0.775581 0.150566 0.117369 \n", 1708 | "1 -0.029492 0.903239 0.881671 0.304360 \n", 1709 | "2 -0.995617 -0.654751 0.089768 0.955292 \n", 1710 | "3 0.816325 0.049529 0.357827 0.236227 \n", 1711 | "4 -0.901593 -0.491141 0.996079 -0.391008 \n", 1712 | "\n", 1713 | " wind_dir_sin_119 wind_dir_sin_120 \n", 1714 | "0 0.635717 -0.947955 \n", 1715 | "1 -0.955722 0.996240 \n", 1716 | "2 0.772715 -0.939837 \n", 1717 | "3 0.301397 -0.949435 \n", 1718 | "4 -0.302821 -0.618876 \n", 1719 | "\n", 1720 | "[5 rows x 857 columns]" 1721 | ] 1722 | }, 1723 | "execution_count": 8, 1724 | "metadata": {}, 1725 | "output_type": "execute_result" 1726 | } 1727 | ], 1728 | "source": [ 1729 | "%%time\n", 1730 | "### Only takes 30s to preprocess the data\n", 1731 | "data = pd.concat([train,test]).reset_index(drop=True)\n", 1732 | "df = data[['ID', 'location', 'target']]\n", 1733 | "df = pd.concat([train,test]).reset_index(drop=True)\n", 1734 | "for c in tqdm_notebook(['temp', 'precip', 'rel_humidity', 'wind_dir','wind_spd', 'atmos_press']):\n", 1735 | " tmp = data[c].str.split(',', expand=True)\n", 1736 | " tmp.columns = [c + '_' + str(x) for x in tmp.columns]\n", 1737 | " df = pd.concat([df, tmp], axis=1)\n", 1738 | "obj_cols = [c for c in df.select_dtypes('object').columns if c not in ['ID', 'location']]\n", 1739 | "tmp = Parallel(n_jobs=4)(delayed(partial(pd.to_numeric, errors='coerce'))(df[c]) for c in obj_cols)\n", 1740 | "df = df.drop(obj_cols, axis=1)\n", 1741 | "df = pd.concat([df, pd.DataFrame(tmp).T], axis=1)\n", 1742 | "weather_cols = ['temp', 'precip', 'rel_humidity', 
'wind_dir','wind_spd', 'atmos_press']\n", 1743 | "\n", 1744 | "### Filling NaNs by interpolation.\n", 1745 | "for w in tqdm_notebook(weather_cols):\n", 1746 | " selected_cols = [c for c in df.columns if w in c]\n", 1747 | " df[selected_cols] = df[selected_cols].interpolate(limit_direction='both')\n", 1748 | " ### Wind direction broken down into sine and cos components, and the original column is dropped\n", 1749 | " if w == 'wind_dir':\n", 1750 | " df[['wind_dir_cos_' + c.split('_')[-1] for c in selected_cols]] = df[selected_cols].apply(lambda x: np.cos(x))\n", 1751 | " df[['wind_dir_sin_' + c.split('_')[-1] for c in selected_cols]] = df[selected_cols].apply(lambda x: np.sin(x))\n", 1752 | " df = df.drop(selected_cols, axis = 1) \n", 1753 | "print(df.shape)\n", 1754 | "features = [c for c in df.columns if c not in ['ID', 'location', 'target']]\n", 1755 | "print(df[features].isnull().sum().sum())\n", 1756 | "df.head()" 1757 | ] 1758 | }, 1759 | { 1760 | "cell_type": "code", 1761 | "execution_count": 9, 1762 | "metadata": { 1763 | "collapsed": true 1764 | }, 1765 | "outputs": [], 1766 | "source": [ 1767 | "### Encoding each location by the mean of its target\n", 1768 | "features = [\"temp\",\"precip\",\"rel_humidity\",\"wind_spd\",\"atmos_press\", \"wind_dir_cos\", \"wind_dir_sin\"]\n", 1769 | "df['location'] = df['location'].map(df.groupby('location')['target'].mean())" 1770 | ] 1771 | }, 1772 | { 1773 | "cell_type": "code", 1774 | "execution_count": 10, 1775 | "metadata": {}, 1776 | "outputs": [ 1777 | { 1778 | "name": "stderr", 1779 | "output_type": "stream", 1780 | "text": [ 1781 | "/opt/conda/lib/python3.7/site-packages/ipykernel_launcher.py:4: TqdmDeprecationWarning: This function will be removed in tqdm==5.0.0\n", 1782 | "Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`\n", 1783 | " after removing the cwd from sys.path.\n" 1784 | ] 1785 | }, 1786 | { 1787 | "data": { 1788 | "application/vnd.jupyter.widget-view+json": { 1789 | "model_id": 
"d3873ff95cce472896986a0bc35f665b", 1790 | "version_major": 2, 1791 | "version_minor": 0 1792 | }, 1793 | "text/plain": [ 1794 | "HBox(children=(FloatProgress(value=0.0, max=7.0), HTML(value='')))" 1795 | ] 1796 | }, 1797 | "metadata": {}, 1798 | "output_type": "display_data" 1799 | }, 1800 | { 1801 | "name": "stdout", 1802 | "output_type": "stream", 1803 | "text": [ 1804 | "\n" 1805 | ] 1806 | } 1807 | ], 1808 | "source": [ 1809 | "### Creating more features\n", 1810 | "\n", 1811 | "new_features = []\n", 1812 | "for f in tqdm_notebook(features):\n", 1813 | " new_features.append(f'{f}_24hrs_ratio')\n", 1814 | " new_features.append(f'{f}_6hrs_ratio')\n", 1815 | " for hour, f_col in enumerate([f\"f_{i}\" for i in range(121)]):\n", 1816 | " \n", 1817 | " ### Ratio of current hour and 24 hours before\n", 1818 | " if hour - 24 < 0:\n", 1819 | " df[f'{f}_24hrs_ratio_{hour}'] = df[f'{f}_{hour}'] /(1e-5 + df[f'{f}_0'])\n", 1820 | " else:\n", 1821 | " df[f'{f}_24hrs_ratio_{hour}'] = df[f'{f}_{hour}'] /(1e-5 + df[f'{f}_{hour-24}'])\n", 1822 | "\n", 1823 | " \n", 1824 | " ### Ratio of current hour and 6 hours before\n", 1825 | " if hour - 6 < 0:\n", 1826 | " df[f'{f}_6hrs_ratio_{hour}'] = df[f'{f}_{hour}'] /(1e-5 + df[f'{f}_0'])\n", 1827 | " else:\n", 1828 | " df[f'{f}_6hrs_ratio_{hour}'] = df[f'{f}_{hour}'] /(1e-5 + df[f'{f}_{hour-6}'])" 1829 | ] 1830 | }, 1831 | { 1832 | "cell_type": "code", 1833 | "execution_count": 12, 1834 | "metadata": {}, 1835 | "outputs": [ 1836 | { 1837 | "data": { 1838 | "text/plain": [ 1839 | "['temp',\n", 1840 | " 'precip',\n", 1841 | " 'rel_humidity',\n", 1842 | " 'wind_spd',\n", 1843 | " 'atmos_press',\n", 1844 | " 'wind_dir_cos',\n", 1845 | " 'wind_dir_sin',\n", 1846 | " 'temp_24hrs_ratio',\n", 1847 | " 'temp_6hrs_ratio',\n", 1848 | " 'precip_24hrs_ratio',\n", 1849 | " 'precip_6hrs_ratio',\n", 1850 | " 'rel_humidity_24hrs_ratio',\n", 1851 | " 'rel_humidity_6hrs_ratio',\n", 1852 | " 'wind_spd_24hrs_ratio',\n", 1853 | " 'wind_spd_6hrs_ratio',\n", 
1854 | " 'atmos_press_24hrs_ratio',\n", 1855 | " 'atmos_press_6hrs_ratio',\n", 1856 | " 'wind_dir_cos_24hrs_ratio',\n", 1857 | " 'wind_dir_cos_6hrs_ratio',\n", 1858 | " 'wind_dir_sin_24hrs_ratio',\n", 1859 | " 'wind_dir_sin_6hrs_ratio']" 1860 | ] 1861 | }, 1862 | "execution_count": 12, 1863 | "metadata": {}, 1864 | "output_type": "execute_result" 1865 | } 1866 | ], 1867 | "source": [ 1868 | "features = features + new_features\n", 1869 | "features" 1870 | ] 1871 | }, 1872 | { 1873 | "cell_type": "code", 1874 | "execution_count": 13, 1875 | "metadata": { 1876 | "collapsed": true 1877 | }, 1878 | "outputs": [], 1879 | "source": [ 1880 | "train=df[df.target.notnull()].reset_index(drop=True)\n", 1881 | "test=df[df.target.isna()].reset_index(drop=True)" 1882 | ] 1883 | }, 1884 | { 1885 | "cell_type": "code", 1886 | "execution_count": 14, 1887 | "metadata": { 1888 | "collapsed": true 1889 | }, 1890 | "outputs": [], 1891 | "source": [ 1892 | "def get_sample(x):\n", 1893 | " '''\n", 1894 | " Convert a train row into a sample suitable for CNN\n", 1895 | " x: row of train dataset\n", 1896 | " '''\n", 1897 | " sub_sample = np.zeros((len(features)+1, 121))\n", 1898 | " for i, f in enumerate(features + [\"location\"]):\n", 1899 | " if f == \"location\":\n", 1900 | " sub_sample[i] = x[\"location\"] * np.ones(121)\n", 1901 | " else:\n", 1902 | " cols_f = [f\"{f}_{i}\" for i in range(121)]\n", 1903 | " vals = x[cols_f].values.astype(float)\n", 1904 | " sub_sample[i] = vals\n", 1905 | " return np.array(sub_sample).astype('float')" 1906 | ] 1907 | }, 1908 | { 1909 | "cell_type": "code", 1910 | "execution_count": 15, 1911 | "metadata": {}, 1912 | "outputs": [ 1913 | { 1914 | "data": { 1915 | "text/plain": [ 1916 | "(15539, 2551)" 1917 | ] 1918 | }, 1919 | "execution_count": 15, 1920 | "metadata": {}, 1921 | "output_type": "execute_result" 1922 | } 1923 | ], 1924 | "source": [ 1925 | "train.shape" 1926 | ] 1927 | }, 1928 | { 1929 | "cell_type": "code", 1930 | "execution_count": 16, 
1931 | "metadata": {}, 1932 | "outputs": [ 1933 | { 1934 | "name": "stdout", 1935 | "output_type": "stream", 1936 | "text": [ 1937 | "CPU times: user 1min 43s, sys: 3.45 s, total: 1min 47s\n", 1938 | "Wall time: 6min 39s\n" 1939 | ] 1940 | } 1941 | ], 1942 | "source": [ 1943 | "%%time\n", 1944 | "train_samples = Parallel(n_jobs=4)(delayed(get_sample)(row[1]) for row in train.iterrows())\n", 1945 | "test_samples = Parallel(n_jobs=4)(delayed(get_sample)(row[1]) for row in test.iterrows())" 1946 | ] 1947 | }, 1948 | { 1949 | "cell_type": "code", 1950 | "execution_count": 17, 1951 | "metadata": {}, 1952 | "outputs": [ 1953 | { 1954 | "data": { 1955 | "text/plain": [ 1956 | "((15539, 121, 22), (5035, 121, 22))" 1957 | ] 1958 | }, 1959 | "execution_count": 17, 1960 | "metadata": {}, 1961 | "output_type": "execute_result" 1962 | } 1963 | ], 1964 | "source": [ 1965 | "X_test = np.array(test_samples)\n", 1966 | "X_test = np.einsum('ikj->ijk', X_test)\n", 1967 | "X_train = np.array(train_samples)\n", 1968 | "X_train = np.einsum('ikj->ijk', X_train)\n", 1969 | "X_train.shape, X_test.shape" 1970 | ] 1971 | }, 1972 | { 1973 | "cell_type": "code", 1974 | "execution_count": 18, 1975 | "metadata": {}, 1976 | "outputs": [ 1977 | { 1978 | "name": "stdout", 1979 | "output_type": "stream", 1980 | "text": [ 1981 | "Train std = 1.020, Test std = 0.935, \n", 1982 | "Train mean = -0.001, Test mean = 0.002\n" 1983 | ] 1984 | } 1985 | ], 1986 | "source": [ 1987 | "X_all = np.concatenate([X_train, X_test])\n", 1988 | "\n", 1989 | "my_mean = X_all.mean(axis=(0, 1))\n", 1990 | "my_std = X_all.astype(float).std(axis=(0, 1))\n", 1991 | "\n", 1992 | "X_train[:, :] -= my_mean\n", 1993 | "X_test [:, :] -= my_mean\n", 1994 | "\n", 1995 | "X_train[:, :] /= my_std\n", 1996 | "X_test [:, :] /= my_std\n", 1997 | "\n", 1998 | "print(f\"Train std = {X_train.std(): .3f}, Test std = {X_test.std(): .3f}, \\nTrain mean = {X_train.mean(): .3f}, Test mean = {X_test.mean(): .3f}\")" 1999 | ] 2000 | }, 2001 | { 
2002 | "cell_type": "code", 2003 | "execution_count": 19, 2004 | "metadata": {}, 2005 | "outputs": [ 2006 | { 2007 | "data": { 2008 | "text/plain": [ 2009 | "(15539,)" 2010 | ] 2011 | }, 2012 | "execution_count": 19, 2013 | "metadata": {}, 2014 | "output_type": "execute_result" 2015 | } 2016 | ], 2017 | "source": [ 2018 | "y_train = train[\"target\"].values\n", 2019 | "y_train.shape" 2020 | ] 2021 | }, 2022 | { 2023 | "cell_type": "code", 2024 | "execution_count": 20, 2025 | "metadata": {}, 2026 | "outputs": [ 2027 | { 2028 | "name": "stdout", 2029 | "output_type": "stream", 2030 | "text": [ 2031 | "Model: \"sequential\"\n", 2032 | "_________________________________________________________________\n", 2033 | "Layer (type) Output Shape Param # \n", 2034 | "=================================================================\n", 2035 | "batch_normalization (BatchNo multiple 88 \n", 2036 | "_________________________________________________________________\n", 2037 | "conv1d (Conv1D) multiple 4288 \n", 2038 | "_________________________________________________________________\n", 2039 | "conv1d_1 (Conv1D) multiple 20544 \n", 2040 | "_________________________________________________________________\n", 2041 | "conv1d_2 (Conv1D) multiple 57472 \n", 2042 | "_________________________________________________________________\n", 2043 | "conv1d_3 (Conv1D) multiple 180352 \n", 2044 | "_________________________________________________________________\n", 2045 | "conv1d_4 (Conv1D) multiple 557312 \n", 2046 | "_________________________________________________________________\n", 2047 | "max_pooling1d (MaxPooling1D) multiple 0 \n", 2048 | "_________________________________________________________________\n", 2049 | "flatten (Flatten) multiple 0 \n", 2050 | "_________________________________________________________________\n", 2051 | "batch_normalization_1 (Batch multiple 20480 \n", 2052 | "_________________________________________________________________\n", 2053 | "dropout (Dropout) 
multiple 0 \n", 2054 | "_________________________________________________________________\n", 2055 | "output_cnn_simple (Dense) multiple 163872 \n", 2056 | "_________________________________________________________________\n", 2057 | "dense (Dense) multiple 33 \n", 2058 | "=================================================================\n", 2059 | "Total params: 1,004,441\n", 2060 | "Trainable params: 994,157\n", 2061 | "Non-trainable params: 10,284\n", 2062 | "_________________________________________________________________\n" 2063 | ] 2064 | } 2065 | ], 2066 | "source": [ 2067 | "\n", 2068 | "N_FEATS = X_train.shape[2]\n", 2069 | "\n", 2070 | "\n", 2071 | "def root_mean_squared_error(y_true, y_pred):\n", 2072 | " return K.sqrt(K.mean(K.square(y_pred - y_true), axis=-1)) \n", 2073 | "\n", 2074 | "def make_model():\n", 2075 | " model = Sequential()\n", 2076 | " model.add(BatchNormalization())\n", 2077 | " model.add(Conv1D(filters=64, kernel_size=(3,), strides=1, activation='relu', input_shape=(121, N_FEATS)))\n", 2078 | " model.add(Conv1D(filters=64, kernel_size=(5 ,), strides=1, activation='relu'))\n", 2079 | " \n", 2080 | " model.add(Conv1D(filters=128, kernel_size=(7, ), strides=1, activation='relu'))\n", 2081 | " model.add(Conv1D(filters=128, kernel_size=(11, ), strides=1, activation='relu'))\n", 2082 | " model.add(Conv1D(filters=256, kernel_size=(17, ), strides=1, activation='relu'))\n", 2083 | " model.add(MaxPooling1D(4))\n", 2084 | " \n", 2085 | "\n", 2086 | " model.add(Flatten())\n", 2087 | " model.add(BatchNormalization())\n", 2088 | " model.add(Dropout(0.3))\n", 2089 | " \n", 2090 | " model.add(Dense(32, activation='relu', name='output_cnn_simple'))\n", 2091 | " model.add(Dense(1))\n", 2092 | "\n", 2093 | " model.compile(loss=root_mean_squared_error, optimizer=Adam(lr = 5e-4))\n", 2094 | " \n", 2095 | " return model\n", 2096 | "\n", 2097 | "m = make_model()\n", 2098 | "m.build((None, 121, N_FEATS))\n", 2099 | "m.summary()" 2100 | ] 2101 | }, 2102 | { 
# Per-fold validation RMSEs and per-fold test predictions, filled in below.
errcb2 = []
y_pred_test = []

# Range of the training target; every test prediction is clamped to it.
target_floor = train.target.min()
target_ceiling = train.target.max()

for my_round in range(1):
    print(f"\n_____________________Round n-{my_round + 1}\n")

    # The round number doubles as the shuffle seed of the splitter.
    fold = KFold(n_splits=10, shuffle=True, random_state=my_round)

    for i, (train_index, test_index) in enumerate(fold.split(X_train, y_train), start=1):
        print(f"\nFold n-{i}\n")

        X_fit, y_fit = X_train[train_index], y_train[train_index]
        X_hold, y_hold = X_train[test_index], y_train[test_index]

        early_stop = tf.keras.callbacks.EarlyStopping(
            patience=10,
            verbose=1,
            restore_best_weights=True,
        )

        # A fresh, untrained network for every fold.
        m = make_model()
        m.fit(
            X_fit,
            y_fit,
            validation_data=(X_hold, y_hold),
            batch_size=128,
            epochs=150,
            verbose=0,
            callbacks=[early_stop],
        )

        # Validation score on the held-out fold.
        val_pred = m.predict(X_hold)[:, 0]
        rmse_val = mean_squared_error(y_hold, val_pred) ** .5
        print(f"Erreur = {rmse_val:.2f}\n\n")
        errcb2.append(rmse_val)

        # Test-set prediction of this fold's model, clamped to the target range.
        fold_pred = np.clip(m.predict(X_test)[:, 0], target_floor, target_ceiling)
        y_pred_test.append(fold_pred)

print(f"Total error = {np.mean(errcb2): .3f}")
# Blend the fold models: element-wise mean of their test predictions.
d = {'ID': test["ID"], 'target': np.mean(y_pred_test, axis=0)}

# Build the two-column submission frame with the columns in a fixed order.
sub = pd.DataFrame(d, columns=['ID', 'target'])

# Keep the blended predictions inside the range seen in the training target.
sub = sub.assign(
    target=sub['target'].clip(lower=train.target.min(), upper=train.target.max())
)
\n", 2269 | "\n", 2282 | "\n", 2283 | " \n", 2284 | " \n", 2285 | " \n", 2286 | " \n", 2287 | " \n", 2288 | " \n", 2289 | " \n", 2290 | " \n", 2291 | " \n", 2292 | " \n", 2293 | " \n", 2294 | " \n", 2295 | " \n", 2296 | " \n", 2297 | " \n", 2298 | " \n", 2299 | " \n", 2300 | " \n", 2301 | " \n", 2302 | " \n", 2303 | " \n", 2304 | " \n", 2305 | " \n", 2306 | " \n", 2307 | " \n", 2308 | " \n", 2309 | " \n", 2310 | " \n", 2311 | " \n", 2312 | " \n", 2313 | " \n", 2314 | " \n", 2315 | " \n", 2316 | " \n", 2317 | " \n", 2318 | " \n", 2319 | " \n", 2320 | " \n", 2321 | " \n", 2322 | " \n", 2323 | " \n", 2324 | " \n", 2325 | " \n", 2326 | " \n", 2327 | " \n", 2328 | " \n", 2329 | " \n", 2330 | " \n", 2331 | " \n", 2332 | " \n", 2333 | " \n", 2334 | " \n", 2335 | " \n", 2336 | " \n", 2337 | " \n", 2338 | " \n", 2339 | " \n", 2340 | " \n", 2341 | " \n", 2342 | "
IDtarget
0ID_test_0146.448013
1ID_test_178.831528
2ID_test_1033.348366
3ID_test_10055.782494
4ID_test_100088.687622
5ID_test_100130.877085
6ID_test_100275.518860
7ID_test_100337.986477
8ID_test_100434.908413
9ID_test_100552.110146
\n", 2343 | "
# Preview the first rows of the submission.
sub.head(10)

# Mean validation RMSE over the folds.
np.mean(errcb2)

# Single source of truth for the output file name (it was spelled out twice).
SUB_FILE_NAME = "cnn_preds.csv"
sub.to_csv(SUB_FILE_NAME, index=False)

from IPython.display import HTML


def create_download_link(title="Download CSV file", filename="data.csv"):
    """Return an IPython ``HTML`` object rendering a hyperlink to *filename*.

    Args:
        title: Text shown for the link.
        filename: Path of the file the link points to, relative to the
            notebook's working directory.

    Returns:
        ``IPython.display.HTML`` wrapping an ``<a>`` element.

    The template previously consisted of ``{title}`` alone, so ``filename``
    was never used and the output was plain text instead of a link.
    """
    from urllib.parse import quote

    # Percent-encode the path and quote the attribute so that names containing
    # spaces or other special characters still yield a valid href.
    markup = '<a href="{href}">{title}</a>'.format(href=quote(filename), title=title)
    return HTML(markup)


create_download_link(filename=SUB_FILE_NAME)
| "metadata": { 2430 | "kernelspec": { 2431 | "display_name": "Python 3", 2432 | "language": "python", 2433 | "name": "python3" 2434 | }, 2435 | "language_info": { 2436 | "codemirror_mode": { 2437 | "name": "ipython", 2438 | "version": 3 2439 | }, 2440 | "file_extension": ".py", 2441 | "mimetype": "text/x-python", 2442 | "name": "python", 2443 | "nbconvert_exporter": "python", 2444 | "pygments_lexer": "ipython3", 2445 | "version": "3.6.3" 2446 | } 2447 | }, 2448 | "nbformat": 4, 2449 | "nbformat_minor": 4 2450 | } 2451 | --------------------------------------------------------------------------------