├── README.md ├── code ├── .ipynb_checkpoints │ ├── clean_data-checkpoint.ipynb │ ├── combine_fetures-checkpoint.ipynb │ ├── extract_fetures-checkpoint.ipynb │ ├── holiday_crawl-checkpoint.ipynb │ ├── predict_result-checkpoint.ipynb │ ├── rules-checkpoint.ipynb │ ├── run-checkpoint.ipynb │ ├── split_samples-checkpoint.ipynb │ ├── train_model-checkpoint.ipynb │ ├── trick_extract-checkpoint.ipynb │ └── visualize-checkpoint.ipynb ├── clean_data.ipynb ├── clean_data.py ├── combine_fetures.ipynb ├── extract_fetures.ipynb ├── extract_fetures.py ├── holiday_crawl.ipynb ├── predict_result.ipynb ├── result.txt ├── rules.ipynb ├── run.ipynb ├── split_samples.ipynb ├── train_model.ipynb ├── trick_extract.ipynb ├── trick_extract.py ├── visualize.ipynb └── weather_crawl.py └── dataset ├── holiday.csv ├── train.csv └── yangzhong.csv /README.md: -------------------------------------------------------------------------------- 1 | # 管道搭建示例 2 | ## 基于天池 大航杯"智造扬中”电力AI (失败解决方案) 3 | 4 | 详细请见[博文](http://blog.zhanglun.me/2017/06/13/%E5%A4%A7%E8%88%AA%E6%9D%AF%E2%80%9C%E6%99%BA%E9%80%A0%E6%89%AC%E4%B8%AD%E2%80%9D%E7%94%B5%E5%8A%9BAI%E5%A4%A7%E8%B5%9B%E5%8F%82%E8%B5%9B%E7%BB%8F%E9%AA%8C/) 5 | 6 | #### 文件夹介绍 7 | 8 | code --主要代码文件 9 | 10 | dataset --数据文件 11 | 12 | 13 | 项目依赖: 14 | python3 15 | numpy 16 | pandas 17 | scipy 18 | sklearn 19 | mlxtend -------------------------------------------------------------------------------- /code/.ipynb_checkpoints/clean_data-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import sys\n", 12 | "from datetime import datetime, timedelta, date\n", 13 | "\n", 14 | "import numpy as np\n", 15 | "import pandas as pd\n", 16 | "import scipy as sp\n", 17 | "\n", 18 | "from sklearn.ensemble import IsolationForest" 19 | ] 20 | }, 21 | { 22 | "cell_type": 
def clean_quantile(df):
    """Replace one user's extreme power readings with that user's mean.

    Written to be applied per group (the notebook calls it through
    ``groupby('user_id').apply``). Rows whose ``power_consumption`` is at or
    above the upper quantile, or at or below the lower quantile, are
    overwritten with the group's mean consumption.

    Parameters
    ----------
    df : pandas.DataFrame
        Rows of a single user; must contain the ``user_id`` and
        ``power_consumption`` columns.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, modified in place.

    Notes
    -----
    Reads the module-level ``threshold`` (upper quantile; the lower one is
    ``1 - threshold``) and appends one diagnostic line per call to
    ``../dataset/clean.txt``.
    """
    up = df.power_consumption.quantile(threshold)
    down = df.power_consumption.quantile(1 - threshold)
    mean = df.power_consumption.mean()
    # Use a context manager so the log handle is closed after every group
    # instead of leaking one open file per user.
    with open('../dataset/clean.txt', 'a+') as log:
        print(df['user_id'].iloc[0], 'up:', up, 'down:', down, 'mean:', mean, end='\n', file=log)
    # The mask is evaluated before the assignment, so the mean computed above
    # is exactly the value the original recomputed here.
    df.loc[(df.power_consumption >= up) | (df.power_consumption <= down), 'power_consumption'] = mean
    return df
def rolling_power_consumption(df):
    """Smooth one user's records with a 2-row rolling mean.

    ``record_date`` is moved into the index, every remaining column is
    averaged over a window of two consecutive rows, and ``record_date`` is
    then restored as a regular column. The first row has no predecessor, so
    all of its averaged columns come back as NaN.

    Parameters
    ----------
    df : pandas.DataFrame
        Rows of a single user, containing a ``record_date`` column.

    Returns
    -------
    pandas.DataFrame
        A new frame with the same columns, holding the rolling means.
    """
    return (
        df.set_index('record_date')
        .rolling(2)
        .mean()
        .reset_index()
    )
def write_to_file(filename, data):
    """Append holiday records from a JSON payload to a CSV file.

    Parameters
    ----------
    filename : str
        Path of the CSV file. If it does not exist yet it is created and a
        ``date,holiday`` header line is written first.
    data : str
        JSON text that decodes to a mapping; each key/value pair becomes one
        ``key,value`` row. String values are converted with ``int``.

    Raises
    ------
    ValueError
        If ``data`` is not valid JSON, or a string value is not an integer
        literal.
    """
    # Parse before touching the file so a malformed payload does not leave a
    # header-only file behind.
    records = json.loads(data)
    needs_header = not exists(filename)
    # Open once for the whole batch instead of re-opening per record.
    with open(filename, 'a') as f:
        if needs_header:
            f.write('date,holiday\n')
        for k, v in records.items():
            if isinstance(v, str):
                v = int(v)
            f.write('{},{}\n'.format(k, v))
"'http://tool.bitefu.net/jiari/?d=20150101,20150102,20150103,20150104,20150105,20150106,20150107,20150108,20150109,20150110,20150111,20150112,20150113,20150114,20150115,20150116,20150117,20150118,20150119,20150120,20150121,20150122,20150123,20150124,20150125,20150126,20150127,20150128,20150129,20150130,20150131,20150201,20150202,20150203,20150204,20150205,20150206,20150207,20150208,20150209,20150210,20150211,20150212,20150213,20150214,20150215,20150216,20150217,20150218,20150219,20150220,20150221,20150222,20150223,20150224,20150225,20150226,20150227,20150228,20150301,20150302,20150303,20150304,20150305,20150306,20150307,20150308,20150309,20150310,20150311,20150312,20150313,20150314,20150315,20150316,20150317,20150318,20150319,20150320,20150321,20150322,20150323,20150324,20150325,20150326,20150327,20150328,20150329,20150330,20150331,20150401,20150402,20150403,20150404,20150405,20150406,20150407,20150408,20150409,20150410'" 91 | ] 92 | }, 93 | "execution_count": 31, 94 | "metadata": {}, 95 | "output_type": "execute_result" 96 | } 97 | ], 98 | "source": [ 99 | "url = url_fmt.format(agrv)\n", 100 | "url" 101 | ] 102 | }, 103 | { 104 | "cell_type": "code", 105 | "execution_count": 63, 106 | "metadata": { 107 | "collapsed": false 108 | }, 109 | "outputs": [], 110 | "source": [ 111 | "for i in range(0, date_ags.shape[0], 100):\n", 112 | " agrv = ','.join(date_ags[i:(i+100)])\n", 113 | " url = url_fmt.format(agrv)\n", 114 | " res = requests.get(url)\n", 115 | " write_to_file('../dataset/holiday.csv', res.text)" 116 | ] 117 | } 118 | ], 119 | "metadata": { 120 | "anaconda-cloud": {}, 121 | "hide_input": false, 122 | "kernelspec": { 123 | "display_name": "Python 3", 124 | "language": "python", 125 | "name": "python3" 126 | }, 127 | "language_info": { 128 | "codemirror_mode": { 129 | "name": "ipython", 130 | "version": 3 131 | }, 132 | "file_extension": ".py", 133 | "mimetype": "text/x-python", 134 | "name": "python", 135 | "nbconvert_exporter": "python", 136 | 
"pygments_lexer": "ipython3", 137 | "version": "3.5.1" 138 | } 139 | }, 140 | "nbformat": 4, 141 | "nbformat_minor": 2 142 | } 143 | -------------------------------------------------------------------------------- /code/.ipynb_checkpoints/predict_result-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [ 10 | { 11 | "name": "stderr", 12 | "output_type": "stream", 13 | "text": [ 14 | "/home/zhanglun/.pyenv/versions/anaconda3-4.1.0/lib/python3.5/site-packages/sklearn/cross_validation.py:44: DeprecationWarning: This module was deprecated in version 0.18 in favor of the model_selection module into which all the refactored classes and functions are moved. Also note that the interface of the new CV iterators are different from that of this module. This module will be removed in 0.20.\n", 15 | " \"This module will be removed in 0.20.\", DeprecationWarning)\n" 16 | ] 17 | } 18 | ], 19 | "source": [ 20 | "import pickle\n", 21 | "\n", 22 | "import numpy as np\n", 23 | "import scipy as sp\n", 24 | "import pandas as pd\n", 25 | "\n", 26 | "from sklearn.ensemble import RandomForestRegressor\n", 27 | "from sklearn.metrics import r2_score, roc_auc_score\n", 28 | "from sklearn import cross_validation\n", 29 | "from sklearn.model_selection import GridSearchCV\n", 30 | "from sklearn.svm import SVR " 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": 2, 36 | "metadata": { 37 | "collapsed": true 38 | }, 39 | "outputs": [], 40 | "source": [ 41 | "model_path = '../dataset/fetures/model.pkl'\n", 42 | "output_path = '../submit/Tianchi_power_predict_table.csv'" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": 3, 48 | "metadata": { 49 | "collapsed": true 50 | }, 51 | "outputs": [], 52 | "source": [ 53 | "fetures = 
pd.read_csv('../dataset/fetures/test_feture.csv', parse_dates=['predict_date'])" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": 4, 59 | "metadata": { 60 | "collapsed": true 61 | }, 62 | "outputs": [], 63 | "source": [ 64 | "model = pickle.load(open(model_path, 'rb'))" 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": 5, 70 | "metadata": { 71 | "collapsed": false 72 | }, 73 | "outputs": [], 74 | "source": [ 75 | "train_x = fetures.drop(['predict_date', 'predict_power_consumption'], axis=1)" 76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": 6, 81 | "metadata": { 82 | "collapsed": false 83 | }, 84 | "outputs": [], 85 | "source": [ 86 | "fetures.predict_power_consumption = model.predict(train_x.fillna(0)).astype(np.int64)" 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": 7, 92 | "metadata": { 93 | "collapsed": false 94 | }, 95 | "outputs": [], 96 | "source": [ 97 | "fetures.predict_date = fetures.predict_date.map(lambda x: x.strftime('%Y%m%d'))" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": 8, 103 | "metadata": { 104 | "collapsed": false 105 | }, 106 | "outputs": [ 107 | { 108 | "data": { 109 | "text/plain": [ 110 | "3754006.806451613" 111 | ] 112 | }, 113 | "execution_count": 8, 114 | "metadata": {}, 115 | "output_type": "execute_result" 116 | } 117 | ], 118 | "source": [ 119 | "fetures['predict_power_consumption'].mean()" 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": 9, 125 | "metadata": { 126 | "collapsed": true 127 | }, 128 | "outputs": [], 129 | "source": [ 130 | "fetures.to_csv(output_path, index=False, columns=['predict_date', 'predict_power_consumption'])" 131 | ] 132 | } 133 | ], 134 | "metadata": { 135 | "anaconda-cloud": {}, 136 | "hide_input": false, 137 | "kernelspec": { 138 | "display_name": "Python 3", 139 | "language": "python", 140 | "name": "python3" 141 | }, 142 | "language_info": { 143 | 
def gen_df(g):
    """Align one group's consumption onto the module-level ``indexs`` dates.

    The group's ``power_consumption`` values are re-indexed onto ``indexs``;
    dates that are missing from the group are filled with the mean of the
    values that were present.

    Parameters
    ----------
    g : pandas.DataFrame
        Group rows exposing a ``power_consumption`` column; the notebook
        passes groups that are indexed by ``record_date``.

    Returns
    -------
    pandas.DataFrame
        Frame indexed by ``indexs`` with a single ``power_consumption``
        column.
    """
    aligned = pd.Series(g.power_consumption, index=indexs)
    frame = pd.DataFrame(index=indexs, columns=['power_consumption'])
    frame['power_consumption'] = aligned
    fill_value = frame.power_consumption.mean()
    frame.fillna(fill_value, inplace=True)
    return frame
"code", 69 | "execution_count": 6, 70 | "metadata": { 71 | "collapsed": false 72 | }, 73 | "outputs": [], 74 | "source": [ 75 | "train_df['record_date'] = train_df.record_date + timedelta(days=31)" 76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": 8, 81 | "metadata": { 82 | "collapsed": false 83 | }, 84 | "outputs": [], 85 | "source": [ 86 | "result_df = train_df.set_index('record_date').groupby(['user_id']).apply(gen_df)" 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": 9, 92 | "metadata": { 93 | "collapsed": false 94 | }, 95 | "outputs": [], 96 | "source": [ 97 | "submit_df = result_df.reset_index().groupby('level_1')[['power_consumption']].sum()" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": 10, 103 | "metadata": { 104 | "collapsed": false 105 | }, 106 | "outputs": [], 107 | "source": [ 108 | "submit_df['predict_date'] = submit_df.index.map(lambda x:x.strftime('%Y%m%d'))" 109 | ] 110 | }, 111 | { 112 | "cell_type": "code", 113 | "execution_count": 11, 114 | "metadata": { 115 | "collapsed": false 116 | }, 117 | "outputs": [], 118 | "source": [ 119 | "submit_df['predict_power_consumption'] = submit_df.power_consumption.astype(int)" 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": 12, 125 | "metadata": { 126 | "collapsed": false 127 | }, 128 | "outputs": [], 129 | "source": [ 130 | "submit_df.to_csv('../Tianchi_power_predict_table.csv', columns=['predict_date', 'predict_power_consumption'], index=False)" 131 | ] 132 | } 133 | ], 134 | "metadata": { 135 | "anaconda-cloud": {}, 136 | "hide_input": false, 137 | "kernelspec": { 138 | "display_name": "Python 3", 139 | "language": "python", 140 | "name": "python3" 141 | }, 142 | "language_info": { 143 | "codemirror_mode": { 144 | "name": "ipython", 145 | "version": 3 146 | }, 147 | "file_extension": ".py", 148 | "mimetype": "text/x-python", 149 | "name": "python", 150 | "nbconvert_exporter": "python", 151 | 
"pygments_lexer": "ipython3", 152 | "version": "3.5.1" 153 | } 154 | }, 155 | "nbformat": 4, 156 | "nbformat_minor": 2 157 | } 158 | -------------------------------------------------------------------------------- /code/.ipynb_checkpoints/run-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "!date" 12 | ] 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "metadata": {}, 17 | "source": [ 18 | "# 规则,前一个月平均值 " 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": null, 24 | "metadata": { 25 | "collapsed": true 26 | }, 27 | "outputs": [], 28 | "source": [ 29 | "extract_ipy = 'trick_extract.ipynb'\n", 30 | "extract_py = 'trick_extract.py'" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": null, 36 | "metadata": { 37 | "collapsed": false 38 | }, 39 | "outputs": [], 40 | "source": [ 41 | "# !jupyter nbconvert --to python $extract_ipy" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": null, 47 | "metadata": { 48 | "collapsed": false 49 | }, 50 | "outputs": [], 51 | "source": [ 52 | "# !python $extract_py test" 53 | ] 54 | }, 55 | { 56 | "cell_type": "markdown", 57 | "metadata": {}, 58 | "source": [ 59 | "## 切分数据" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": null, 65 | "metadata": { 66 | "collapsed": false 67 | }, 68 | "outputs": [], 69 | "source": [ 70 | "!runipy split_samples.ipynb" 71 | ] 72 | }, 73 | { 74 | "cell_type": "markdown", 75 | "metadata": {}, 76 | "source": [ 77 | "# 过滤清洗数据" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": null, 83 | "metadata": { 84 | "collapsed": false 85 | }, 86 | "outputs": [], 87 | "source": [ 88 | "!jupyter nbconvert --to python clean_data.ipynb" 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | 
"execution_count": null, 94 | "metadata": { 95 | "collapsed": false 96 | }, 97 | "outputs": [], 98 | "source": [ 99 | "!python clean_data.py test" 100 | ] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "execution_count": null, 105 | "metadata": { 106 | "collapsed": false 107 | }, 108 | "outputs": [], 109 | "source": [ 110 | "for i in range(1, 10):\n", 111 | " !python clean_data.py train $i" 112 | ] 113 | }, 114 | { 115 | "cell_type": "markdown", 116 | "metadata": {}, 117 | "source": [ 118 | "## 提取特征 " 119 | ] 120 | }, 121 | { 122 | "cell_type": "code", 123 | "execution_count": null, 124 | "metadata": { 125 | "collapsed": true 126 | }, 127 | "outputs": [], 128 | "source": [ 129 | "extract_ipy = 'extract_fetures.ipynb'\n", 130 | "extract_py = 'extract_fetures.py'" 131 | ] 132 | }, 133 | { 134 | "cell_type": "code", 135 | "execution_count": null, 136 | "metadata": { 137 | "collapsed": false 138 | }, 139 | "outputs": [], 140 | "source": [ 141 | "!jupyter nbconvert --to python $extract_ipy" 142 | ] 143 | }, 144 | { 145 | "cell_type": "code", 146 | "execution_count": null, 147 | "metadata": { 148 | "collapsed": true 149 | }, 150 | "outputs": [], 151 | "source": [ 152 | "for i in range(1, 10): \n", 153 | " !python $extract_py train $i" 154 | ] 155 | }, 156 | { 157 | "cell_type": "code", 158 | "execution_count": null, 159 | "metadata": { 160 | "collapsed": true 161 | }, 162 | "outputs": [], 163 | "source": [ 164 | "!python $extract_py test" 165 | ] 166 | }, 167 | { 168 | "cell_type": "code", 169 | "execution_count": null, 170 | "metadata": { 171 | "collapsed": true 172 | }, 173 | "outputs": [], 174 | "source": [ 175 | "!runipy combine_fetures.ipynb" 176 | ] 177 | }, 178 | { 179 | "cell_type": "markdown", 180 | "metadata": {}, 181 | "source": [ 182 | "## 训练模型 " 183 | ] 184 | }, 185 | { 186 | "cell_type": "code", 187 | "execution_count": null, 188 | "metadata": { 189 | "collapsed": false, 190 | "scrolled": false 191 | }, 192 | "outputs": [], 193 | "source": [ 194 | 
"!runipy train_model.ipynb" 195 | ] 196 | }, 197 | { 198 | "cell_type": "markdown", 199 | "metadata": {}, 200 | "source": [ 201 | "## 模型预测 " 202 | ] 203 | }, 204 | { 205 | "cell_type": "code", 206 | "execution_count": null, 207 | "metadata": { 208 | "collapsed": false 209 | }, 210 | "outputs": [], 211 | "source": [ 212 | "!runipy predict_result.ipynb" 213 | ] 214 | }, 215 | { 216 | "cell_type": "code", 217 | "execution_count": null, 218 | "metadata": { 219 | "collapsed": false 220 | }, 221 | "outputs": [], 222 | "source": [ 223 | "!date" 224 | ] 225 | } 226 | ], 227 | "metadata": { 228 | "anaconda-cloud": {}, 229 | "hide_input": false, 230 | "kernelspec": { 231 | "display_name": "Python 3", 232 | "language": "python", 233 | "name": "python3" 234 | }, 235 | "language_info": { 236 | "codemirror_mode": { 237 | "name": "ipython", 238 | "version": 3 239 | }, 240 | "file_extension": ".py", 241 | "mimetype": "text/x-python", 242 | "name": "python", 243 | "nbconvert_exporter": "python", 244 | "pygments_lexer": "ipython3", 245 | "version": "3.5.1" 246 | } 247 | }, 248 | "nbformat": 4, 249 | "nbformat_minor": 2 250 | } 251 | -------------------------------------------------------------------------------- /code/.ipynb_checkpoints/split_samples-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 123, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "from os.path import exists, join\n", 12 | "from os import mkdir\n", 13 | "from datetime import datetime, timedelta, date\n", 14 | "from collections import namedtuple\n", 15 | "\n", 16 | "import numpy as np\n", 17 | "import pandas as pd\n", 18 | "import scipy as sp" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 124, 24 | "metadata": { 25 | "collapsed": true 26 | }, 27 | "outputs": [], 28 | "source": [ 29 | "train_base_dir = '../dataset/fetures/'" 30 
| ] 31 | }, 32 | { 33 | "cell_type": "markdown", 34 | "metadata": {}, 35 | "source": [ 36 | "# 移动滑窗划分成9个predict样本 " 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": 125, 42 | "metadata": { 43 | "collapsed": false 44 | }, 45 | "outputs": [], 46 | "source": [ 47 | "df = pd.read_csv('../dataset/train.csv', parse_dates=['record_date'], index_col=['record_date'])" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": 126, 53 | "metadata": { 54 | "collapsed": false 55 | }, 56 | "outputs": [], 57 | "source": [ 58 | "begin = df.index.max().date() + timedelta(1)" 59 | ] 60 | }, 61 | { 62 | "cell_type": "markdown", 63 | "metadata": {}, 64 | "source": [ 65 | "## 训练样本划分" 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": 127, 71 | "metadata": { 72 | "collapsed": true 73 | }, 74 | "outputs": [], 75 | "source": [ 76 | "TimeSpan = namedtuple('TimeSpan', ['train_start', 'train_end', 'predict_start', 'predict_end'])" 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": 128, 82 | "metadata": { 83 | "collapsed": true 84 | }, 85 | "outputs": [], 86 | "source": [ 87 | "timespans = []" 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": 129, 93 | "metadata": { 94 | "collapsed": true 95 | }, 96 | "outputs": [], 97 | "source": [ 98 | "end = begin - timedelta(1)\n", 99 | "for i in range(9):\n", 100 | " p_end = date(begin.year, begin.month-i, begin.day)\n", 101 | " predict_end = p_end - timedelta(1)\n", 102 | " predict_start = date(begin.year, begin.month-i-1, begin.day)\n", 103 | " train_end = predict_start - timedelta(1)\n", 104 | " train_start = date(predict_start.year-1, predict_start.month, predict_start.day)\n", 105 | " span = TimeSpan(train_start, train_end, predict_start, predict_end)\n", 106 | "# print(span)\n", 107 | " timespans.append(span)" 108 | ] 109 | }, 110 | { 111 | "cell_type": "markdown", 112 | "metadata": {}, 113 | "source": [ 114 | "##### 创建文件架 " 115 | ] 116 | }, 117 | { 
def create_train_sample(timespan):
    """Write the training window and its prediction targets for one span.

    Slices the module-level ``df`` (indexed by ``record_date``) into the
    training range and the prediction range described by ``timespan``. The
    prediction range is summed per day into ``predict_power_consumption``.
    Both frames are written under
    ``<train_base_dir>/<predict_start.month>/`` as ``train.csv`` and
    ``train_predict.csv``, and a summary of each is printed via ``info()``.

    Parameters
    ----------
    timespan : TimeSpan
        Object with ``train_start``, ``train_end``, ``predict_start`` and
        ``predict_end`` date attributes.

    Notes
    -----
    The original body ignored ``timespan`` and read the global ``span``,
    which only worked because the calling loop variable had that name. The
    parameter is now the single source of the date ranges.
    """
    month_dir = str(timespan.predict_start.month)
    train = df.loc[str(timespan.train_start):str(timespan.train_end)]
    train_predict = df.loc[str(timespan.predict_start):str(timespan.predict_end)].reset_index()
    # Collapse all users into one total consumption figure per day.
    train_predict = train_predict.groupby('record_date')[['power_consumption']].sum()
    train_predict['predict_power_consumption'] = train_predict.power_consumption.astype(np.int64)
    train_predict.drop('power_consumption', axis=1, inplace=True)
    train_predict.index.name = 'predict_date'
    train_path = join(train_base_dir, month_dir, 'train.csv')
    train_predict_path = join(train_base_dir, month_dir, 'train_predict.csv')
    train.to_csv(train_path)
    train.info()
    train_predict.to_csv(train_predict_path)
    train_predict.info()
"power_consumption 532156 non-null int64\n", 182 | "dtypes: int64(2)\n", 183 | "memory usage: 12.2 MB\n", 184 | "\n", 185 | "DatetimeIndex: 30 entries, 2016-09-01 to 2016-09-30\n", 186 | "Data columns (total 1 columns):\n", 187 | "predict_power_consumption 30 non-null int64\n", 188 | "dtypes: int64(1)\n", 189 | "memory usage: 480.0 bytes\n", 190 | "\n", 191 | "DatetimeIndex: 532156 entries, 2015-08-01 to 2016-07-31\n", 192 | "Data columns (total 2 columns):\n", 193 | "user_id 532156 non-null int64\n", 194 | "power_consumption 532156 non-null int64\n", 195 | "dtypes: int64(2)\n", 196 | "memory usage: 12.2 MB\n", 197 | "\n", 198 | "DatetimeIndex: 31 entries, 2016-08-01 to 2016-08-31\n", 199 | "Data columns (total 1 columns):\n", 200 | "predict_power_consumption 31 non-null int64\n", 201 | "dtypes: int64(1)\n", 202 | "memory usage: 496.0 bytes\n", 203 | "\n", 204 | "DatetimeIndex: 532153 entries, 2015-07-01 to 2016-06-30\n", 205 | "Data columns (total 2 columns):\n", 206 | "user_id 532153 non-null int64\n", 207 | "power_consumption 532153 non-null int64\n", 208 | "dtypes: int64(2)\n", 209 | "memory usage: 12.2 MB\n", 210 | "\n", 211 | "DatetimeIndex: 31 entries, 2016-07-01 to 2016-07-31\n", 212 | "Data columns (total 1 columns):\n", 213 | "predict_power_consumption 31 non-null int64\n", 214 | "dtypes: int64(1)\n", 215 | "memory usage: 496.0 bytes\n", 216 | "\n", 217 | "DatetimeIndex: 532150 entries, 2015-06-01 to 2016-05-31\n", 218 | "Data columns (total 2 columns):\n", 219 | "user_id 532150 non-null int64\n", 220 | "power_consumption 532150 non-null int64\n", 221 | "dtypes: int64(2)\n", 222 | "memory usage: 12.2 MB\n", 223 | "\n", 224 | "DatetimeIndex: 30 entries, 2016-06-01 to 2016-06-30\n", 225 | "Data columns (total 1 columns):\n", 226 | "predict_power_consumption 30 non-null int64\n", 227 | "dtypes: int64(1)\n", 228 | "memory usage: 480.0 bytes\n", 229 | "\n", 230 | "DatetimeIndex: 532150 entries, 2015-05-01 to 2016-04-30\n", 231 | "Data columns (total 2 
columns):\n", 232 | "user_id 532150 non-null int64\n", 233 | "power_consumption 532150 non-null int64\n", 234 | "dtypes: int64(2)\n", 235 | "memory usage: 12.2 MB\n", 236 | "\n", 237 | "DatetimeIndex: 31 entries, 2016-05-01 to 2016-05-31\n", 238 | "Data columns (total 1 columns):\n", 239 | "predict_power_consumption 31 non-null int64\n", 240 | "dtypes: int64(1)\n", 241 | "memory usage: 496.0 bytes\n", 242 | "\n", 243 | "DatetimeIndex: 532148 entries, 2015-04-01 to 2016-03-31\n", 244 | "Data columns (total 2 columns):\n", 245 | "user_id 532148 non-null int64\n", 246 | "power_consumption 532148 non-null int64\n", 247 | "dtypes: int64(2)\n", 248 | "memory usage: 12.2 MB\n", 249 | "\n", 250 | "DatetimeIndex: 30 entries, 2016-04-01 to 2016-04-30\n", 251 | "Data columns (total 1 columns):\n", 252 | "predict_power_consumption 30 non-null int64\n", 253 | "dtypes: int64(1)\n", 254 | "memory usage: 480.0 bytes\n", 255 | "\n", 256 | "DatetimeIndex: 532147 entries, 2015-03-01 to 2016-02-29\n", 257 | "Data columns (total 2 columns):\n", 258 | "user_id 532147 non-null int64\n", 259 | "power_consumption 532147 non-null int64\n", 260 | "dtypes: int64(2)\n", 261 | "memory usage: 12.2 MB\n", 262 | "\n", 263 | "DatetimeIndex: 31 entries, 2016-03-01 to 2016-03-31\n", 264 | "Data columns (total 1 columns):\n", 265 | "predict_power_consumption 31 non-null int64\n", 266 | "dtypes: int64(1)\n", 267 | "memory usage: 496.0 bytes\n", 268 | "\n", 269 | "DatetimeIndex: 530692 entries, 2015-02-01 to 2016-01-31\n", 270 | "Data columns (total 2 columns):\n", 271 | "user_id 530692 non-null int64\n", 272 | "power_consumption 530692 non-null int64\n", 273 | "dtypes: int64(2)\n", 274 | "memory usage: 12.1 MB\n", 275 | "\n", 276 | "DatetimeIndex: 29 entries, 2016-02-01 to 2016-02-29\n", 277 | "Data columns (total 1 columns):\n", 278 | "predict_power_consumption 29 non-null int64\n", 279 | "dtypes: int64(1)\n", 280 | "memory usage: 464.0 bytes\n", 281 | "\n", 282 | "DatetimeIndex: 530693 entries, 
2015-01-01 to 2015-12-31\n", 283 | "Data columns (total 2 columns):\n", 284 | "user_id 530693 non-null int64\n", 285 | "power_consumption 530693 non-null int64\n", 286 | "dtypes: int64(2)\n", 287 | "memory usage: 12.1 MB\n", 288 | "\n", 289 | "DatetimeIndex: 31 entries, 2016-01-01 to 2016-01-31\n", 290 | "Data columns (total 1 columns):\n", 291 | "predict_power_consumption 31 non-null int64\n", 292 | "dtypes: int64(1)\n", 293 | "memory usage: 496.0 bytes\n" 294 | ] 295 | } 296 | ], 297 | "source": [ 298 | "for span in timespans:\n", 299 | " create_train_sample(span)" 300 | ] 301 | }, 302 | { 303 | "cell_type": "code", 304 | "execution_count": 138, 305 | "metadata": { 306 | "collapsed": true 307 | }, 308 | "outputs": [], 309 | "source": [ 310 | "test = df.loc['2015/10/1':'2016/9/30']" 311 | ] 312 | }, 313 | { 314 | "cell_type": "code", 315 | "execution_count": 139, 316 | "metadata": { 317 | "collapsed": false 318 | }, 319 | "outputs": [], 320 | "source": [ 321 | "index = pd.date_range('2016/10/1', '2016/10/31')\n", 322 | "test_predict = pd.DataFrame(0, index=index, columns=['predict_power_consumption'])\n", 323 | "test_predict.index.name = 'predict_date'" 324 | ] 325 | }, 326 | { 327 | "cell_type": "code", 328 | "execution_count": 140, 329 | "metadata": { 330 | "collapsed": false 331 | }, 332 | "outputs": [ 333 | { 334 | "name": "stdout", 335 | "output_type": "stream", 336 | "text": [ 337 | "\n", 338 | "DatetimeIndex: 532159 entries, 2015-10-01 to 2016-09-28\n", 339 | "Data columns (total 2 columns):\n", 340 | "user_id 532159 non-null int64\n", 341 | "power_consumption 532159 non-null int64\n", 342 | "dtypes: int64(2)\n", 343 | "memory usage: 12.2 MB\n" 344 | ] 345 | } 346 | ], 347 | "source": [ 348 | "test.to_csv('../dataset/fetures/test.csv')\n", 349 | "test.info()" 350 | ] 351 | }, 352 | { 353 | "cell_type": "code", 354 | "execution_count": 142, 355 | "metadata": { 356 | "collapsed": false 357 | }, 358 | "outputs": [ 359 | { 360 | "name": "stdout", 361 | 
"output_type": "stream", 362 | "text": [ 363 | "\n", 364 | "DatetimeIndex: 31 entries, 2016-10-01 to 2016-10-31\n", 365 | "Freq: D\n", 366 | "Data columns (total 1 columns):\n", 367 | "predict_power_consumption 31 non-null int64\n", 368 | "dtypes: int64(1)\n", 369 | "memory usage: 496.0 bytes\n" 370 | ] 371 | } 372 | ], 373 | "source": [ 374 | "test_predict.to_csv('../dataset/fetures/test_predict.csv')\n", 375 | "test_predict.info()" 376 | ] 377 | } 378 | ], 379 | "metadata": { 380 | "anaconda-cloud": {}, 381 | "hide_input": false, 382 | "kernelspec": { 383 | "display_name": "Python 3", 384 | "language": "python", 385 | "name": "python3" 386 | }, 387 | "language_info": { 388 | "codemirror_mode": { 389 | "name": "ipython", 390 | "version": 3 391 | }, 392 | "file_extension": ".py", 393 | "mimetype": "text/x-python", 394 | "name": "python", 395 | "nbconvert_exporter": "python", 396 | "pygments_lexer": "ipython3", 397 | "version": "3.5.1" 398 | } 399 | }, 400 | "nbformat": 4, 401 | "nbformat_minor": 2 402 | } 403 | -------------------------------------------------------------------------------- /code/.ipynb_checkpoints/train_model-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 35, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import pickle\n", 12 | "from datetime import datetime\n", 13 | "\n", 14 | "import numpy as np\n", 15 | "import scipy as sp\n", 16 | "import pandas as pd\n", 17 | "\n", 18 | "from sklearn.ensemble import RandomForestRegressor\n", 19 | "from sklearn.metrics import r2_score, roc_auc_score\n", 20 | "from sklearn import cross_validation\n", 21 | "from sklearn.model_selection import GridSearchCV\n", 22 | "from sklearn.svm import SVR\n", 23 | "import mlxtend" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": 36, 29 | "metadata": { 30 | "collapsed": true 31 | }, 32 
| "outputs": [], 33 | "source": [ 34 | "from mlxtend.regressor import StackingRegressor\n", 35 | "from mlxtend.data import boston_housing_data\n", 36 | "from sklearn.linear_model import LinearRegression\n", 37 | "from sklearn.linear_model import Ridge\n", 38 | "from sklearn.svm import SVR\n", 39 | "from sklearn.ensemble import ExtraTreesRegressor" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": 37, 45 | "metadata": { 46 | "collapsed": true 47 | }, 48 | "outputs": [], 49 | "source": [ 50 | "def loss_score(predict, real):\n", 51 | " f = (real - predict)/real\n", 52 | " n = len(f)\n", 53 | " f = f.replace([np.nan, -np.nan], 0)\n", 54 | " score = 1 - np.abs(f).sum()/n\n", 55 | " return score " 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": 38, 61 | "metadata": { 62 | "collapsed": true 63 | }, 64 | "outputs": [], 65 | "source": [ 66 | "model_path = '../dataset/fetures/model.pkl'" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": 39, 72 | "metadata": { 73 | "collapsed": true 74 | }, 75 | "outputs": [], 76 | "source": [ 77 | "fetures = pd.read_csv('../dataset/fetures/train_feture.csv')" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": 40, 83 | "metadata": { 84 | "collapsed": false 85 | }, 86 | "outputs": [], 87 | "source": [ 88 | "train_x = fetures.drop(['predict_date', 'predict_power_consumption'], axis=1)" 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": 41, 94 | "metadata": { 95 | "collapsed": false 96 | }, 97 | "outputs": [], 98 | "source": [ 99 | "train_y = fetures['predict_power_consumption']" 100 | ] 101 | }, 102 | { 103 | "cell_type": "markdown", 104 | "metadata": {}, 105 | "source": [ 106 | "### 使用 gridsearch 调参" 107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "execution_count": 43, 112 | "metadata": { 113 | "collapsed": false 114 | }, 115 | "outputs": [], 116 | "source": [ 117 | "parameters = {\n", 118 | " 'n_estimators': [10, 100, 200, 
300, 500],\n", 119 | "# 'criterion': ['mse', 'mae'],\n", 120 | " 'max_features': [10, 50, 100, 200, 'sqrt']\n", 121 | "}" 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": 44, 127 | "metadata": { 128 | "collapsed": true 129 | }, 130 | "outputs": [], 131 | "source": [ 132 | "# model = RandomForestRegressor(n_jobs=-1, oob_score=True)" 133 | ] 134 | }, 135 | { 136 | "cell_type": "code", 137 | "execution_count": 45, 138 | "metadata": { 139 | "collapsed": false 140 | }, 141 | "outputs": [], 142 | "source": [ 143 | "# cv = cross_validation.ShuffleSplit(train_x.shape[0], n_iter=8, test_size=0.1, random_state=0)\n", 144 | "# clf = GridSearchCV(model, parameters, cv=cv)" 145 | ] 146 | }, 147 | { 148 | "cell_type": "code", 149 | "execution_count": 46, 150 | "metadata": { 151 | "collapsed": true 152 | }, 153 | "outputs": [], 154 | "source": [ 155 | "# clf.fit(train_x.fillna(0), train_y)" 156 | ] 157 | }, 158 | { 159 | "cell_type": "code", 160 | "execution_count": 47, 161 | "metadata": { 162 | "collapsed": false 163 | }, 164 | "outputs": [], 165 | "source": [ 166 | "# model = RandomForestRegressor(n_jobs=-1, oob_score=True, random_state=666, **clf.best_params_)" 167 | ] 168 | }, 169 | { 170 | "cell_type": "code", 171 | "execution_count": 48, 172 | "metadata": { 173 | "collapsed": false 174 | }, 175 | "outputs": [], 176 | "source": [ 177 | "# cv = cross_validation.ShuffleSplit(train_x.shape[0], n_iter=5, test_size=0.1, random_state=666)\n", 178 | "# r2_score = cross_validation.cross_val_score(model, train_x.fillna(0), train_y, cv=cv)\n", 179 | "# r2_score" 180 | ] 181 | }, 182 | { 183 | "cell_type": "code", 184 | "execution_count": 288, 185 | "metadata": { 186 | "collapsed": true 187 | }, 188 | "outputs": [], 189 | "source": [ 190 | "cv = cross_validation.ShuffleSplit(train_x.shape[0], n_iter=10, test_size=0.2, random_state=666)" 191 | ] 192 | }, 193 | { 194 | "cell_type": "code", 195 | "execution_count": 290, 196 | "metadata": { 197 | "collapsed": 
true 198 | }, 199 | "outputs": [], 200 | "source": [ 201 | "forest = RandomForestRegressor(\n", 202 | " n_jobs=-1,\n", 203 | "# criterion='mae',\n", 204 | " random_state=1,\n", 205 | " oob_score=True,\n", 206 | " n_estimators=100,\n", 207 | " max_features='auto')\n", 208 | "fores = RandomForestRegressor(\n", 209 | " n_jobs=-1,\n", 210 | " random_state=2,\n", 211 | " oob_score=True,\n", 212 | " n_estimators=300,\n", 213 | " max_features=300)\n", 214 | "extract = ExtraTreesRegressor(\n", 215 | " n_jobs=-1,\n", 216 | " random_state=3,\n", 217 | "# criterion='mae',\n", 218 | " bootstrap=True,\n", 219 | " oob_score=True,\n", 220 | " n_estimators=100,\n", 221 | " max_features='auto')\n", 222 | "extrac = ExtraTreesRegressor(\n", 223 | " n_jobs=-1,\n", 224 | " random_state=4,\n", 225 | " bootstrap=True,\n", 226 | " oob_score=True,\n", 227 | " n_estimators=300,\n", 228 | " max_features=300)" 229 | ] 230 | }, 231 | { 232 | "cell_type": "code", 233 | "execution_count": 292, 234 | "metadata": { 235 | "collapsed": true 236 | }, 237 | "outputs": [], 238 | "source": [ 239 | "clfs = [forest, fores, extract, extrac]" 240 | ] 241 | }, 242 | { 243 | "cell_type": "code", 244 | "execution_count": 293, 245 | "metadata": { 246 | "collapsed": true 247 | }, 248 | "outputs": [], 249 | "source": [ 250 | "params = {\n", 251 | " 'forest__n-estimators': [10, 50, 100],\n", 252 | " 'forest__max-features': [10, 50, 100],\n", 253 | " 'fores__n-estimators': [100, 200, 500],\n", 254 | " 'fores__max-features': [100, 200, 300],\n", 255 | " 'extract__n-estimators': [10, 50, 100],\n", 256 | " 'extract__max-features': [10, 50, 100],\n", 257 | " 'extrac__n-estimators': [100, 200, 500],\n", 258 | " 'extrac__max-features': [100, 200, 300],\n", 259 | "}" 260 | ] 261 | }, 262 | { 263 | "cell_type": "code", 264 | "execution_count": 294, 265 | "metadata": { 266 | "collapsed": false 267 | }, 268 | "outputs": [], 269 | "source": [ 270 | "meta = LinearRegression(n_jobs=-1)" 271 | ] 272 | }, 273 | { 274 | 
"cell_type": "code", 275 | "execution_count": 295, 276 | "metadata": { 277 | "collapsed": false 278 | }, 279 | "outputs": [], 280 | "source": [ 281 | "stacker = StackingRegressor(regressors=clfs, meta_regressor=meta, verbose=True)" 282 | ] 283 | }, 284 | { 285 | "cell_type": "code", 286 | "execution_count": 296, 287 | "metadata": { 288 | "collapsed": false 289 | }, 290 | "outputs": [], 291 | "source": [ 292 | "grid = GridSearchCV(estimator=stacker, param_grid=params, cv=cv)" 293 | ] 294 | }, 295 | { 296 | "cell_type": "code", 297 | "execution_count": 297, 298 | "metadata": { 299 | "collapsed": false 300 | }, 301 | "outputs": [ 302 | { 303 | "data": { 304 | "text/plain": [ 305 | "{'bootstrap': True,\n", 306 | " 'criterion': 'mse',\n", 307 | " 'max_depth': None,\n", 308 | " 'max_features': 'auto',\n", 309 | " 'max_leaf_nodes': None,\n", 310 | " 'min_impurity_split': 1e-07,\n", 311 | " 'min_samples_leaf': 1,\n", 312 | " 'min_samples_split': 2,\n", 313 | " 'min_weight_fraction_leaf': 0.0,\n", 314 | " 'n_estimators': 100,\n", 315 | " 'n_jobs': -1,\n", 316 | " 'oob_score': True,\n", 317 | " 'random_state': 3,\n", 318 | " 'verbose': 0,\n", 319 | " 'warm_start': False}" 320 | ] 321 | }, 322 | "execution_count": 297, 323 | "metadata": {}, 324 | "output_type": "execute_result" 325 | } 326 | ], 327 | "source": [ 328 | "extract.get_params()" 329 | ] 330 | }, 331 | { 332 | "cell_type": "code", 333 | "execution_count": 298, 334 | "metadata": { 335 | "collapsed": true 336 | }, 337 | "outputs": [], 338 | "source": [ 339 | "# grid.fit(train_x.fillna(0), train_y)" 340 | ] 341 | }, 342 | { 343 | "cell_type": "code", 344 | "execution_count": 300, 345 | "metadata": { 346 | "collapsed": true 347 | }, 348 | "outputs": [ 349 | { 350 | "name": "stdout", 351 | "output_type": "stream", 352 | "text": [ 353 | "Fitting 4 regressors...\n", 354 | "Fitting regressor1: randomforestregressor (1/4)\n", 355 | "Fitting 4 regressors...\n", 356 | "Fitting regressor1: randomforestregressor (1/4)\n", 357 | 
"Fitting 4 regressors...\n", 358 | "Fitting regressor1: randomforestregressor (1/4)\n", 359 | "Fitting 4 regressors...\n", 360 | "Fitting regressor1: randomforestregressor (1/4)\n", 361 | "Fitting regressor2: randomforestregressor (2/4)\n", 362 | "Fitting regressor2: randomforestregressor (2/4)\n", 363 | "Fitting regressor2: randomforestregressor (2/4)\n", 364 | "Fitting regressor2: randomforestregressor (2/4)\n", 365 | "Fitting regressor3: extratreesregressor (3/4)\n", 366 | "Fitting regressor3: extratreesregressor (3/4)\n", 367 | "Fitting regressor3: extratreesregressor (3/4)\n", 368 | "Fitting regressor3: extratreesregressor (3/4)\n", 369 | "Fitting regressor4: extratreesregressor (4/4)\n", 370 | "Fitting regressor4: extratreesregressor (4/4)\n", 371 | "Fitting regressor4: extratreesregressor (4/4)\n", 372 | "Fitting regressor4: extratreesregressor (4/4)\n", 373 | "Fitting 4 regressors...\n", 374 | "Fitting regressor1: randomforestregressor (1/4)\n", 375 | "Fitting 4 regressors...\n", 376 | "Fitting regressor1: randomforestregressor (1/4)\n", 377 | "Fitting 4 regressors...\n", 378 | "Fitting regressor1: randomforestregressor (1/4)\n", 379 | "Fitting 4 regressors...\n", 380 | "Fitting regressor1: randomforestregressor (1/4)\n", 381 | "Fitting regressor2: randomforestregressor (2/4)\n", 382 | "Fitting regressor2: randomforestregressor (2/4)\n", 383 | "Fitting regressor2: randomforestregressor (2/4)\n", 384 | "Fitting regressor2: randomforestregressor (2/4)\n", 385 | "Fitting regressor3: extratreesregressor (3/4)\n", 386 | "Fitting regressor3: extratreesregressor (3/4)\n", 387 | "Fitting regressor3: extratreesregressor (3/4)\n", 388 | "Fitting regressor3: extratreesregressor (3/4)\n", 389 | "Fitting regressor4: extratreesregressor (4/4)\n", 390 | "Fitting regressor4: extratreesregressor (4/4)\n", 391 | "Fitting regressor4: extratreesregressor (4/4)\n", 392 | "Fitting regressor4: extratreesregressor (4/4)\n", 393 | "Fitting 4 regressors...\n", 394 | "Fitting 
regressor1: randomforestregressor (1/4)\n", 395 | "Fitting 4 regressors...\n", 396 | "Fitting regressor1: randomforestregressor (1/4)\n", 397 | "Fitting regressor2: randomforestregressor (2/4)\n", 398 | "Fitting regressor2: randomforestregressor (2/4)\n", 399 | "Fitting regressor3: extratreesregressor (3/4)\n", 400 | "Fitting regressor3: extratreesregressor (3/4)\n", 401 | "Fitting regressor4: extratreesregressor (4/4)\n", 402 | "Fitting regressor4: extratreesregressor (4/4)\n" 403 | ] 404 | }, 405 | { 406 | "name": "stderr", 407 | "output_type": "stream", 408 | "text": [ 409 | "[Parallel(n_jobs=-1)]: Done 10 out of 10 | elapsed: 2.1min finished\n" 410 | ] 411 | } 412 | ], 413 | "source": [ 414 | "r2_score = cross_validation.cross_val_score(stacker, train_x, train_y, cv=cv, verbose=True, n_jobs=-1)" 415 | ] 416 | }, 417 | { 418 | "cell_type": "code", 419 | "execution_count": 301, 420 | "metadata": { 421 | "collapsed": true 422 | }, 423 | "outputs": [], 424 | "source": [ 425 | "# grid.fit(train_x.fillna(0), train_y)" 426 | ] 427 | }, 428 | { 429 | "cell_type": "code", 430 | "execution_count": 302, 431 | "metadata": { 432 | "collapsed": false 433 | }, 434 | "outputs": [ 435 | { 436 | "data": { 437 | "text/plain": [ 438 | "(0.78898896165565757, 0.86008654041103993, 0.68182410751042843)" 439 | ] 440 | }, 441 | "execution_count": 302, 442 | "metadata": {}, 443 | "output_type": "execute_result" 444 | } 445 | ], 446 | "source": [ 447 | "r2_score.mean(), r2_score.max(), r2_score.min()" 448 | ] 449 | }, 450 | { 451 | "cell_type": "code", 452 | "execution_count": 303, 453 | "metadata": { 454 | "collapsed": false 455 | }, 456 | "outputs": [ 457 | { 458 | "name": "stdout", 459 | "output_type": "stream", 460 | "text": [ 461 | "Fitting 4 regressors...\n", 462 | "Fitting regressor1: randomforestregressor (1/4)\n", 463 | "Fitting regressor2: randomforestregressor (2/4)\n", 464 | "Fitting regressor3: extratreesregressor (3/4)\n", 465 | "Fitting regressor4: extratreesregressor 
(4/4)\n" 466 | ] 467 | }, 468 | { 469 | "data": { 470 | "text/plain": [ 471 | "StackingRegressor(meta_regressor=LinearRegression(copy_X=True, fit_intercept=True, n_jobs=-1, normalize=False),\n", 472 | " regressors=[RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=None,\n", 473 | " max_features='auto', max_leaf_nodes=None,\n", 474 | " min_impurity_split=1e-07, min_samples_leaf=1,\n", 475 | " min_samples_split=2, min_weight_fraction_leaf=0.0,\n", 476 | " n_estimators=100, n_jobs=-1, oob_s...n_estimators=300, n_jobs=-1,\n", 477 | " oob_score=True, random_state=4, verbose=0, warm_start=False)],\n", 478 | " verbose=True)" 479 | ] 480 | }, 481 | "execution_count": 303, 482 | "metadata": {}, 483 | "output_type": "execute_result" 484 | } 485 | ], 486 | "source": [ 487 | "stacker.fit(train_x.fillna(0), train_y)" 488 | ] 489 | }, 490 | { 491 | "cell_type": "code", 492 | "execution_count": 304, 493 | "metadata": { 494 | "collapsed": false 495 | }, 496 | "outputs": [], 497 | "source": [ 498 | "r2_all_scorr = stacker.score(train_x.fillna(0), train_y)" 499 | ] 500 | }, 501 | { 502 | "cell_type": "code", 503 | "execution_count": null, 504 | "metadata": { 505 | "collapsed": true 506 | }, 507 | "outputs": [], 508 | "source": [ 509 | "# importance_df = pd.DataFrame(model.feature_importances_, index=train_x.columns)\n", 510 | "\n", 511 | "# importance_df.sort_values(0, ascending=False, inplace=True)\n", 512 | "\n", 513 | "# importance_top20 = ' , '.join(['{}:{} '.format(x, importance_df.loc[x].values[0]) for x in importance_df.index][:20])\n", 514 | "# importance_top20" 515 | ] 516 | }, 517 | { 518 | "cell_type": "code", 519 | "execution_count": 305, 520 | "metadata": { 521 | "collapsed": false 522 | }, 523 | "outputs": [ 524 | { 525 | "data": { 526 | "text/plain": [ 527 | "0.9876500688451734" 528 | ] 529 | }, 530 | "execution_count": 305, 531 | "metadata": {}, 532 | "output_type": "execute_result" 533 | } 534 | ], 535 | "source": [ 536 | "predict_result = 
stacker.predict(train_x.fillna(0))\n", 537 | "loss_s = loss_score(predict_result, train_y)\n", 538 | "loss_s" 539 | ] 540 | }, 541 | { 542 | "cell_type": "code", 543 | "execution_count": 306, 544 | "metadata": { 545 | "collapsed": false 546 | }, 547 | "outputs": [ 548 | { 549 | "data": { 550 | "text/plain": [ 551 | "(0.9876500688451734,\n", 552 | " 0.98078256144556675,\n", 553 | " 0.78898896165565757,\n", 554 | " 0.86008654041103993,\n", 555 | " 0.68182410751042843)" 556 | ] 557 | }, 558 | "execution_count": 306, 559 | "metadata": {}, 560 | "output_type": "execute_result" 561 | } 562 | ], 563 | "source": [ 564 | "result = loss_s, r2_all_scorr, r2_score.mean(), r2_score.max(), r2_score.min()\n", 565 | "result" 566 | ] 567 | }, 568 | { 569 | "cell_type": "code", 570 | "execution_count": 307, 571 | "metadata": { 572 | "collapsed": false 573 | }, 574 | "outputs": [ 575 | { 576 | "name": "stdout", 577 | "output_type": "stream", 578 | "text": [ 579 | "2017-06-13 07:55:11.228802 result: (0.9876500688451734, 0.98078256144556675, 0.78898896165565757, 0.86008654041103993, 0.68182410751042843)\n" 580 | ] 581 | } 582 | ], 583 | "source": [ 584 | "print(datetime.now(), 'result:', result)\n", 585 | "print(\n", 586 | " datetime.now(),\n", 587 | " 'result:', result,\n", 588 | " file=open('result.txt', 'a+'),\n", 589 | " sep=' ',\n", 590 | " end='\\n')" 591 | ] 592 | }, 593 | { 594 | "cell_type": "code", 595 | "execution_count": 308, 596 | "metadata": { 597 | "collapsed": false 598 | }, 599 | "outputs": [], 600 | "source": [ 601 | "pickle.dump(stacker, open(model_path, 'wb'))" 602 | ] 603 | } 604 | ], 605 | "metadata": { 606 | "anaconda-cloud": {}, 607 | "hide_input": false, 608 | "kernelspec": { 609 | "display_name": "Python 3", 610 | "language": "python", 611 | "name": "python3" 612 | }, 613 | "language_info": { 614 | "codemirror_mode": { 615 | "name": "ipython", 616 | "version": 3 617 | }, 618 | "file_extension": ".py", 619 | "mimetype": "text/x-python", 620 | "name": "python", 
621 | "nbconvert_exporter": "python", 622 | "pygments_lexer": "ipython3", 623 | "version": "3.5.1" 624 | } 625 | }, 626 | "nbformat": 4, 627 | "nbformat_minor": 2 628 | } 629 | -------------------------------------------------------------------------------- /code/clean_data.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import sys\n", 12 | "from datetime import datetime, timedelta, date\n", 13 | "\n", 14 | "import numpy as np\n", 15 | "import pandas as pd\n", 16 | "import scipy as sp\n", 17 | "\n", 18 | "from sklearn.ensemble import IsolationForest" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 2, 24 | "metadata": { 25 | "collapsed": false 26 | }, 27 | "outputs": [], 28 | "source": [ 29 | "if sys.argv[1] == 'test':\n", 30 | " is_train = False\n", 31 | " train_path = '../dataset/fetures/test.csv'\n", 32 | "else:\n", 33 | " is_train = True\n", 34 | " if sys.argv[2].endswith('.json'):\n", 35 | " month = '9'\n", 36 | " else:\n", 37 | " month = sys.argv[2]\n", 38 | " train_path = '../dataset/fetures/{}/train.csv'.format(month)" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": 3, 44 | "metadata": { 45 | "collapsed": false 46 | }, 47 | "outputs": [], 48 | "source": [ 49 | "train = pd.read_csv(train_path, parse_dates=['record_date'])" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": 4, 55 | "metadata": { 56 | "collapsed": false 57 | }, 58 | "outputs": [ 59 | { 60 | "name": "stdout", 61 | "output_type": "stream", 62 | "text": [ 63 | "\n", 64 | "RangeIndex: 532156 entries, 0 to 532155\n", 65 | "Data columns (total 3 columns):\n", 66 | "record_date 532156 non-null datetime64[ns]\n", 67 | "user_id 532156 non-null int64\n", 68 | "power_consumption 532156 non-null int64\n", 69 | "dtypes: 
datetime64[ns](1), int64(2)\n", 70 | "memory usage: 12.2 MB\n" 71 | ] 72 | } 73 | ], 74 | "source": [ 75 | "train.info()" 76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": 5, 81 | "metadata": { 82 | "collapsed": true 83 | }, 84 | "outputs": [], 85 | "source": [ 86 | "threshold = 0.95" 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": 6, 92 | "metadata": { 93 | "collapsed": true 94 | }, 95 | "outputs": [], 96 | "source": [ 97 | "def clean_quantile(df):\n", 98 | " up = df.power_consumption.quantile(threshold)\n", 99 | " down = df.power_consumption.quantile(1-threshold)\n", 100 | " mean = df.power_consumption.mean()\n", 101 | " print(df['user_id'].iloc[0], 'up:', up, 'down:', down, 'mean:', mean, end='\\n', file=open('../dataset/clean.txt', 'a+'))\n", 102 | " df.loc[(df.power_consumption>=up)|(df.power_consumption<=down), 'power_consumption'] = df.power_consumption.mean()\n", 103 | " return df" 104 | ] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "execution_count": 7, 109 | "metadata": { 110 | "collapsed": false 111 | }, 112 | "outputs": [], 113 | "source": [ 114 | "train = train.groupby('user_id', as_index=True).apply(clean_quantile)" 115 | ] 116 | }, 117 | { 118 | "cell_type": "markdown", 119 | "metadata": {}, 120 | "source": [ 121 | "# 去除这些公司后结果变差,可能是因为未来也会造成影响" 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": 9, 127 | "metadata": { 128 | "collapsed": true 129 | }, 130 | "outputs": [], 131 | "source": [ 132 | "def clean_all_zero(df):\n", 133 | " ndf = df.loc[df.power_consumption!=1]\n", 134 | " if not ndf.empty:\n", 135 | " return df\n", 136 | " else:\n", 137 | " print(df['user_id'].iloc[0])\n", 138 | " return ndf\n" 139 | ] 140 | }, 141 | { 142 | "cell_type": "code", 143 | "execution_count": 10, 144 | "metadata": { 145 | "collapsed": true 146 | }, 147 | "outputs": [ 148 | { 149 | "name": "stdout", 150 | "output_type": "stream", 151 | "text": [ 152 | "259\n", 153 | "413\n", 154 | "418\n", 
155 | "462\n", 156 | "469\n", 157 | "522\n", 158 | "533\n", 159 | "549\n", 160 | "550\n", 161 | "551\n", 162 | "553\n", 163 | "557\n", 164 | "570\n", 165 | "586\n", 166 | "588\n", 167 | "589\n", 168 | "598\n", 169 | "606\n", 170 | "609\n", 171 | "617\n", 172 | "622\n", 173 | "633\n", 174 | "634\n", 175 | "637\n", 176 | "638\n", 177 | "639\n", 178 | "645\n", 179 | "646\n", 180 | "647\n", 181 | "650\n", 182 | "651\n", 183 | "652\n", 184 | "653\n", 185 | "654\n", 186 | "655\n", 187 | "656\n", 188 | "657\n", 189 | "658\n", 190 | "661\n", 191 | "663\n", 192 | "664\n", 193 | "665\n", 194 | "666\n", 195 | "667\n", 196 | "668\n", 197 | "669\n", 198 | "670\n", 199 | "671\n", 200 | "672\n", 201 | "673\n", 202 | "674\n", 203 | "675\n", 204 | "676\n", 205 | "677\n", 206 | "678\n", 207 | "679\n", 208 | "680\n", 209 | "681\n", 210 | "682\n", 211 | "683\n", 212 | "684\n", 213 | "685\n", 214 | "686\n", 215 | "687\n", 216 | "688\n", 217 | "689\n", 218 | "690\n", 219 | "691\n", 220 | "692\n", 221 | "693\n", 222 | "694\n", 223 | "695\n", 224 | "696\n", 225 | "709\n", 226 | "891\n", 227 | "950\n", 228 | "1044\n", 229 | "1045\n", 230 | "1111\n", 231 | "1129\n", 232 | "1233\n", 233 | "1234\n", 234 | "1261\n", 235 | "1280\n", 236 | "1413\n", 237 | "\n", 238 | "RangeIndex: 501046 entries, 0 to 501045\n", 239 | "Data columns (total 3 columns):\n", 240 | "record_date 501046 non-null datetime64[ns]\n", 241 | "user_id 501046 non-null int64\n", 242 | "power_consumption 501046 non-null float64\n", 243 | "dtypes: datetime64[ns](1), float64(1), int64(1)\n", 244 | "memory usage: 11.5 MB\n" 245 | ] 246 | } 247 | ], 248 | "source": [ 249 | "# train = train.groupby('user_id').apply(clean_all_zero).reset_index(drop=True)\n", 250 | "# train.info()" 251 | ] 252 | }, 253 | { 254 | "cell_type": "code", 255 | "execution_count": null, 256 | "metadata": { 257 | "collapsed": true 258 | }, 259 | "outputs": [], 260 | "source": [ 261 | "# test = 
test.groupby('user_id').apply(clean_all_zero).reset_index(drop=True)\n", 262 | "# test.info()" 263 | ] 264 | }, 265 | { 266 | "cell_type": "markdown", 267 | "metadata": {}, 268 | "source": [ 269 | "# 添加公司用电量平滑处理" 270 | ] 271 | }, 272 | { 273 | "cell_type": "code", 274 | "execution_count": 11, 275 | "metadata": { 276 | "collapsed": true 277 | }, 278 | "outputs": [], 279 | "source": [ 280 | "def rolling_power_consumption(df):\n", 281 | " ndf = df.set_index('record_date')\n", 282 | " ndf = ndf.rolling(2).mean()\n", 283 | " return ndf.reset_index()" 284 | ] 285 | }, 286 | { 287 | "cell_type": "code", 288 | "execution_count": 12, 289 | "metadata": { 290 | "collapsed": false 291 | }, 292 | "outputs": [], 293 | "source": [ 294 | "train = train.groupby('user_id').apply(rolling_power_consumption).dropna(subset=['user_id']).reset_index(drop=True)" 295 | ] 296 | }, 297 | { 298 | "cell_type": "code", 299 | "execution_count": 13, 300 | "metadata": { 301 | "collapsed": false 302 | }, 303 | "outputs": [ 304 | { 305 | "name": "stdout", 306 | "output_type": "stream", 307 | "text": [ 308 | "\n", 309 | "RangeIndex: 499677 entries, 0 to 499676\n", 310 | "Data columns (total 3 columns):\n", 311 | "record_date 499677 non-null datetime64[ns]\n", 312 | "user_id 499677 non-null float64\n", 313 | "power_consumption 499677 non-null float64\n", 314 | "dtypes: datetime64[ns](1), float64(2)\n", 315 | "memory usage: 11.4 MB\n" 316 | ] 317 | } 318 | ], 319 | "source": [ 320 | "train.to_csv(train_path, index=False)\n", 321 | "train.info()" 322 | ] 323 | } 324 | ], 325 | "metadata": { 326 | "anaconda-cloud": {}, 327 | "hide_input": false, 328 | "kernelspec": { 329 | "display_name": "Python 3", 330 | "language": "python", 331 | "name": "python3" 332 | }, 333 | "language_info": { 334 | "codemirror_mode": { 335 | "name": "ipython", 336 | "version": 3 337 | }, 338 | "file_extension": ".py", 339 | "mimetype": "text/x-python", 340 | "name": "python", 341 | "nbconvert_exporter": "python", 342 | 
def clean_quantile(df, quantile=None, log_path='../dataset/clean.txt'):
    """Replace one user's extreme consumption readings with that user's mean.

    Readings at or above the upper quantile, or at or below the lower
    quantile, are overwritten with the mean of the user's readings (computed
    before any replacement).

    Parameters
    ----------
    df : pandas.DataFrame
        Rows of a single user; must contain ``user_id`` and
        ``power_consumption`` columns.
    quantile : float, optional
        Upper quantile; the lower one is ``1 - quantile``. Defaults to the
        module-level ``threshold``.
    log_path : str or None, optional
        File that receives one appended line with the user's bounds and mean.
        Pass ``None`` to skip logging.

    Returns
    -------
    pandas.DataFrame
        A cleaned copy of ``df`` whose ``power_consumption`` column is float.
    """
    if df.empty:
        # Nothing to clean, and ``.iloc[0]`` below would raise on an empty frame.
        return df

    q = threshold if quantile is None else quantile

    # Work on a float copy: the replacement value is a float mean, and writing
    # a float into an integer column in place is deprecated in current pandas.
    df = df.astype({'power_consumption': 'float64'})
    consumption = df['power_consumption']

    up = consumption.quantile(q)
    down = consumption.quantile(1 - q)
    mean = consumption.mean()

    if log_path is not None:
        # The context manager closes the handle; this function runs once per
        # user, so an unclosed handle per call adds up quickly.
        with open(log_path, 'a+') as log_file:
            print(df['user_id'].iloc[0], 'up:', up, 'down:', down, 'mean:', mean, end='\n', file=log_file)

    outliers = (consumption >= up) | (consumption <= down)
    df.loc[outliers, 'power_consumption'] = mean
    return df


def clean_all_zero(df):
    """Drop a user whose every reading equals 1.

    Returns ``df`` unchanged when at least one reading differs from 1;
    otherwise prints the user's id and returns an empty frame.

    NOTE(review): the name says "zero" but the value tested is 1 - presumably
    1 is how the dataset encodes "no consumption"; confirm against the data.
    """
    active = df.loc[df.power_consumption != 1]
    if not active.empty:
        return df
    if not df.empty:
        # Report which user is being removed (an empty input has no id to show).
        print(df['user_id'].iloc[0])
    return active
# --- from code/clean_data.py ---

def rolling_power_consumption(df, window=2):
    """Smooth one user's readings with a trailing rolling mean.

    ``record_date`` becomes the index, every remaining column is averaged
    over ``window`` consecutive rows, and ``record_date`` is restored as a
    column. The first ``window - 1`` rows come out as NaN in every averaged
    column (including ``user_id``), which the caller filters out afterwards.

    Rows are averaged in the order given; the frame is not sorted here.
    """
    indexed = df.set_index('record_date')
    smoothed = indexed.rolling(window).mean()
    return smoothed.reset_index()


# --- from code/extract_fetures.py ---

def loss_score(predict, real):
    """Return ``1 - mean(|real - predict| / |real|)``.

    Entries whose relative error is undefined are counted as zero error:
    ``real == 0`` gives +/-inf, and ``0 / 0`` gives NaN. The original code
    only replaced NaN, so a single zero target turned the score into -inf.

    Parameters
    ----------
    predict : array-like
        Predicted values.
    real : array-like
        Observed values; pandas objects are aligned on their index by the
        subtraction and division below.

    Returns
    -------
    float
        The score; 1.0 means a perfect prediction.
    """
    f = (real - predict) / real
    # Accept plain ndarrays too: ``replace``/``fillna`` need a pandas object.
    f = pd.Series(f)
    n = len(f)
    f = f.replace([np.inf, -np.inf], np.nan).fillna(0)
    score = 1 - np.abs(f).sum() / n
    return score
def create_timespan(end_date, m_span):
    """Number of days covered by the `m_span` months before the prediction start.

    The prediction starts the day after `end_date`. The function steps back
    `m_span` calendar months from that day and returns the distance in days
    minus one.

    Month arithmetic is done on a running month count so spans of more than a
    year no longer produce month 0 or below, and the day of month is clamped
    to the length of the target month (e.g. 31 March minus one month gives
    28/29 February instead of raising ValueError).
    """
    import calendar

    predict_start = end_date + timedelta(1)
    months = predict_start.year * 12 + (predict_start.month - 1) - m_span
    n_year, month_index = divmod(months, 12)
    n_month = month_index + 1
    last_day = calendar.monthrange(n_year, n_month)[1]
    n_date = date(n_year, n_month, min(predict_start.day, last_day))
    return (predict_start - n_date).days - 1


def get_train_df(date_span, train, end=None):
    """Return the rows of `train` recorded in the last `date_span` days.

    Parameters
    ----------
    date_span : int
        Window length in days; both window ends are inclusive.
    train : pandas.DataFrame
        Must have a datetime `record_date` column.
    end : datetime.date, optional
        Last day of the window. Defaults to the module-level `end_date`.

    Returns
    -------
    pandas.DataFrame
        The selected rows with `record_date` as the first column and a fresh
        integer index.
    """
    if end is None:
        end = end_date
    begin_date = end - timedelta(date_span)
    indexed = train.set_index(['record_date'])
    dates = indexed.index
    # A boolean mask selects the same inclusive window as slicing with date
    # strings, but does not need the index to be sorted.
    in_window = (dates >= pd.Timestamp(begin_date)) & (dates < pd.Timestamp(end) + pd.Timedelta(days=1))
    return indexed.loc[in_window].reset_index()


def _date_part(dates, name):
    """Read the datetime attribute `name` from a datetime Series."""
    accessor = dates.dt
    # `.dt.week` / `.dt.weekofyear` do not exist in recent pandas; the ISO
    # calendar week is the same quantity.
    if name in ('week', 'weekofyear') and not hasattr(accessor, name):
        return accessor.isocalendar().week.astype('int64')
    return getattr(accessor, name)


def add_feture_in_date(used_feture, predict, train_df):
    """Add one column per datetime attribute in `used_feture` to both frames.

    The attribute is read from `predict['predict_date']` and from
    `train_df['record_date']`. Both frames are modified in place and nothing
    is returned. Only the requested attributes are added (the original looped
    over the global feature list and ignored this argument).
    """
    for f in used_feture:
        predict[f] = _date_part(predict['predict_date'], f)
        train_df[f] = _date_part(train_df['record_date'], f)


def extract_describe_feture(train_df, predict, f, date_span):
    """Join `describe()` statistics of consumption, grouped by `f`, onto `predict`.

    The new columns are named `post<date_span>_<f>_<stat>` and are matched to
    `predict` rows through the value of column `f`.
    """
    stats = train_df.groupby(f)['power_consumption'].describe()
    if isinstance(stats, pd.Series):
        # Old pandas returns the statistics stacked in a MultiIndex Series.
        stats = stats.unstack()
    column_fmt = 'post{}_{}_{}'
    stats.columns = [column_fmt.format(date_span, f, x) for x in stats.columns]
    predict = predict.join(stats, on=f)
    return predict


def extract_post_day_describe(date_span, train, predict, used_data_fetures, add_feture=True):
    """Add grouped consumption statistics for every feature in `used_data_fetures`.

    The statistics come from the last `date_span` days of `train`. With
    `add_feture` set, the features are first derived from the date columns of
    both frames.
    """
    train_df = get_train_df(date_span, train)
    if add_feture:
        add_feture_in_date(used_data_fetures, predict, train_df)
    for f in used_data_fetures:
        predict = extract_describe_feture(train_df, predict, f, date_span)
    return predict


# Per-group statistics of the daily total consumption.
def apply_describe_day_sum(group):
    """Describe the per-day total consumption inside `group`.

    Returns a Series indexed by the `describe()` statistic names.
    """
    df = group.groupby('record_date')[['power_consumption']].sum().describe().unstack()['power_consumption']
    return df


def extract_day_consumption_describe(span, f, train, predict, add_dt_feture=False):
    """Join statistics of daily total consumption, grouped by `f`, onto `predict`.

    Uses the last `span` days of `train`. With `add_dt_feture` set, `f` is
    first derived from the date columns of both frames. New columns are named
    `post<span>_<f>_<stat>_day_consumption`.
    """
    tdf = get_train_df(span, train)
    if add_dt_feture:
        add_feture_in_date([f], predict, tdf)
    name_fmt = 'post{}_{}_{}_day_consumption'
    df = tdf.groupby(f).apply(apply_describe_day_sum)
    df.columns = [name_fmt.format(span, f, x) for x in df.columns]
    predict = predict.join(df, on=f)
    return predict


def extract_mean_consumption(train_df, feture, date_span, predict, div_times):
    """Join the scaled consumption total per `feture` value onto `predict`.

    The total per feature value is divided by `date_span / div_times` and
    stored as `post<date_span>_<feture>_mean_consumption`. Rows are matched
    through the `feture` column of `predict` (the original joined on the row
    index, which pairs unrelated rows).
    """
    column_fmt = 'post{}_{}_mean_consumption'
    df = train_df.groupby(feture)['power_consumption'].sum() / (date_span / div_times)
    df.name = column_fmt.format(date_span, feture)
    predict = predict.join(df, on=feture)
    return predict


def extract_post_mean_consumption(feture, div_times, date_span, train, predict):
    """Add the mean-consumption column for one feature and one look-back window."""
    # The window must follow `date_span`; it was hard-coded to 30 days.
    train_df = get_train_df(date_span, train)
    add_feture_in_date(used_sum_fetures, predict, train_df)
    predict = extract_mean_consumption(train_df, feture, date_span, predict, div_times)
    return predict


def extract_all_post_mean_consumption(used_sum_feture_model, train, predict):
    """Apply `extract_post_mean_consumption` for every (feature, divisor, spans) entry."""
    for feture, div_times, date_spans in used_sum_feture_model:
        for date_span in date_spans:
            predict = extract_post_mean_consumption(feture, div_times, date_span, train, predict)
    return predict
180, 360]), 286 | ('days_in_month', 30, [30, 60, 90, 180, 360]), 287 | ('quarter', 90, [90, 180, 360]), 288 | ('week', 52, [180, 360]), 289 | ('month', 30, [30, 60, 90, 120, 240, 360]), 290 | ('year', 360, [ 360]), 291 | ] 292 | 293 | 294 | # In[36]: 295 | 296 | # predict = extract_all_post_mean_consumption(used_sum_feture_model, train, predict) # over fit 297 | 298 | 299 | # In[37]: 300 | 301 | # extract_day_consumption_describe(30, 'dayofweek', train, predict, add_dt_feture=True) 302 | 303 | 304 | # In[38]: 305 | 306 | for f in used_sum_fetures: 307 | for span in time_spans: 308 | predict = extract_day_consumption_describe(span, f, train, predict, add_dt_feture=True) 309 | 310 | 311 | # # 天气特征 312 | 313 | # In[39]: 314 | 315 | header = ['weather_date', 'weather_max', 'weather_min', 'weather_type', 'weather_wind', 'wind_type'] 316 | 317 | 318 | # In[40]: 319 | 320 | weather_df = pd.read_csv('../dataset/yangzhong.csv', header=None, names=header, parse_dates=['weather_date']) 321 | weather_df.head(3) 322 | 323 | 324 | # #### 将天气切分成块,再提取块边界 325 | 326 | # In[41]: 327 | 328 | weather_df.weather_min = pd.cut(weather_df.weather_min, bins=10) 329 | 330 | 331 | # In[42]: 332 | 333 | weather_df.weather_min = weather_df.weather_min.str.extract('\((-?\d+\.?\d*),').astype(np.float) 334 | 335 | 336 | # In[43]: 337 | 338 | weather_df.weather_max = pd.cut(weather_df.weather_max, bins=10) 339 | 340 | 341 | # In[44]: 342 | 343 | weather_df.weather_max = weather_df.weather_max.str.extract(', (-?\d+\.?\d*)\]').astype(np.float) 344 | 345 | 346 | # In[45]: 347 | 348 | weather_df.shape 349 | 350 | 351 | # In[46]: 352 | 353 | weather_type_count = weather_df.weather_type.value_counts() 354 | 355 | 356 | # In[47]: 357 | 358 | weather_df.weather_type = weather_df.weather_type.replace([ 359 | x for x in weather_type_count.loc[weather_type_count < 2].index 360 | ], 'rare_weather') 361 | 362 | 363 | # In[48]: 364 | 365 | weather_df.loc[weather_df.weather_type.str.contains('阵雨'), 
'weather_type'] = 'showers_weather' 366 | 367 | 368 | # In[49]: 369 | 370 | weather_df.loc[weather_df.weather_type.str.contains('雨'), 'weather_type'] = 'rain_weather' 371 | 372 | 373 | # In[50]: 374 | 375 | weather_df.loc[~weather_df.weather_type.str.islower(), 'weather_type'] = 'fine_weather' 376 | 377 | 378 | # In[51]: 379 | 380 | weather_df = weather_df.join(pd.get_dummies(weather_df.weather_type)) 381 | 382 | 383 | # In[75]: 384 | 385 | weather_num_columns = [ 386 | 'weather_max', 'weather_min', 'wind_type', 'fine_weather', 'rain_weather', 387 | 'rare_weather', 'showers_weather' 388 | ] 389 | 390 | 391 | # In[53]: 392 | 393 | weather_df.head() 394 | 395 | 396 | # In[54]: 397 | 398 | weather_df.weather_wind.replace('东南风', 'southeast_wind', inplace=True) 399 | weather_df.weather_wind.replace('东北风', 'northeast_wind', inplace=True) 400 | weather_df.weather_wind.replace('西南风', 'southwest_wind', inplace=True) 401 | weather_df.weather_wind.replace('西北风', 'northwest_wind', inplace=True) 402 | weather_df.weather_wind.replace('东风', 'east_wind', inplace=True) 403 | weather_df.weather_wind.replace('北风', 'north_wind', inplace=True) 404 | weather_df.weather_wind.replace('南风', 'south_wind', inplace=True) 405 | weather_df.weather_wind.replace('西风', 'west_wind', inplace=True) 406 | 407 | 408 | # In[55]: 409 | 410 | weather_df.weather_wind.replace(['3-4级', '暂无实况', '无持续风向'], 'unknow_wind', inplace=True) 411 | 412 | 413 | # In[56]: 414 | 415 | weather_df = weather_df.join(pd.get_dummies(weather_df.weather_wind)) 416 | 417 | 418 | # In[57]: 419 | 420 | wind_type_count = weather_df.wind_type.value_counts() 421 | 422 | 423 | # In[58]: 424 | 425 | weather_df.wind_type = weather_df.wind_type.replace([ 426 | x for x in wind_type_count.loc[wind_type_count < 5 ].index 427 | ], 'rare_wind') 428 | 429 | 430 | # In[59]: 431 | 432 | weather_df.wind_type.replace('rare_wind', 0, inplace=True) 433 | weather_df.wind_type.replace('微风', 1, inplace=True) 434 | weather_df.wind_type.replace('1级', 2, 
# Cross features: product of two feature columns.
def extract_combin_feture_day_consumption_describe(span, f1, f2, predict, train, f1_need_add=True, f2_need_add=False):
    """Add daily-consumption statistics grouped by the product of `f1` and `f2`.

    The last `span` days of `train` are selected. For each of `f1` / `f2`
    whose `*_need_add` flag is set, the feature is first derived from the date
    columns. The product column `post<span>_combine_<f1>_and_<f2>` is written
    into `predict` (in place) and into the training window, and the grouped
    statistics for that column are then joined onto `predict`.

    Returns the `predict` frame produced by
    `extract_day_consumption_describe`.
    """
    window = get_train_df(span, train)
    for name, needed in ((f1, f1_need_add), (f2, f2_need_add)):
        if needed:
            add_feture_in_date([name], predict, window)
    combined = 'post{}_combine_{}_and_{}'.format(span, f1, f2)
    predict[combined] = predict[f1] * predict[f2]
    window[combined] = window[f1] * window[f2]
    return extract_day_consumption_describe(span, combined, window, predict)
-------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 54, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import requests\n", 12 | "import pandas as pd\n", 13 | "import json\n", 14 | "from os.path import exists" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 59, 20 | "metadata": { 21 | "collapsed": true 22 | }, 23 | "outputs": [], 24 | "source": [ 25 | "def write_to_file(filename, data):\n", 26 | " if not exists(filename):\n", 27 | " with open(filename, 'w') as f:\n", 28 | " f.write('date,holiday\\n') \n", 29 | " for k, v in json.loads(data).items():\n", 30 | " if type(v) == str:\n", 31 | " v = int(v)\n", 32 | " with open(filename, 'a') as f:\n", 33 | " f.write('{},{}\\n'.format(k, v)) " 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": 2, 39 | "metadata": { 40 | "collapsed": false 41 | }, 42 | "outputs": [], 43 | "source": [ 44 | "data = pd.date_range('2015/1/1', '2016/12/31')" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": 3, 50 | "metadata": { 51 | "collapsed": false 52 | }, 53 | "outputs": [], 54 | "source": [ 55 | "date_ags = data.map(lambda x:x.strftime('%Y%m%d'))" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": 19, 61 | "metadata": { 62 | "collapsed": true 63 | }, 64 | "outputs": [], 65 | "source": [ 66 | "url_fmt = 'http://tool.bitefu.net/jiari/?d={}'" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": 30, 72 | "metadata": { 73 | "collapsed": false 74 | }, 75 | "outputs": [], 76 | "source": [ 77 | "agrv = ','.join(date_ags[:100])" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": 31, 83 | "metadata": { 84 | "collapsed": false 85 | }, 86 | "outputs": [ 87 | { 88 | "data": { 89 | "text/plain": [ 90 | 
"'http://tool.bitefu.net/jiari/?d=20150101,20150102,20150103,20150104,20150105,20150106,20150107,20150108,20150109,20150110,20150111,20150112,20150113,20150114,20150115,20150116,20150117,20150118,20150119,20150120,20150121,20150122,20150123,20150124,20150125,20150126,20150127,20150128,20150129,20150130,20150131,20150201,20150202,20150203,20150204,20150205,20150206,20150207,20150208,20150209,20150210,20150211,20150212,20150213,20150214,20150215,20150216,20150217,20150218,20150219,20150220,20150221,20150222,20150223,20150224,20150225,20150226,20150227,20150228,20150301,20150302,20150303,20150304,20150305,20150306,20150307,20150308,20150309,20150310,20150311,20150312,20150313,20150314,20150315,20150316,20150317,20150318,20150319,20150320,20150321,20150322,20150323,20150324,20150325,20150326,20150327,20150328,20150329,20150330,20150331,20150401,20150402,20150403,20150404,20150405,20150406,20150407,20150408,20150409,20150410'" 91 | ] 92 | }, 93 | "execution_count": 31, 94 | "metadata": {}, 95 | "output_type": "execute_result" 96 | } 97 | ], 98 | "source": [ 99 | "url = url_fmt.format(agrv)\n", 100 | "url" 101 | ] 102 | }, 103 | { 104 | "cell_type": "code", 105 | "execution_count": 63, 106 | "metadata": { 107 | "collapsed": false 108 | }, 109 | "outputs": [], 110 | "source": [ 111 | "for i in range(0, date_ags.shape[0], 100):\n", 112 | " agrv = ','.join(date_ags[i:(i+100)])\n", 113 | " url = url_fmt.format(agrv)\n", 114 | " res = requests.get(url)\n", 115 | " write_to_file('../dataset/holiday.csv', res.text)" 116 | ] 117 | } 118 | ], 119 | "metadata": { 120 | "anaconda-cloud": {}, 121 | "hide_input": false, 122 | "kernelspec": { 123 | "display_name": "Python 3", 124 | "language": "python", 125 | "name": "python3" 126 | }, 127 | "language_info": { 128 | "codemirror_mode": { 129 | "name": "ipython", 130 | "version": 3 131 | }, 132 | "file_extension": ".py", 133 | "mimetype": "text/x-python", 134 | "name": "python", 135 | "nbconvert_exporter": "python", 136 | 
"pygments_lexer": "ipython3", 137 | "version": "3.5.1" 138 | } 139 | }, 140 | "nbformat": 4, 141 | "nbformat_minor": 2 142 | } 143 | -------------------------------------------------------------------------------- /code/predict_result.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [ 10 | { 11 | "name": "stderr", 12 | "output_type": "stream", 13 | "text": [ 14 | "/home/zhanglun/.pyenv/versions/anaconda3-4.1.0/lib/python3.5/site-packages/sklearn/cross_validation.py:44: DeprecationWarning: This module was deprecated in version 0.18 in favor of the model_selection module into which all the refactored classes and functions are moved. Also note that the interface of the new CV iterators are different from that of this module. This module will be removed in 0.20.\n", 15 | " \"This module will be removed in 0.20.\", DeprecationWarning)\n" 16 | ] 17 | } 18 | ], 19 | "source": [ 20 | "import pickle\n", 21 | "\n", 22 | "import numpy as np\n", 23 | "import scipy as sp\n", 24 | "import pandas as pd\n", 25 | "\n", 26 | "from sklearn.ensemble import RandomForestRegressor\n", 27 | "from sklearn.metrics import r2_score, roc_auc_score\n", 28 | "from sklearn import cross_validation\n", 29 | "from sklearn.model_selection import GridSearchCV\n", 30 | "from sklearn.svm import SVR " 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": 2, 36 | "metadata": { 37 | "collapsed": true 38 | }, 39 | "outputs": [], 40 | "source": [ 41 | "model_path = '../dataset/fetures/model.pkl'\n", 42 | "output_path = '../submit/Tianchi_power_predict_table.csv'" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": 3, 48 | "metadata": { 49 | "collapsed": true 50 | }, 51 | "outputs": [], 52 | "source": [ 53 | "fetures = pd.read_csv('../dataset/fetures/test_feture.csv', 
parse_dates=['predict_date'])" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": 4, 59 | "metadata": { 60 | "collapsed": true 61 | }, 62 | "outputs": [], 63 | "source": [ 64 | "model = pickle.load(open(model_path, 'rb'))" 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": 5, 70 | "metadata": { 71 | "collapsed": false 72 | }, 73 | "outputs": [], 74 | "source": [ 75 | "train_x = fetures.drop(['predict_date', 'predict_power_consumption'], axis=1)" 76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": 6, 81 | "metadata": { 82 | "collapsed": false 83 | }, 84 | "outputs": [], 85 | "source": [ 86 | "fetures.predict_power_consumption = model.predict(train_x.fillna(0)).astype(np.int64)" 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": 7, 92 | "metadata": { 93 | "collapsed": false 94 | }, 95 | "outputs": [], 96 | "source": [ 97 | "fetures.predict_date = fetures.predict_date.map(lambda x: x.strftime('%Y%m%d'))" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": 8, 103 | "metadata": { 104 | "collapsed": false 105 | }, 106 | "outputs": [ 107 | { 108 | "data": { 109 | "text/plain": [ 110 | "3754006.806451613" 111 | ] 112 | }, 113 | "execution_count": 8, 114 | "metadata": {}, 115 | "output_type": "execute_result" 116 | } 117 | ], 118 | "source": [ 119 | "fetures['predict_power_consumption'].mean()" 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": 9, 125 | "metadata": { 126 | "collapsed": true 127 | }, 128 | "outputs": [], 129 | "source": [ 130 | "fetures.to_csv(output_path, index=False, columns=['predict_date', 'predict_power_consumption'])" 131 | ] 132 | } 133 | ], 134 | "metadata": { 135 | "anaconda-cloud": {}, 136 | "hide_input": false, 137 | "kernelspec": { 138 | "display_name": "Python 3", 139 | "language": "python", 140 | "name": "python3" 141 | }, 142 | "language_info": { 143 | "codemirror_mode": { 144 | "name": "ipython", 145 | "version": 
3 146 | }, 147 | "file_extension": ".py", 148 | "mimetype": "text/x-python", 149 | "name": "python", 150 | "nbconvert_exporter": "python", 151 | "pygments_lexer": "ipython3", 152 | "version": "3.5.1" 153 | } 154 | }, 155 | "nbformat": 4, 156 | "nbformat_minor": 2 157 | } 158 | -------------------------------------------------------------------------------- /code/result.txt: -------------------------------------------------------------------------------- 1 | 2017-06-10 17:56:11.519155 result: 0.9809955701469716 fetures: 702 post360_weekofyear_std:0.05983807806632941 , post360_week_std:0.05727341629026522 , post240_dayofyear_std:0.05700243617276306 , post360_dayofyear_75%:0.05470857403513062 , post360_dayofyear_std:0.05276191776918991 , post240_dayofyear_75%:0.05024778442182545 , post120_dayofyear_50%:0.04432864932493469 , post120_dayofyear_25%:0.030696122647652965 , post240_dayofyear_max:0.029791901464045943 , post360_dayofyear_max:0.029125100472333708 , post360_weekofyear_count:0.02428621103910183 , post360_week_count:0.024117039219537808 , post240_dayofyear_50%:0.023653997600368914 , post360_weekofyear_25%:0.02358902686612497 , post360_weekofyear_max:0.02339527645346069 , post360_week_25%:0.02167830480513648 , post360_dayofyear_50%:0.020969892112941615 , post360_weekofyear_mean:0.020865596411447317 , post360_week_mean:0.020820565163031164 , post240_week_std:0.020408987363339584 2 | 2017-06-10 17:56:57.531889 result: 0.9809955701469716 fetures: 702 post360_weekofyear_std:0.05983807806632941 , post360_week_std:0.05727341629026522 , post240_dayofyear_std:0.05700243617276306 , post360_dayofyear_75%:0.05470857403513062 , post360_dayofyear_std:0.05276191776918991 , post240_dayofyear_75%:0.05024778442182545 , post120_dayofyear_50%:0.04432864932493469 , post120_dayofyear_25%:0.030696122647652965 , post240_dayofyear_max:0.029791901464045943 , post360_dayofyear_max:0.029125100472333708 , post360_weekofyear_count:0.02428621103910183 , post360_week_count:0.024117039219537808 
, post240_dayofyear_50%:0.023653997600368914 , post360_weekofyear_25%:0.02358902686612497 , post360_weekofyear_max:0.02339527645346069 , post360_week_25%:0.02167830480513648 , post360_dayofyear_50%:0.020969892112941615 , post360_weekofyear_mean:0.020865596411447317 , post360_week_mean:0.020820565163031164 , post240_week_std:0.020408987363339584 3 | 2017-06-10 17:58:23.692043 result: 0.9812260591191833 fetures: 702 post240_dayofyear_std:0.06061743530771681 , post360_dayofyear_75%:0.05758784744580659 , post360_weekofyear_std:0.05700257710150259 , post360_week_std:0.05475886198817952 , post120_dayofyear_50%:0.04903319222276939 , post240_dayofyear_75%:0.04740758625355467 , post360_dayofyear_std:0.04045348492237613 , post240_dayofyear_max:0.03420164113736823 , post120_dayofyear_25%:0.031313071544771066 , post360_week_mean:0.029646691307865077 , post240_dayofyear_50%:0.027636178147651215 , post360_dayofyear_max:0.027471913720788735 , post360_week_count:0.02657281098445252 , post360_week_max:0.025361665971013903 , post360_weekofyear_25%:0.025076057967035353 , post360_weekofyear_max:0.02439508472114531 , post360_weekofyear_mean:0.024281116428763357 , post360_weekofyear_count:0.022381714522338794 , post360_dayofyear_50%:0.01977948438253097 , post120_dayofyear_std:0.019053914376049304 4 | 2017-06-10 18:07:38.840265 result: (0.9812260591191833, 0.95092811109387121, 0.61914474576265377, 0.71653278548142452, 0.43749602639182639) fetures: 702 post240_dayofyear_std:0.06061743530771681 , post360_dayofyear_75%:0.05758784744580659 , post360_weekofyear_std:0.05700257710150259 , post360_week_std:0.05475886198817952 , post120_dayofyear_50%:0.04903319222276939 , post240_dayofyear_75%:0.04740758625355467 , post360_dayofyear_std:0.04045348492237613 , post240_dayofyear_max:0.03420164113736823 , post120_dayofyear_25%:0.031313071544771066 , post360_week_mean:0.029646691307865077 , post240_dayofyear_50%:0.027636178147651215 , post360_dayofyear_max:0.027471913720788735 , 
post360_week_count:0.02657281098445252 , post360_week_max:0.025361665971013903 , post360_weekofyear_25%:0.025076057967035353 , post360_weekofyear_max:0.02439508472114531 , post360_weekofyear_mean:0.024281116428763357 , post360_weekofyear_count:0.022381714522338794 , post360_dayofyear_50%:0.01977948438253097 , post120_dayofyear_std:0.019053914376049304 5 | 2017-06-10 18:13:41.316884 result: (0.9815575664129459, 0.94995628600124637, 0.61416807863600109, 0.75009225632557885, 0.40984849907331433) fetures: 702 post240_dayofyear_std:0.07093754904341293 , post360_dayofyear_75%:0.061197642670642656 , post360_weekofyear_std:0.05263520169823813 , post360_dayofyear_std:0.04887795505499953 , post360_week_std:0.04622601588097645 , post240_dayofyear_75%:0.04489649678455263 , post120_dayofyear_50%:0.040187236168927845 , post240_dayofyear_max:0.0369892481419167 , post360_weekofyear_max:0.034461899665024394 , post360_week_mean:0.0340585407347569 , post360_week_max:0.031065407256356273 , post360_week_count:0.030808708306986312 , post240_dayofyear_50%:0.025359374251381593 , post360_weekofyear_mean:0.025299285022859355 , post120_dayofyear_std:0.0219081681188889 , post360_weekofyear_count:0.021401989788210073 , post360_dayofyear_max:0.021353503033307436 , post360_weekofyear_25%:0.02073365144202253 , post240_weekofyear_std:0.020610251119400843 , post120_dayofyear_25%:0.019286342292937124 6 | 2017-06-10 21:15:10.157383 result: (0.9822051009352075, 0.95579364610049611, 0.7062930556330822, 0.81414454969936811, 0.56954695479573703) fetures: 898 post360_week_std:0.06762393904927758 , post240_dayofyear_std:0.05325587110851327 , post360_dayofyear_75%:0.05136696042942002 , post360_dayofyear_std:0.04943380259543345 , post360_weekofyear_std:0.04934004811353454 , post240_dayofyear_75%:0.04597445495162306 , post120_dayofyear_50%:0.044688318382925515 , post360_week_25%:0.02948095048177686 , post240_dayofyear_max:0.0289554116986942 , post360_week_count:0.02653548392148828 , 
post120_dayofyear_25%:0.02497912510445329 , is_festival:0.02482572307566017 , post360_weekofyear_25%:0.02323131702891193 , post360_week_mean:0.0232217056392544 , post360_dayofyear_max:0.022677801111633936 , is_week:0.020851448143867687 , post360_week_max:0.020792355477846667 , is_holiday:0.020423909005792922 , post360_weekofyear_max:0.020297191740446426 , post360_dayofyear_50%:0.020162771299375385 7 | 2017-06-10 21:36:00.079713 result: (0.9826812518986928, 0.95605369543960583, 0.68937185194605832, 0.8199889398361534, 0.52346412256255381) fetures: 868 post360_week_std:0.059184042232527816 , post360_dayofyear_std:0.05783583438730425 , post240_dayofyear_std:0.057574260680706646 , post240_dayofyear_75%:0.05160454714145169 , post360_week_max:0.045137841843380204 , post360_dayofyear_75%:0.04227164346666081 , post120_dayofyear_50%:0.03569903264919281 , post360_week_count:0.035174477706062024 , post360_weekofyear_std:0.03222649606930148 , post360_dayofyear_max:0.031951271707399885 , post360_weekofyear_count:0.031013369532032663 , post360_weekofyear_50%:0.030265111090275347 , post360_week_25%:0.028118094560222056 , post360_weekofyear_mean:0.026757913582540902 , is_festival:0.024883471277168966 , post240_dayofyear_max:0.02439109468279022 , post360_weekofyear_max:0.021607394475592873 , post240_dayofyear_50%:0.021476271530959577 , is_week:0.020138534061675803 , is_holiday:0.020072241653007562 8 | 2017-06-10 22:44:09.230867 result: (0.9810693443654728, 0.94978575722172542, 0.62406609817483327, 0.72017865084301147, 0.46713572931636471) fetures: 1104 post360_weekofyear_std:0.06948627102325142 , post240_dayofyear_std:0.05567522583766983 , post360_dayofyear_std:0.05323543240992519 , post240_dayofyear_75%:0.0517225978339905 , post360_dayofyear_75%:0.05171704142873725 , post120_dayofyear_50%:0.05020134036423885 , post360_week_std:0.049976129308657434 , post240_dayofyear_max:0.03095550481825238 , post360_week_25%:0.030547290124186784 , post360_dayofyear_max:0.029104558549925125 , 
post360_week_count:0.02797305019800481 , post120_dayofyear_25%:0.027965956286967103 , post360_week_mean:0.02626641749445518 , post240_dayofyear_50%:0.02226705197443881 , post360_weekofyear_mean:0.022078075749952266 , post360_dayofyear_50%:0.021391665624325422 , post360_weekofyear_25%:0.0205594437435275 , post240_weekofyear_std:0.01971365781031094 , post240_week_std:0.018831011385202025 , post360_week_max:0.018569535554401162 9 | 2017-06-10 22:54:10.250829 result: (0.982363706195949, 0.96066600069520058, 0.64939188461289943, 0.80822338640587588, 0.22162859500331256) fetures: 2208 post150_dayofyear_max:0.16010911457429855 , post150_dayofyear_std:0.15292631108579458 , post360_weather_max_count:0.037645671153132575 , post240_weather_max_std:0.03237362057319401 , post150_weather_max_max:0.027552593112726082 , post270_weather_max_std:0.025471248648629628 , post300_weather_max_std:0.02440157378458934 , post120_dayofyear_50%:0.020028400502903384 , post210_weather_max_std:0.017888068949600374 , post150_dayofyear_mean:0.017564641077130197 , post330_weather_max_std:0.013859076623724549 , post150_weather_max_std:0.011204284738801036 , post330_weather_max_count:0.01059584319752082 , post150_weather_max_count:0.009314193188305989 , post360_weekofyear_25%:0.008092697472318493 , post60_weather_max_std:0.00780814359692211 , post120_dayofyear_75%:0.007589314103314759 , post180_weather_max_std:0.0075460925315592575 , post240_weekofyear_std:0.007102608588847814 , post150_weather_max_mean:0.006962719280855951 10 | 2017-06-10 23:06:36.005611 result: (0.9824143773145466, 0.95931549164187524, 0.64046212967688765, 0.79641664067145457, 0.38358848051457639) fetures: 1728 post150_dayofyear_max:0.24589109189856068 , post150_dayofyear_std:0.22125646763239545 , post150_dayofyear_mean:0.03152945721600716 , post120_dayofyear_50%:0.016989123579456967 , post270_dayofyear_75%:0.01459198182447194 , post360_weekofyear_std:0.013079798772271058 , post360_week_max:0.011882143256492665 , 
post360_weekofyear_max:0.011258373307040648 , post240_weekofyear_std:0.011240134153628425 , post360_dayofyear_75%:0.009749179635727679 , post360_week_25%:0.009643561204603125 , post300_dayofyear_75%:0.008596215464555645 , post210_dayofyear_75%:0.008455530194865784 , post360_weekofyear_mean:0.008303488957485871 , post180_dayofyear_50%:0.00819432444982622 , post300_week_max:0.007911045044612583 , post150_dayofyear_50%:0.007773686877818734 , post150_dayofyear_25%:0.007649709348662322 , post180_dayofyear_75%:0.0075052377317228365 , post360_dayofyear_max:0.007393453393643394 11 | 2017-06-10 23:15:27.964041 result: (0.9817043822495296, 0.95580068018658104, 0.66005799770865203, 0.81533845753226031, 0.40983835558976345) fetures: 2208 post150_dayofyear_max:0.25638705349450625 , post150_dayofyear_std:0.22165863246951925 , post150_dayofyear_mean:0.01789594183099462 , post360_week_std:0.01494142111616415 , post120_dayofyear_50%:0.013747571545445494 , post150_dayofyear_25%:0.012606159293074258 , post360_weekofyear_std:0.012412248779932702 , post210_dayofyear_75%:0.011140208785617308 , post180_dayofyear_75%:0.011015741245508671 , post240_dayofyear_75%:0.00986211952643449 , post360_weekofyear_max:0.009666276834181161 , post300_dayofyear_75%:0.009558421526013045 , post360_dayofyear_75%:0.009319439013739243 , post270_dayofyear_75%:0.007669082305535447 , post330_dayofyear_75%:0.007541019805731144 , post360_week_25%:0.007126927259613465 , post180_dayofyear_25%:0.006882667588199024 , post300_dayofyear_50%:0.00659357818679421 , post360_week_max:0.006537911584139208 , post300_week_max:0.006438391179657396 12 | 2017-06-10 23:21:32.236102 result: (0.9824506245857773, 0.95924409064056793, 0.65242749790927546, 0.80825911122580352, 0.39922845296404402) fetures: 2208 post150_dayofyear_max:0.24005512463558326 , post150_dayofyear_std:0.2308148293765062 , post150_dayofyear_mean:0.03229727832730095 , post120_dayofyear_50%:0.016920113130830892 , post360_weekofyear_std:0.014667239930494982 , 
post180_dayofyear_75%:0.013149619830856779 , post360_week_max:0.012963540123670111 , post300_dayofyear_75%:0.011529695803939392 , post210_dayofyear_75%:0.009928935148667488 , post360_week_25%:0.009561354169231642 , post360_weekofyear_max:0.009214851291084073 , post360_dayofyear_75%:0.008817977596810429 , post210_dayofyear_50%:0.008698094521734645 , post360_weekofyear_mean:0.007794007459013409 , post240_dayofyear_75%:0.0076514494492410075 , post300_weekofyear_max:0.007548760921191867 , post240_weekofyear_std:0.00735092696081901 , post300_week_max:0.007214018895017005 , post270_dayofyear_75%:0.007138834537515316 , post180_dayofyear_max:0.00706807903131027 13 | 2017-06-10 23:45:42.921774 result: (0.9831875193825498, 0.95945394893937186, 0.686092964194567, 0.81812145631897049, 0.4700743846651656) (0.9831875193825498, 0.95945394893937186, 0.686092964194567, 0.81812145631897049, 0.4700743846651656) params: {'criterion': 'mae', 'max_features': 100, 'n_estimators': 100} 0.702487671874 fetures: 2208 post150_dayofyear_std:0.0330885869833952 , post150_dayofyear_max:0.029283462573992693 , post150_dayofyear_25%:0.021590656211323757 , post300_dayofyear_std:0.01771296856613513 , post360_weekofyear_max:0.017562392536192028 , post360_week_std:0.01662561736452066 , post360_weekofyear_mean:0.016408935057646807 , post180_dayofyear_std:0.015501636268463981 , post360_dayofyear_std:0.015005344722218634 , post150_dayofyear_mean:0.014911514712743516 , post120_dayofyear_75%:0.013539354778823016 , post210_dayofyear_std:0.013456791749897508 , post360_dayofyear_max:0.01321322947382893 , post210_dayofyear_75%:0.012718799593584563 , post360_week_count:0.011449106393858403 , post180_dayofyear_50%:0.011430833343111053 , post360_weekofyear_std:0.01111861692907194 , post300_dayofyear_75%:0.010965035796560068 , post330_dayofyear_std:0.01087378143369563 , post240_dayofyear_50%:0.010644420580679303 14 | 2017-06-10 23:53:26.929485 result: (0.9824345308664283, 0.95767507375570637, 0.68518882772904433, 
0.81661477804353044, 0.44971268884566817) (0.9824345308664283, 0.95767507375570637, 0.68518882772904433, 0.81661477804353044, 0.44971268884566817) params: {'criterion': 'mse', 'n_estimators': 300, 'max_features': 'sqrt'} 0.70941280102 fetures: 2208 post150_dayofyear_max:0.0256325632387379 , post150_dayofyear_std:0.022164739381156744 , post150_dayofyear_25%:0.01887170334889061 , post360_week_count:0.013925713673282339 , post360_weekofyear_25%:0.013687948386007011 , post210_dayofyear_std:0.013512463760180381 , post360_weekofyear_std:0.013426669767689504 , post360_weekofyear_max:0.012316258668840058 , post120_dayofyear_75%:0.01225970144645653 , post330_dayofyear_max:0.012190256572543074 , post150_dayofyear_50%:0.011958771056427002 , post360_dayofyear_std:0.011845140641975187 , post150_dayofyear_mean:0.01171720352493833 , post300_dayofyear_std:0.011477847650876224 , post360_dayofyear_max:0.01133734649391173 , post150_dayofyear_75%:0.011327980674041449 , post360_week_std:0.011303593156636498 , post360_weekofyear_50%:0.011248646457708645 , post360_week_75%:0.011075548365801196 , post180_dayofyear_std:0.010474519204089896 15 | 2017-06-11 06:52:37.243021 result: (0.9831683882309213, 0.95980503571601361, 0.68308778066567644, 0.83110382797538129, 0.47037101851499641) params: {'criterion': 'mae', 'max_features': 50, 'n_estimators': 200} 0.701063463314 fetures: 3128 post150_dayofyear_std:0.022417681457041105 , post150_dayofyear_max:0.008964231866345035 , post450_week_25%:0.008679449276035188 , post420_dayofyear_std:0.008378380016366024 , post510_dayofyear_75%:0.008085424156377726 , post360_dayofyear_50%:0.008024360373351991 , post240_dayofyear_50%:0.007976264164219366 , post270_dayofyear_std:0.007899533878504372 , post180_dayofyear_std:0.00789909176487151 , post450_week_std:0.0076043298466074724 , post300_dayofyear_max:0.007592154036674166 , post480_weekofyear_75%:0.00745668852634958 , post390_week_std:0.007327618894522977 , post240_dayofyear_std:0.007305236062653214 , 
post450_weekofyear_std:0.007295992873653055 , post240_dayofyear_75%:0.0069641583407138745 , post270_dayofyear_50%:0.006866282241384245 , post450_weekofyear_min:0.006762876854847269 , post510_dayofyear_25%:0.006737314430020437 , post330_dayofyear_50%:0.00672964726717036 16 | 2017-06-11 08:08:48.972957 result: (0.9819418875392917, 0.95461724491246847, 0.65588617424282136, 0.78595625077613818, 0.36854251069819643) params: {'max_features': 50, 'n_estimators': 200, 'criterion': 'mae'} 0.657817091026 fetures: 3128 post150_dayofyear_std:0.020370902095754772 , post150_dayofyear_75%:0.010682919534475801 , post150_dayofyear_mean:0.010356030392783464 , post480_week_25%:0.008065823516717563 , post240_dayofyear_std:0.007880957768625314 , post150_dayofyear_50%:0.007759140214906504 , post450_week_25%:0.007694234383072841 , post480_weekofyear_75%:0.007559134348312591 , post270_dayofyear_max:0.007480062816737922 , post180_dayofyear_std:0.007426108808050903 , post300_dayofyear_max:0.007216634941450267 , post480_weekofyear_max:0.007051269818303007 , post150_dayofyear_25%:0.00673421691242628 , post390_dayofyear_25%:0.0067163490812291735 , post510_dayofyear_std:0.006716206210223072 , post150_dayofyear_max:0.006430728802393112 , post420_dayofyear_std:0.006333905057578368 , post240_dayofyear_max:0.006317673950045931 , post510_dayofyear_25%:0.006245357615756679 , post360_week_std:0.006192215862531334 17 | 2017-06-11 08:26:04.393517 result: 38.33806318% (0.9826145715111593, 0.95718188839586338, 0.69169719365803672, 0.84069560611894567, 0.41273760426445294) params: {'max_features': 10, 'criterion': 'mae', 'n_estimators': 200} 0.676267991545 fetures: 3128 post420_dayofyear_std:0.008497464164185122 , post240_dayofyear_std:0.007893754602188318 , post480_week_50%:0.007858500539276064 , post180_dayofyear_std:0.0074319413130420385 , post300_dayofyear_std:0.007418788393584342 , post180_dayofyear_max:0.00711614392587529 , post150_dayofyear_std:0.006997454804338879 , 
post360_dayofyear_std:0.006809163610892495 , post360_week_75%:0.006778430863722091 , post270_week_max:0.006740857018045632 , post360_dayofyear_50%:0.006577623890052819 , post450_dayofyear_max:0.006559779559263823 , post390_dayofyear_75%:0.006499069222878901 , post510_dayofyear_std:0.0064129174153285 , post480_dayofyear_max:0.0062913998389055445 , post510_dayofyear_max:0.00619970746089251 , post360_dayofyear_mean:0.006107403142955725 , post390_dayofyear_50%:0.00603185050789311 , post510_dayofyear_50%:0.006027622495142437 , post210_dayofyear_std:0.005946079297951164 18 | 2017-06-11 08:47:43.883578 result: (0.9823636978086018, 0.95489379696615817, 0.62649701279962344, 0.85489533006525442, 0.30121598149268813) params: {'criterion': 'mae', 'n_estimators': 100, 'max_features': 10} 0.675404347387 fetures: 3128 post240_dayofyear_std:0.009192095885714542 , post390_dayofyear_50%:0.008651182677963255 , post480_week_50%:0.00855821041340417 , post420_dayofyear_std:0.007845265040992835 , post180_dayofyear_std:0.0077120755814341305 , post420_dayofyear_50%:0.007344936201838517 , post210_dayofyear_std:0.0073311293971738455 , post510_week_mean:0.007197798719444195 , post480_dayofyear_max:0.007164725263317988 , post330_dayofyear_50%:0.007066969554275121 , post180_dayofyear_mean:0.0069429905206699625 , post120_dayofyear_50%:0.006831209231015566 , post390_dayofyear_std:0.00675543155766053 , post240_weekofyear_mean:0.006648836744704547 , post360_dayofyear_std:0.006551403352791161 , post210_dayofyear_50%:0.006418894936123176 , post240_dayofyear_mean:0.006339732474432532 , post300_dayofyear_std:0.006286731820013665 , post240_dayofyear_50%:0.00625809663649062 , post510_weekofyear_count:0.006237365281067994 19 | 2017-06-11 08:58:51.649481 result: (0.9817621367190471, 0.95666966094253481, 0.62237768632453883, 0.83980002881387417, 0.35138731659108202) params: {'criterion': 'mae', 'n_estimators': 100, 'max_features': 100} 0.656882890725 fetures: 3128 post150_dayofyear_std:0.015542910685266946 
, post150_dayofyear_50%:0.014759982697243832 , post150_dayofyear_mean:0.013545261201333545 , post150_dayofyear_25%:0.01236966761255613 , post150_dayofyear_max:0.011920579268849587 , post420_week_25%:0.010954559139424938 , post180_dayofyear_25%:0.010497432544783213 , post150_dayofyear_75%:0.00970027755521497 , post360_dayofyear_max:0.009518843644578893 , post480_dayofyear_25%:0.009043584689071894 , post480_week_mean:0.00882494568153439 , post450_week_25%:0.008615583996591175 , post210_dayofyear_25%:0.008045482435829329 , post360_dayofyear_std:0.007964959618029484 , post330_dayofyear_max:0.007836547041113145 , post510_dayofyear_mean:0.007791970054728534 , post390_dayofyear_25%:0.007726349346811068 , post480_week_25%:0.007699526984838296 , post390_weekofyear_75%:0.007561403892430957 , post510_week_25%:0.007401438532174857 20 | 2017-06-11 15:58:19.501731 result: (0.9814349581689806, 0.95576951264114907, 0.61674049244621287, 0.73368932669434939, 0.5102332296929849) params: {'max_features': 50, 'criterion': 'mae', 'n_estimators': 100} 0.629054129826 fetures: 3128 post150_dayofyear_mean:0.014807172835698192 , post150_dayofyear_50%:0.013605929680389157 , post510_dayofyear_std:0.011998228766294779 , post480_week_mean:0.01104928578506366 , post240_dayofyear_std:0.010920012257339112 , post150_dayofyear_std:0.010487458558428324 , post450_week_25%:0.010252480949790194 , post150_dayofyear_25%:0.009360222139004311 , post420_week_25%:0.00798410875661814 , post150_dayofyear_75%:0.007869174735869134 , post510_dayofyear_count:0.007721122815259382 , post180_dayofyear_std:0.007646612759851348 , post510_dayofyear_25%:0.007413240322320549 , post270_dayofyear_max:0.007378546552945636 , post420_dayofyear_std:0.007249181136462292 , post240_dayofyear_max:0.007226312648989882 , post480_dayofyear_25%:0.007070355456462329 , post270_dayofyear_25%:0.006909248872775168 , post420_weekofyear_75%:0.006903978073348 , post390_week_count:0.006882762448960537 21 | 2017-06-11 16:33:37.162570 result: 
(0.9816843324397545, 0.90515435921606147, 0.34905165144414896, 0.4671767465679193, 0.038083361996820608) params: {'n_estimators': 100, 'criterion': 'mae', 'max_features': 'sqrt'} 0.425271058438 fetures: 2576 post240_weather_max_std:0.012864649575464859 , post270_weekofyear_std:0.0126198860921078 , post360_weather_max_max:0.010935212787435826 , post30_weather_min_std:0.010769899381306085 , post390_weather_max_std:0.010519958358186351 , post300_weather_max_max:0.010432196502535225 , post390_weather_max_max:0.010428378496249273 , post270_week_std:0.01030731990801671 , post150_weekofyear_75%:0.009745472574313243 , post360_week_max:0.009327295806867949 , post330_weather_max_std:0.009080110129851581 , post120_weather_max_25%:0.009001006770954039 , post120_weather_max_min:0.00879535211580983 , post150_weather_max_count:0.00877089502845152 , post120_weekofyear_min:0.008741762775095698 , post360_weekofyear_max:0.008721351251843831 , post300_week_std:0.008613261447946932 , post420_weather_min_max:0.008610996373947242 , post30_weather_min_max:0.008345216958299066 , post210_weekofyear_std:0.008289296068348771 22 | 2017-06-11 16:40:47.595154 result: (0.9810588480757253, 0.9009934953760721, 0.47064635514010245, 0.77357657917553135, 0.079661131009523656) params: {'max_features': 10, 'criterion': 'mae', 'n_estimators': 100} 0.486083862193 fetures: 2576 post150_weekofyear_mean:0.009855215160781446 , post270_week_max:0.009424304452172745 , post150_weather_max_std:0.009179089897462033 , post390_weather_max_std:0.00882516862532233 , post270_week_std:0.008645072333986885 , post30_weather_min_mean:0.008559025833251774 , post120_weather_max_25%:0.008226272369590993 , post240_weather_max_mean:0.00744704933762555 , post30_weather_min_max:0.0070183445186746615 , post180_week_max:0.0069845468590864004 , post150_weather_min_count:0.006816804954875264 , post210_weather_min_max:0.006768159980661166 , post330_week_25%:0.006710496086316952 , post360_week_std:0.006707748614139498 , 
post30_weather_min_count:0.006610109615358368 , post360_dayofyear_max:0.006600819659005956 , post420_week_max:0.0065661221188512385 , post390_week_max:0.006525288637539832 , post330_weekofyear_mean:0.006243911853077088 , post210_week_max:0.006066934639645646 23 | 2017-06-11 19:11:56.325843 result: (0.9784196521092927, 0.94554580196881099, 0.53083376740955912, 0.76560963212100075, 0.23026712372690553) params: {'n_estimators': 100} 0.595904515492 fetures: 947 post30_dayofyear_25%:0.11811921791641394 , post30_dayofyear_count:0.07579042696019553 , post90_weather_max_std:0.056591444366807 , post30_dayofyear_std:0.046026138026924256 , post60_weather_max_std:0.04162110461211387 , post30_dayofyear_mean:0.025382906526467672 , post180_dayofweek_50%:0.022430012564422307 , post180_dayofyear_mean_consumption:0.021604213790490507 , post360_dayofyear_mean_consumption:0.018768060346445497 , post30_dayofyear_50%:0.017633352968677737 , post30_dayofyear_max:0.01680462663812277 , post30_dayofyear_mean_consumption:0.015237032570365959 , post360_weather_min_mean:0.014943015954353099 , post60_weather_max_75%:0.014212583959111916 , post360_weather_min_std:0.012485563991450574 , post60_dayofyear_mean_consumption:0.011525453469041776 , post180_dayofyear_25%:0.010847699219107021 , post180_weather_max_75%:0.010164706192497814 , post90_weather_max_75%:0.009370928423593057 , post90_weather_min_std:0.00932922385884032 24 | 2017-06-11 19:29:49.236588 result: (0.9785965129060097, 0.94653241011561007, 0.53755270245661246, 0.76393584791510372, 0.25302397234282148) params: {'criterion': 'mse', 'n_estimators': 300} 0.596969059163 fetures: 947 post30_dayofyear_25%:0.09167544904606786 , post30_dayofyear_count:0.08591894356458817 , post90_weather_max_std:0.06076611161894268 , post30_dayofyear_std:0.046122230531716935 , post60_weather_max_std:0.03888173093931677 , post30_dayofyear_max:0.031669988689992434 , post180_dayofweek_50%:0.02399328462044762 , post180_dayofyear_mean_consumption:0.020319509482770916 
, post30_dayofyear_50%:0.017601774067209627 , post30_dayofyear_mean:0.017218235631364813 , post90_dayofyear_mean_consumption:0.015498407303676718 , post30_dayofyear_mean_consumption:0.014988022017633964 , post360_weather_min_mean:0.014787352206729346 , post360_weather_min_std:0.013178957455030355 , post60_weather_max_75%:0.013066742045006054 , post60_dayofyear_mean_consumption:0.012579454319031442 , post180_dayofyear_25%:0.01194076805497424 , post360_dayofyear_mean_consumption:0.011908989472083768 , post180_weather_max_75%:0.011077881804171468 , post90_weather_min_std:0.011029403488666262 25 | 2017-06-11 20:04:12.628598 result: (0.979350828168523, 0.94996825171369259, 0.52923490324997391, 0.76152951699607996, 0.22204518053803912) params: {'max_features': 100, 'n_estimators': 500} 0.545932166442 fetures: 947 post30_dayofyear_mean:0.03292709069686641 , post30_dayofyear_mean_consumption:0.029643831644066528 , post360_dayofyear_mean_consumption:0.02770423732034277 , post30_dayofyear_std:0.027549057148796817 , post180_dayofyear_mean_consumption:0.027093311999831674 , post60_dayofyear_mean_consumption:0.026930525487501192 , post30_dayofyear_count:0.025774574886522947 , post30_dayofyear_75%:0.025636980550907962 , post30_dayofyear_max:0.023392608001917482 , post30_dayofyear_25%:0.022398460960013456 , post30_dayofyear_50%:0.02203899787249782 , post90_weather_max_std:0.02042789461889396 , post60_weather_max_std:0.019837906795038853 , post90_dayofyear_mean_consumption:0.017000500001235756 , post30_dayofyear_min:0.01588341954651439 , post60_weather_max_max:0.014604075074070603 , post90_weather_max_max:0.010931682451005266 , post90_weather_min_max:0.010719345903978023 , post90_weather_max_75%:0.009568806874345834 , post90_weather_min_std:0.009284540257766132 26 | 2017-06-11 20:52:11.154737 result: (0.9805667292777991, 0.9528420677391769, 0.51928078952621559, 0.80357155376042555, 0.098488845197916675) params: {'max_features': 100, 'n_estimators': 100} 0.61073342904 fetures: 947 
post360_dayofyear_mean_consumption:0.03704127417131781 , post30_dayofyear_mean:0.034040630835957056 , post30_dayofyear_mean_consumption:0.03210342153072227 , post180_dayofyear_mean_consumption:0.03208407181088475 , post60_dayofyear_mean_consumption:0.028872098095710272 , post90_dayofyear_mean_consumption:0.024996829126474912 , post180_dayofyear_min:0.023715415253222343 , post30_dayofyear_25%:0.02272970173784293 , post180_dayofyear_count:0.02254698798726697 , post90_dayofyear_min:0.02062065977948385 , post30_dayofyear_std:0.020078511839485735 , post30_dayofyear_75%:0.019841721598996668 , post60_dayofyear_min:0.015718063029756937 , post30_dayofyear_min:0.015673031703683372 , post60_weather_max_mean:0.014934283181800512 , post360_weather_max_max:0.014130325246507858 , post360_dayofyear_count:0.013041758458654261 , post60_weather_max_std:0.012432604903157847 , post60_weather_max_50%:0.011893318693613278 , post30_dayofyear_50%:0.010682019044875845 27 | 2017-06-11 21:14:08.918051 result: (0.9796536193377529, 0.94988101646402312, 0.49657570196083539, 0.80299260406147277, 0.14774362853862943) params: {'n_estimators': 300, 'max_features': 100} 0.568236803016 fetures: 947 post30_dayofyear_mean:0.04497040334629134 , post60_dayofyear_mean_consumption:0.03916903161800868 , post30_dayofyear_mean_consumption:0.031710198517904555 , post180_dayofyear_mean_consumption:0.031109698338844995 , post360_dayofyear_mean_consumption:0.029934374947765262 , post30_dayofyear_75%:0.028947422563007745 , post90_dayofyear_mean_consumption:0.024697913691739386 , post30_dayofyear_25%:0.02260825137530281 , post30_dayofyear_std:0.021892750568281452 , post30_dayofyear_max:0.018446171965005188 , post90_weather_max_std:0.015626536232576142 , post30_dayofyear_count:0.015376368397690627 , post360_weather_max_max:0.014511203842828098 , post30_dayofyear_50%:0.01401841199589714 , post360_weather_max_std:0.013878619442006015 , post30_dayofyear_min:0.012756048357801452 , 
post90_weather_max_25%:0.01268616867979231 , post90_weather_max_max:0.012251838601502649 , post360_weather_max_25%:0.012127789019530086 , post60_weather_max_max:0.01134733090872791 28 | 2017-06-11 22:05:28.309707 result: (0.9860422278222353, 0.97538373196296657, 0.72281602842497583, 0.90777480934263699, 0.46913574740101072) params: {'n_estimators': 200, 'max_features': 100} 0.757100555647 fetures: 947 post360_is_festival_mean:0.051585150706398394 , post30_is_week_std:0.02815793070171819 , post30_is_holiday_std:0.027995437386961308 , post90_dayofyear_mean_consumption:0.025973191190086443 , post180_dayofyear_mean_consumption:0.02447763976522031 , post360_dayofyear_mean_consumption:0.0171762447855321 , post30_dayofyear_mean_consumption:0.0165617283461739 , post60_is_week_std:0.01515694397368341 , post90_is_month_end_std:0.011801336931753064 , post360_is_festival_75%:0.011193087365073111 , post90_is_year_start_max:0.010451485991394378 , post60_is_year_start_max:0.010268371091034042 , post60_dayofyear_mean_consumption:0.009455856333778252 , post60_is_festival_std:0.009032913608421537 , post30_is_weekend_std:0.00892505027556921 , post360_is_festival_50%:0.008745003892332374 , post30_dayofweek_std:0.0085378125824328 , post30_is_year_start_std:0.00849014511263141 , post360_weekofyear_std:0.00841137041836314 , post360_is_year_start_count:0.008197045563485762 29 | 2017-06-11 23:26:37.041631 result: (0.9859796371561408, 0.9754629518756025, 0.71896196585158012, 0.90887844280777552, 0.48842980444506767) params: {'max_features': 100, 'n_estimators': 500} 0.743003458272 fetures: 882 post366_is_festival_mean:0.04721796907113417 , post366_is_festival_count:0.036851157336773376 , post30_is_holiday_std:0.03639024586547476 , post360_dayofyear_mean_consumption:0.02582299981241632 , post30_is_week_std:0.025110922405546087 , post60_dayofyear_mean_consumption:0.02067480042687998 , post180_dayofyear_mean_consumption:0.017889890325557704 , 
post90_dayofyear_mean_consumption:0.017332422986329362 , post30_dayofyear_mean_consumption:0.016831391647010974 , post366_week_std:0.01072675522161596 , post60_is_festival_25%:0.010072623362054013 , post366_is_month_start_mean:0.009655833457309049 , post90_is_year_start_max:0.009624668374092649 , post90_is_quarter_start_std:0.009355447197347547 , post60_is_holiday_std:0.008616167666026213 , post30_dayofweek_std:0.008457193856671786 , post366_is_year_start_mean:0.008360823934299535 , post366_weekofyear_std:0.007953899430910173 , post30_is_year_start_std:0.006969789588555151 , post180_is_year_start_std:0.006852640492247664 30 | 2017-06-11 23:51:24.220140 result: (0.9860098827620267, 0.97493707538390306, 0.69982191346224276, 0.89222561614429197, 0.41652547120700678) params: {'max_features': 100, 'n_estimators': 500} 0.744737662618 fetures: 882 post366_is_festival_mean:0.04785729282430978 , post366_is_festival_count:0.037254644835714716 , post30_is_holiday_std:0.035718516225495024 , post30_is_week_std:0.02917860355956139 , post90_is_year_start_max:0.014104182303085307 , post366_is_year_start_count:0.011621301926753383 , post60_is_year_start_max:0.009972666493059302 , post90_is_month_start_std:0.009771885074172862 , post366_is_month_start_mean:0.009694197503834653 , post60_is_festival_25%:0.009659546055855069 , post180_is_month_end_std:0.009375579410585722 , post30_dayofweek_std:0.008812767342725448 , post90_is_month_end_std:0.00868900422020309 , post60_is_holiday_std:0.008605141220977287 , post30_dayofyear_mean_consumption:0.007769569461819031 , post60_is_week_std:0.007420453968558346 , post30_is_weekend_std:0.006848387056812638 , post366_dayofyear_std:0.006844018626798128 , post90_dayofyear_mean_consumption:0.006075090620651104 , post180_weather_min_50%:0.005969549182348032 31 | 2017-06-12 00:05:43.794702 result: (0.9860706488665998, 0.97534921354359272, 0.72078167119037273, 0.90363782784957447, 0.48863810474133329) params: {'n_estimators': 200, 'max_features': 50} 
0.743158037935 fetures: 882 post366_is_festival_mean:0.03649376299105978 , post30_is_holiday_std:0.02258167887820268 , post30_is_week_std:0.022044272893742566 , post360_dayofyear_mean_consumption:0.019963456957026712 , post366_is_festival_count:0.01882470095489192 , post90_is_year_start_max:0.01633665204149714 , post180_dayofyear_mean_consumption:0.01567402044806286 , post60_dayofyear_mean_consumption:0.013673382475315347 , post60_is_festival_25%:0.010670982718640002 , post30_dayofyear_mean_consumption:0.010559358327522162 , post366_is_month_start_mean:0.009480563553280058 , post60_wind_type_75%:0.009216229588183326 , post90_dayofyear_mean_consumption:0.008846540951297997 , post366_week_std:0.008812853971518767 , post30_is_weekend_std:0.008470241561017753 , post60_is_week_std:0.008188887934216849 , post60_is_holiday_std:0.007627849145458359 , post366_is_month_end_mean:0.007337613834769441 , post60_is_festival_std:0.007012774808170126 , post366_dayofyear_std:0.006611882995252664 32 | 2017-06-12 07:23:35.557214 result: 63.98888347% (0.9866254384003387, 0.97799788949857136, 0.73936076203923151, 0.91569187187148926, 0.51828353005522221) params: {'max_features': 50, 'n_estimators': 500} 0.777988742549 fetures: 670 post30_is_holiday_std:0.03050903718051737 , post366_is_festival_count:0.02943960513768783 , post366_is_festival_mean:0.02725363727286884 , post30_is_week_std:0.020258481676271825 , post60_dayofyear_mean_consumption:0.016901125033073568 , post30_dayofyear_mean_consumption:0.015474243304829525 , post180_dayofyear_mean_consumption:0.015311166084897654 , post90_dayofyear_mean_consumption:0.014101101716247481 , post360_dayofyear_mean_consumption:0.01243032019748404 , post30_is_weekend_std:0.012062861295601066 , post60_wind_type_75%:0.011108343014274045 , post30_dayofweek_std:0.010988796545960702 , post60_is_week_std:0.010505236412668078 , post366_week_std:0.009535675996744628 , dayofyear:0.009120641924424624 , post366_days_in_month_25%:0.00859323859809345 , 
weather_min:0.008308283289756335 , post366_is_festival_50%:0.007882745783252047 , post60_is_holiday_std:0.007720773225235407 , post180_days_in_month_25%:0.007471641316633146 33 | 2017-06-12 07:55:57.083013 result: (0.98647077082579, 0.97766446838697241, 0.73736552870742533, 0.91550905640307811, 0.51525948359136309) params: {'n_estimators': 500, 'max_features': 200} 0.770428631298 fetures: 1150 post366_is_festival_count:0.0646681767325599 , post366_is_festival_mean:0.056423693956536305 , post30_is_week_std:0.0455505117833928 , post30_is_holiday_std:0.04338312404183893 , post180_dayofyear_mean_consumption:0.02487604783294715 , post90_dayofyear_mean_consumption:0.02452895920170585 , post360_dayofyear_mean_consumption:0.02340830246292294 , post60_dayofyear_mean_consumption:0.02097400094658198 , post30_dayofyear_mean_consumption:0.0194873419557608 , post60_is_holiday_std:0.01278209594108359 , dayofyear:0.012252390059380178 , post366_week_std:0.012062631412360652 , post30_dayofweek_std:0.009921234681556349 , post366_weekofyear_std:0.009385565374136451 , post60_is_week_std:0.008502364159111568 , post30_is_weekend_std:0.008131421733937983 , post300_is_festival_50%:0.007739547507486541 , post300_is_weekend_count:0.007084020430866262 , week:0.006933822446656162 , weather_min:0.00680273392220532 34 | 2017-06-12 08:29:16.440120 result: (0.9857697957310936, 0.97520264450998351, 0.74766783959860228, 0.91986322339539617, 0.56379069236105284) params: {'max_features': 100, 'n_estimators': 100} 0.783397061484 fetures: 710 post366_is_festival_is0_day_consumption:0.053009558125623926 , post366_is_festival_mean:0.045808018906376535 , post30_is_week_std:0.04033248224727701 , post366_is_festival_count:0.032184321358420726 , post180_dayofyear_mean_consumption:0.0312250898045562 , post30_is_holiday_std:0.03054884518165129 , post90_dayofyear_mean_consumption:0.029286292769630674 , post60_wind_type_75%:0.02402238487725719 , post360_dayofyear_mean_consumption:0.022255195079653192 , 
post30_dayofyear_mean_consumption:0.020338018451951868 , post60_dayofyear_mean_consumption:0.01671504236551689 , dayofyear:0.012799754178785783 , post180_days_in_month_mean:0.012572628428667238 , post60_is_holiday_std:0.01151770828347939 , post30_weather_type_std:0.01036604896398658 , week:0.009121525707887912 , post366_month_mean:0.008995026879686969 , post366_is_festival_75%:0.008788192227401133 , weather_min:0.00873260683030331 , post366_weather_max_count:0.00808530625765832 35 | 2017-06-12 19:14:40.093735 result: (0.9862574473283529, 0.97567068407074931, 0.7350667487545588, 0.89718025745485819, 0.53555567174279206) params: {'max_features': 100, 'n_estimators': 100} 0.771231894296 fetures: 979 post366_is_festival_count:0.05078995930825454 , post366_is_festival_25%_day_consumption:0.03655333121927563 , post366_is_festival_count_day_consumption:0.034339266133819775 , post30_is_week_std:0.03409284967110627 , post366_is_festival_mean_day_consumption:0.027057561605615767 , post366_is_festival_mean:0.024542855052014607 , post366_is_week_std_day_consumption:0.020112807358730535 , post366_is_holiday_std_day_consumption:0.01903976405461335 , post366_is_festival_50%_day_consumption:0.018864031016925794 , post366_is_week_min_day_consumption:0.01527043640493849 , post30_is_holiday_std:0.014907330801875033 , post366_days_in_month_max:0.01289563882751284 , post366_is_holiday_min_day_consumption:0.012335367334325144 , dayofyear:0.01161472135940337 , post366_week_max_day_consumption:0.010497489761341829 , post180_is_festival_mean:0.008668883992996224 , post366_weekofyear_std:0.008658860890646651 , post180_is_festival_50%_day_consumption:0.00865598854773328 , post366_week_std:0.008076164665792144 , post366_month_max_day_consumption:0.007685215631141424 36 | 2017-06-12 19:25:19.073365 result: (0.9867991261621545, 0.97766777023452556, 0.7367590881109578, 0.90249629847624457, 0.56241398970882805) params: {'max_features': 100, 'n_estimators': 200} 0.770315820758 fetures: 979 
post366_is_festival_count:0.040799545234957754 , post366_is_festival_mean_day_consumption:0.0395232336344848 , post366_is_festival_25%_day_consumption:0.03750448172308711 , post366_is_festival_count_day_consumption:0.0352292451957175 , post366_is_festival_mean:0.03307791110328274 , post366_is_festival_50%_day_consumption:0.027910759969119275 , post30_is_week_std:0.023358584356657115 , post30_is_holiday_std:0.018964289843085864 , post366_is_week_std_day_consumption:0.018350469580544503 , post366_is_holiday_std_day_consumption:0.016713986637495352 , post366_is_week_min_day_consumption:0.015499683260332877 , dayofyear:0.009854263631475344 , post366_is_holiday_min_day_consumption:0.009259430501178311 , post366_week_std:0.008391921034101532 , week:0.008333252422592408 , post366_week_max_day_consumption:0.008150295848354715 , post30_dayofweek_std:0.008086278138478816 , post180_is_festival_mean:0.00790448004931459 , post90_is_holiday_std_day_consumption:0.007325533253923696 , post366_weekofyear_std:0.007046368021324972 37 | 2017-06-12 20:50:22.306037 result: (0.9860635178227071, 0.97548880480927813, 0.73580500529532256, 0.89254671765264848, 0.57712359121888446) params: {'n_estimators': 100, 'max_features': 100} 0.748240129169 fetures: 6315 post366_post366_combine_quarter_and_is_festival_50%_day_consumption:0.02221312568471655 , post366_post366_combine_week_and_is_festival_mean_day_consumption:0.017795924159881484 , post366_post366_combine_week_and_is_festival_min_day_consumption:0.013661559410389981 , post90_post90_combine_week_and_is_festival_min_day_consumption:0.013370064655104154 , post366_post366_combine_dayofyear_and_is_festival_50%_day_consumption:0.012841577310095045 , post366_post366_combine_days_in_month_and_is_festival_mean_day_consumption:0.011008253032063627 , post366_post366_combine_year_and_is_festival_25%_day_consumption:0.009871354645920934 , post366_post366_combine_week_and_is_festival_25%_day_consumption:0.009276392455392397 , 
post366_post366_combine_weather_max_and_is_festival_50%_day_consumption:0.008991516025441473 , post366_post366_combine_weather_max_and_is_festival_25%_day_consumption:0.00892902318983094 , post366_post366_combine_weather_max_and_is_festival_count_day_consumption:0.008825670661912508 , post366_post366_combine_weekofyear_and_is_festival_count_day_consumption:0.008584675661534142 , post366_post366_combine_month_and_is_festival_50%_day_consumption:0.008467547043043849 , post30_is_week_std:0.008214512984556981 , post366_is_festival_mean_day_consumption:0.008140061461525879 , post366_post366_combine_days_in_month_and_is_week_50%_day_consumption:0.007978852351534515 , post366_post366_combine_month_and_is_festival_std_day_consumption:0.007531485636624336 , post366_post366_combine_year_and_is_festival_mean_day_consumption:0.007517912583840054 , post366_post366_combine_month_and_is_festival_25%_day_consumption:0.0073852186900209395 , post30_post30_combine_quarter_and_rare_weather_std_day_consumption:0.00628694275045872 38 | 2017-06-12 21:07:18.433023 result: (0.9875340316757467, 0.97970303308119733, 0.75657589321922436, 0.91571876988603362, 0.59606956020233115) params: {'max_features': 100, 'n_estimators': 500} 0.774661486377 fetures: 1666 post366_post366_combine_days_in_month_and_is_festival_count_day_consumption:0.013616842976884605 , post366_is_festival_25%_day_consumption:0.013511528563406282 , post366_post366_combine_days_in_month_and_is_festival_50%_day_consumption:0.013298023062819792 , post366_post366_combine_quarter_and_is_festival_50%_day_consumption:0.012523627379392574 , post180_post180_combine_week_and_is_festival_min_day_consumption:0.012367097148214145 , post366_post366_combine_dayofyear_and_is_festival_25%_day_consumption:0.01232556498664281 , post366_post366_combine_weekofyear_and_is_festival_mean_day_consumption:0.011291173850269115 , post90_post90_combine_weekofyear_and_is_festival_min_day_consumption:0.01099491105470402 , 
post366_post366_combine_weather_min_and_is_festival_25%_day_consumption:0.010886237130079735 , post366_post366_combine_week_and_is_festival_count_day_consumption:0.010294204094960957 , post366_is_festival_mean:0.010237850644440431 , post366_post366_combine_month_and_is_festival_25%_day_consumption:0.009964728734108365 , post366_post366_combine_week_and_is_festival_25%_day_consumption:0.009464475947519575 , post366_post366_combine_year_and_is_festival_count_day_consumption:0.009410767547369867 , post366_post366_combine_month_and_is_festival_50%_day_consumption:0.009405389469942706 , post366_post366_combine_weather_min_and_is_festival_mean_day_consumption:0.009191534959431901 , post366_post366_combine_dayofyear_and_is_festival_mean_day_consumption:0.008877472914973187 , post366_post366_combine_days_in_month_and_is_festival_mean_day_consumption:0.008677306241352722 , post366_is_festival_50%_day_consumption:0.008439029435670908 , post366_post366_combine_weather_max_and_is_festival_count_day_consumption:0.00821951136887792 39 | 2017-06-12 22:23:55.298309 result: (0.9868607500233955, 0.97798381605676776, 0.74116781023157952, 0.89751405429135322, 0.53704631521278068) params: {'n_estimators': 200, 'max_features': 200} 0.768118651739 fetures: 3000 post366_post366_combine_weekofyear_and_is_festival_mean_day_consumption:0.014474165442483193 , post366_post366_combine_weekofyear_and_is_festival_25%_day_consumption:0.012691197563604437 , post366_post366_combine_days_in_month_and_is_festival_50%_day_consumption:0.012617939859521598 , post366_post366_combine_days_in_month_and_wind_type_75%_day_consumption:0.011543684688620787 , post366_is_festival_count:0.011124442902471105 , post180_post180_combine_days_in_month_and_wind_type_min_day_consumption:0.010662163622520378 , post366_post366_combine_year_and_is_festival_50%_day_consumption:0.010417934960333995 , post366_post366_combine_quarter_and_is_festival_mean_day_consumption:0.010211410961749379 , 
post90_post90_combine_weekofyear_and_is_festival_min_day_consumption:0.010075932804518472 , post366_post366_combine_days_in_month_and_is_holiday_std_day_consumption:0.009947585288546551 , post366_post366_combine_weather_max_and_is_week_std_day_consumption:0.009581154633333824 , post366_post366_combine_dayofyear_and_is_festival_50%_day_consumption:0.009443225013791821 , post366_post366_combine_month_and_is_festival_mean_day_consumption:0.009348100429311995 , post30_is_week_std:0.009092924828265432 , post366_post366_combine_year_and_is_festival_mean_day_consumption:0.008965047271731115 , post90_post90_combine_week_and_is_festival_min_day_consumption:0.008933227794773128 , post366_is_festival_50%_day_consumption:0.008656116175382443 , post180_post180_combine_weekofyear_and_is_festival_min_day_consumption:0.008619167153293968 , post366_post366_combine_week_and_is_festival_25%_day_consumption:0.008598785575571626 , post180_post180_combine_dayofyear_and_is_festival_min_day_consumption:0.008444706483637206 40 | 2017-06-12 23:29:52.065789 result: 41 | 2017-06-12 23:45:41.635635 result: (1.0, 1.0, 0.74049079281277497, 0.83801583802054425, 0.66110825830568487) 42 | 2017-06-13 06:36:07.164962 result: (0.9999999999999999, 1.0, 0.74169914680087734, 0.84048503686220044, 0.6658133990544437) 43 | 2017-06-13 07:55:11.229020 result: (0.9876500688451734, 0.98078256144556675, 0.78898896165565757, 0.86008654041103993, 0.68182410751042843) 44 | -------------------------------------------------------------------------------- /code/rules.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "from datetime import datetime, timedelta\n", 12 | "\n", 13 | "import numpy as np\n", 14 | "import pandas as pd\n", 15 | "import scipy as sp" 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | 
"execution_count": 2, 21 | "metadata": { 22 | "collapsed": true 23 | }, 24 | "outputs": [], 25 | "source": [ 26 | "indexs = pd.date_range('2016/10/1', '2016/10/31')" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": 3, 32 | "metadata": { 33 | "collapsed": true 34 | }, 35 | "outputs": [], 36 | "source": [ 37 | "def gen_df(g):\n", 38 | " df = pd.DataFrame(index=indexs, columns=['power_consumption'])\n", 39 | " s = pd.Series(g.power_consumption, index=indexs)\n", 40 | " df['power_consumption'] = s\n", 41 | " df.fillna(df.power_consumption.mean(), inplace=True)\n", 42 | " return df" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": 4, 48 | "metadata": { 49 | "collapsed": true 50 | }, 51 | "outputs": [], 52 | "source": [ 53 | "train_path = '../dataset/train.csv'" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": 5, 59 | "metadata": { 60 | "collapsed": true 61 | }, 62 | "outputs": [], 63 | "source": [ 64 | "train_df = pd.read_csv(train_path, parse_dates=['record_date'])" 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": 6, 70 | "metadata": { 71 | "collapsed": false 72 | }, 73 | "outputs": [], 74 | "source": [ 75 | "train_df['record_date'] = train_df.record_date + timedelta(days=31)" 76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": 8, 81 | "metadata": { 82 | "collapsed": false 83 | }, 84 | "outputs": [], 85 | "source": [ 86 | "result_df = train_df.set_index('record_date').groupby(['user_id']).apply(gen_df)" 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": 9, 92 | "metadata": { 93 | "collapsed": false 94 | }, 95 | "outputs": [], 96 | "source": [ 97 | "submit_df = result_df.reset_index().groupby('level_1')[['power_consumption']].sum()" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": 10, 103 | "metadata": { 104 | "collapsed": false 105 | }, 106 | "outputs": [], 107 | "source": [ 108 | "submit_df['predict_date'] = 
submit_df.index.map(lambda x:x.strftime('%Y%m%d'))" 109 | ] 110 | }, 111 | { 112 | "cell_type": "code", 113 | "execution_count": 11, 114 | "metadata": { 115 | "collapsed": false 116 | }, 117 | "outputs": [], 118 | "source": [ 119 | "submit_df['predict_power_consumption'] = submit_df.power_consumption.astype(int)" 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": 12, 125 | "metadata": { 126 | "collapsed": false 127 | }, 128 | "outputs": [], 129 | "source": [ 130 | "submit_df.to_csv('../Tianchi_power_predict_table.csv', columns=['predict_date', 'predict_power_consumption'], index=False)" 131 | ] 132 | } 133 | ], 134 | "metadata": { 135 | "anaconda-cloud": {}, 136 | "hide_input": false, 137 | "kernelspec": { 138 | "display_name": "Python 3", 139 | "language": "python", 140 | "name": "python3" 141 | }, 142 | "language_info": { 143 | "codemirror_mode": { 144 | "name": "ipython", 145 | "version": 3 146 | }, 147 | "file_extension": ".py", 148 | "mimetype": "text/x-python", 149 | "name": "python", 150 | "nbconvert_exporter": "python", 151 | "pygments_lexer": "ipython3", 152 | "version": "3.5.1" 153 | } 154 | }, 155 | "nbformat": 4, 156 | "nbformat_minor": 2 157 | } 158 | -------------------------------------------------------------------------------- /code/run.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "!date" 12 | ] 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "metadata": {}, 17 | "source": [ 18 | "# 规则,前一个月平均值 " 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": null, 24 | "metadata": { 25 | "collapsed": true 26 | }, 27 | "outputs": [], 28 | "source": [ 29 | "extract_ipy = 'trick_extract.ipynb'\n", 30 | "extract_py = 'trick_extract.py'" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": 
null, 36 | "metadata": { 37 | "collapsed": false 38 | }, 39 | "outputs": [], 40 | "source": [ 41 | "# !jupyter nbconvert --to python $extract_ipy" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": null, 47 | "metadata": { 48 | "collapsed": false 49 | }, 50 | "outputs": [], 51 | "source": [ 52 | "# !python $extract_py test" 53 | ] 54 | }, 55 | { 56 | "cell_type": "markdown", 57 | "metadata": {}, 58 | "source": [ 59 | "## 切分数据" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": null, 65 | "metadata": { 66 | "collapsed": false 67 | }, 68 | "outputs": [], 69 | "source": [ 70 | "!runipy split_samples.ipynb" 71 | ] 72 | }, 73 | { 74 | "cell_type": "markdown", 75 | "metadata": {}, 76 | "source": [ 77 | "# 过滤清洗数据" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": null, 83 | "metadata": { 84 | "collapsed": false 85 | }, 86 | "outputs": [], 87 | "source": [ 88 | "!jupyter nbconvert --to python clean_data.ipynb" 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": null, 94 | "metadata": { 95 | "collapsed": false 96 | }, 97 | "outputs": [], 98 | "source": [ 99 | "!python clean_data.py test" 100 | ] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "execution_count": null, 105 | "metadata": { 106 | "collapsed": false 107 | }, 108 | "outputs": [], 109 | "source": [ 110 | "for i in range(1, 10):\n", 111 | " !python clean_data.py train $i" 112 | ] 113 | }, 114 | { 115 | "cell_type": "markdown", 116 | "metadata": {}, 117 | "source": [ 118 | "## 提取特征 " 119 | ] 120 | }, 121 | { 122 | "cell_type": "code", 123 | "execution_count": null, 124 | "metadata": { 125 | "collapsed": true 126 | }, 127 | "outputs": [], 128 | "source": [ 129 | "extract_ipy = 'extract_fetures.ipynb'\n", 130 | "extract_py = 'extract_fetures.py'" 131 | ] 132 | }, 133 | { 134 | "cell_type": "code", 135 | "execution_count": null, 136 | "metadata": { 137 | "collapsed": false 138 | }, 139 | "outputs": [], 140 | "source": [ 141 | 
"!jupyter nbconvert --to python $extract_ipy" 142 | ] 143 | }, 144 | { 145 | "cell_type": "code", 146 | "execution_count": null, 147 | "metadata": { 148 | "collapsed": true 149 | }, 150 | "outputs": [], 151 | "source": [ 152 | "for i in range(1, 10): \n", 153 | " !python $extract_py train $i" 154 | ] 155 | }, 156 | { 157 | "cell_type": "code", 158 | "execution_count": null, 159 | "metadata": { 160 | "collapsed": true 161 | }, 162 | "outputs": [], 163 | "source": [ 164 | "!python $extract_py test" 165 | ] 166 | }, 167 | { 168 | "cell_type": "code", 169 | "execution_count": null, 170 | "metadata": { 171 | "collapsed": true 172 | }, 173 | "outputs": [], 174 | "source": [ 175 | "!runipy combine_fetures.ipynb" 176 | ] 177 | }, 178 | { 179 | "cell_type": "markdown", 180 | "metadata": {}, 181 | "source": [ 182 | "## 训练模型 " 183 | ] 184 | }, 185 | { 186 | "cell_type": "code", 187 | "execution_count": null, 188 | "metadata": { 189 | "collapsed": false, 190 | "scrolled": false 191 | }, 192 | "outputs": [], 193 | "source": [ 194 | "!runipy train_model.ipynb" 195 | ] 196 | }, 197 | { 198 | "cell_type": "markdown", 199 | "metadata": {}, 200 | "source": [ 201 | "## 模型预测 " 202 | ] 203 | }, 204 | { 205 | "cell_type": "code", 206 | "execution_count": null, 207 | "metadata": { 208 | "collapsed": false 209 | }, 210 | "outputs": [], 211 | "source": [ 212 | "!runipy predict_result.ipynb" 213 | ] 214 | }, 215 | { 216 | "cell_type": "code", 217 | "execution_count": null, 218 | "metadata": { 219 | "collapsed": false 220 | }, 221 | "outputs": [], 222 | "source": [ 223 | "!date" 224 | ] 225 | } 226 | ], 227 | "metadata": { 228 | "anaconda-cloud": {}, 229 | "hide_input": false, 230 | "kernelspec": { 231 | "display_name": "Python 3", 232 | "language": "python", 233 | "name": "python3" 234 | }, 235 | "language_info": { 236 | "codemirror_mode": { 237 | "name": "ipython", 238 | "version": 3 239 | }, 240 | "file_extension": ".py", 241 | "mimetype": "text/x-python", 242 | "name": "python", 243 | 
"nbconvert_exporter": "python", 244 | "pygments_lexer": "ipython3", 245 | "version": "3.5.1" 246 | } 247 | }, 248 | "nbformat": 4, 249 | "nbformat_minor": 2 250 | } 251 | -------------------------------------------------------------------------------- /code/split_samples.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 123, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "from os.path import exists, join\n", 12 | "from os import mkdir\n", 13 | "from datetime import datetime, timedelta, date\n", 14 | "from collections import namedtuple\n", 15 | "\n", 16 | "import numpy as np\n", 17 | "import pandas as pd\n", 18 | "import scipy as sp" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 124, 24 | "metadata": { 25 | "collapsed": true 26 | }, 27 | "outputs": [], 28 | "source": [ 29 | "train_base_dir = '../dataset/fetures/'" 30 | ] 31 | }, 32 | { 33 | "cell_type": "markdown", 34 | "metadata": {}, 35 | "source": [ 36 | "# 移动滑窗划分成9个predict样本 " 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": 125, 42 | "metadata": { 43 | "collapsed": false 44 | }, 45 | "outputs": [], 46 | "source": [ 47 | "df = pd.read_csv('../dataset/train.csv', parse_dates=['record_date'], index_col=['record_date'])" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": 126, 53 | "metadata": { 54 | "collapsed": false 55 | }, 56 | "outputs": [], 57 | "source": [ 58 | "begin = df.index.max().date() + timedelta(1)" 59 | ] 60 | }, 61 | { 62 | "cell_type": "markdown", 63 | "metadata": {}, 64 | "source": [ 65 | "## 训练样本划分" 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": 127, 71 | "metadata": { 72 | "collapsed": true 73 | }, 74 | "outputs": [], 75 | "source": [ 76 | "TimeSpan = namedtuple('TimeSpan', ['train_start', 'train_end', 'predict_start', 'predict_end'])" 77 | ] 78 | }, 
79 | { 80 | "cell_type": "code", 81 | "execution_count": 128, 82 | "metadata": { 83 | "collapsed": true 84 | }, 85 | "outputs": [], 86 | "source": [ 87 | "timespans = []" 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": 129, 93 | "metadata": { 94 | "collapsed": true 95 | }, 96 | "outputs": [], 97 | "source": [ 98 | "end = begin - timedelta(1)\n", 99 | "for i in range(9):\n", 100 | " p_end = date(begin.year, begin.month-i, begin.day)\n", 101 | " predict_end = p_end - timedelta(1)\n", 102 | " predict_start = date(begin.year, begin.month-i-1, begin.day)\n", 103 | " train_end = predict_start - timedelta(1)\n", 104 | " train_start = date(predict_start.year-1, predict_start.month, predict_start.day)\n", 105 | " span = TimeSpan(train_start, train_end, predict_start, predict_end)\n", 106 | "# print(span)\n", 107 | " timespans.append(span)" 108 | ] 109 | }, 110 | { 111 | "cell_type": "markdown", 112 | "metadata": {}, 113 | "source": [ 114 | "##### 创建文件夹 " 115 | ] 116 | }, 117 | { 118 | "cell_type": "code", 119 | "execution_count": 130, 120 | "metadata": { 121 | "collapsed": false 122 | }, 123 | "outputs": [], 124 | "source": [ 125 | "for span in timespans:\n", 126 | " dir_n = str(span.predict_start.month)\n", 127 | " path = join(train_base_dir, dir_n)\n", 128 | " if not exists(path):\n", 129 | " mkdir(path)" 130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": 131, 135 | "metadata": { 136 | "collapsed": false 137 | }, 138 | "outputs": [], 139 | "source": [ 140 | "span = timespans[0]" 141 | ] 142 | }, 143 | { 144 | "cell_type": "code", 145 | "execution_count": 132, 146 | "metadata": { 147 | "collapsed": true 148 | }, 149 | "outputs": [], 150 | "source": [ 151 | "def create_train_sample(timespan): \n", 152 | " train = df.loc[str(timespan.train_start):str(timespan.train_end)] \n", 153 | " train_predict = df.loc[str(timespan.predict_start):str(timespan.predict_end)].reset_index()\n", 154 | " train_predict =
train_predict.groupby('record_date')[['power_consumption']].sum()\n", 155 | " train_predict['predict_power_consumption'] = train_predict.power_consumption.astype(np.int64)\n", 156 | " train_predict.drop('power_consumption', axis=1, inplace=True)\n", 157 | " train_predict.index.name = 'predict_date'\n", 158 | " train_path = join(train_base_dir, str(timespan.predict_start.month), 'train.csv')\n", 159 | " train_predict_path = join(train_base_dir, str(timespan.predict_start.month), 'train_predict.csv')\n", 160 | " train.to_csv(train_path)\n", 161 | " train.info()\n", 162 | " train_predict.to_csv(train_predict_path)\n", 163 | " train_predict.info()" 164 | ] 165 | }, 166 | { 167 | "cell_type": "code", 168 | "execution_count": 133, 169 | "metadata": { 170 | "collapsed": false 171 | }, 172 | "outputs": [ 173 | { 174 | "name": "stdout", 175 | "output_type": "stream", 176 | "text": [ 177 | "\n", 178 | "DatetimeIndex: 532156 entries, 2015-09-01 to 2016-08-31\n", 179 | "Data columns (total 2 columns):\n", 180 | "user_id 532156 non-null int64\n", 181 | "power_consumption 532156 non-null int64\n", 182 | "dtypes: int64(2)\n", 183 | "memory usage: 12.2 MB\n", 184 | "\n", 185 | "DatetimeIndex: 30 entries, 2016-09-01 to 2016-09-30\n", 186 | "Data columns (total 1 columns):\n", 187 | "predict_power_consumption 30 non-null int64\n", 188 | "dtypes: int64(1)\n", 189 | "memory usage: 480.0 bytes\n", 190 | "\n", 191 | "DatetimeIndex: 532156 entries, 2015-08-01 to 2016-07-31\n", 192 | "Data columns (total 2 columns):\n", 193 | "user_id 532156 non-null int64\n", 194 | "power_consumption 532156 non-null int64\n", 195 | "dtypes: int64(2)\n", 196 | "memory usage: 12.2 MB\n", 197 | "\n", 198 | "DatetimeIndex: 31 entries, 2016-08-01 to 2016-08-31\n", 199 | "Data columns (total 1 columns):\n", 200 | "predict_power_consumption 31 non-null int64\n", 201 | "dtypes: int64(1)\n", 202 | "memory usage: 496.0 bytes\n", 203 | "\n", 204 | "DatetimeIndex: 532153 entries, 2015-07-01 to 2016-06-30\n", 205 | "Data
columns (total 2 columns):\n", 206 | "user_id 532153 non-null int64\n", 207 | "power_consumption 532153 non-null int64\n", 208 | "dtypes: int64(2)\n", 209 | "memory usage: 12.2 MB\n", 210 | "\n", 211 | "DatetimeIndex: 31 entries, 2016-07-01 to 2016-07-31\n", 212 | "Data columns (total 1 columns):\n", 213 | "predict_power_consumption 31 non-null int64\n", 214 | "dtypes: int64(1)\n", 215 | "memory usage: 496.0 bytes\n", 216 | "\n", 217 | "DatetimeIndex: 532150 entries, 2015-06-01 to 2016-05-31\n", 218 | "Data columns (total 2 columns):\n", 219 | "user_id 532150 non-null int64\n", 220 | "power_consumption 532150 non-null int64\n", 221 | "dtypes: int64(2)\n", 222 | "memory usage: 12.2 MB\n", 223 | "\n", 224 | "DatetimeIndex: 30 entries, 2016-06-01 to 2016-06-30\n", 225 | "Data columns (total 1 columns):\n", 226 | "predict_power_consumption 30 non-null int64\n", 227 | "dtypes: int64(1)\n", 228 | "memory usage: 480.0 bytes\n", 229 | "\n", 230 | "DatetimeIndex: 532150 entries, 2015-05-01 to 2016-04-30\n", 231 | "Data columns (total 2 columns):\n", 232 | "user_id 532150 non-null int64\n", 233 | "power_consumption 532150 non-null int64\n", 234 | "dtypes: int64(2)\n", 235 | "memory usage: 12.2 MB\n", 236 | "\n", 237 | "DatetimeIndex: 31 entries, 2016-05-01 to 2016-05-31\n", 238 | "Data columns (total 1 columns):\n", 239 | "predict_power_consumption 31 non-null int64\n", 240 | "dtypes: int64(1)\n", 241 | "memory usage: 496.0 bytes\n", 242 | "\n", 243 | "DatetimeIndex: 532148 entries, 2015-04-01 to 2016-03-31\n", 244 | "Data columns (total 2 columns):\n", 245 | "user_id 532148 non-null int64\n", 246 | "power_consumption 532148 non-null int64\n", 247 | "dtypes: int64(2)\n", 248 | "memory usage: 12.2 MB\n", 249 | "\n", 250 | "DatetimeIndex: 30 entries, 2016-04-01 to 2016-04-30\n", 251 | "Data columns (total 1 columns):\n", 252 | "predict_power_consumption 30 non-null int64\n", 253 | "dtypes: int64(1)\n", 254 | "memory usage: 480.0 bytes\n", 255 | "\n", 256 | "DatetimeIndex: 
532147 entries, 2015-03-01 to 2016-02-29\n", 257 | "Data columns (total 2 columns):\n", 258 | "user_id 532147 non-null int64\n", 259 | "power_consumption 532147 non-null int64\n", 260 | "dtypes: int64(2)\n", 261 | "memory usage: 12.2 MB\n", 262 | "\n", 263 | "DatetimeIndex: 31 entries, 2016-03-01 to 2016-03-31\n", 264 | "Data columns (total 1 columns):\n", 265 | "predict_power_consumption 31 non-null int64\n", 266 | "dtypes: int64(1)\n", 267 | "memory usage: 496.0 bytes\n", 268 | "\n", 269 | "DatetimeIndex: 530692 entries, 2015-02-01 to 2016-01-31\n", 270 | "Data columns (total 2 columns):\n", 271 | "user_id 530692 non-null int64\n", 272 | "power_consumption 530692 non-null int64\n", 273 | "dtypes: int64(2)\n", 274 | "memory usage: 12.1 MB\n", 275 | "\n", 276 | "DatetimeIndex: 29 entries, 2016-02-01 to 2016-02-29\n", 277 | "Data columns (total 1 columns):\n", 278 | "predict_power_consumption 29 non-null int64\n", 279 | "dtypes: int64(1)\n", 280 | "memory usage: 464.0 bytes\n", 281 | "\n", 282 | "DatetimeIndex: 530693 entries, 2015-01-01 to 2015-12-31\n", 283 | "Data columns (total 2 columns):\n", 284 | "user_id 530693 non-null int64\n", 285 | "power_consumption 530693 non-null int64\n", 286 | "dtypes: int64(2)\n", 287 | "memory usage: 12.1 MB\n", 288 | "\n", 289 | "DatetimeIndex: 31 entries, 2016-01-01 to 2016-01-31\n", 290 | "Data columns (total 1 columns):\n", 291 | "predict_power_consumption 31 non-null int64\n", 292 | "dtypes: int64(1)\n", 293 | "memory usage: 496.0 bytes\n" 294 | ] 295 | } 296 | ], 297 | "source": [ 298 | "for span in timespans:\n", 299 | " create_train_sample(span)" 300 | ] 301 | }, 302 | { 303 | "cell_type": "code", 304 | "execution_count": 138, 305 | "metadata": { 306 | "collapsed": true 307 | }, 308 | "outputs": [], 309 | "source": [ 310 | "test = df.loc['2015/10/1':'2016/9/30']" 311 | ] 312 | }, 313 | { 314 | "cell_type": "code", 315 | "execution_count": 139, 316 | "metadata": { 317 | "collapsed": false 318 | }, 319 | "outputs": [], 320 | 
"source": [ 321 | "index = pd.date_range('2016/10/1', '2016/10/31')\n", 322 | "test_predict = pd.DataFrame(0, index=index, columns=['predict_power_consumption'])\n", 323 | "test_predict.index.name = 'predict_date'" 324 | ] 325 | }, 326 | { 327 | "cell_type": "code", 328 | "execution_count": 140, 329 | "metadata": { 330 | "collapsed": false 331 | }, 332 | "outputs": [ 333 | { 334 | "name": "stdout", 335 | "output_type": "stream", 336 | "text": [ 337 | "\n", 338 | "DatetimeIndex: 532159 entries, 2015-10-01 to 2016-09-28\n", 339 | "Data columns (total 2 columns):\n", 340 | "user_id 532159 non-null int64\n", 341 | "power_consumption 532159 non-null int64\n", 342 | "dtypes: int64(2)\n", 343 | "memory usage: 12.2 MB\n" 344 | ] 345 | } 346 | ], 347 | "source": [ 348 | "test.to_csv('../dataset/fetures/test.csv')\n", 349 | "test.info()" 350 | ] 351 | }, 352 | { 353 | "cell_type": "code", 354 | "execution_count": 142, 355 | "metadata": { 356 | "collapsed": false 357 | }, 358 | "outputs": [ 359 | { 360 | "name": "stdout", 361 | "output_type": "stream", 362 | "text": [ 363 | "\n", 364 | "DatetimeIndex: 31 entries, 2016-10-01 to 2016-10-31\n", 365 | "Freq: D\n", 366 | "Data columns (total 1 columns):\n", 367 | "predict_power_consumption 31 non-null int64\n", 368 | "dtypes: int64(1)\n", 369 | "memory usage: 496.0 bytes\n" 370 | ] 371 | } 372 | ], 373 | "source": [ 374 | "test_predict.to_csv('../dataset/fetures/test_predict.csv')\n", 375 | "test_predict.info()" 376 | ] 377 | } 378 | ], 379 | "metadata": { 380 | "anaconda-cloud": {}, 381 | "hide_input": false, 382 | "kernelspec": { 383 | "display_name": "Python 3", 384 | "language": "python", 385 | "name": "python3" 386 | }, 387 | "language_info": { 388 | "codemirror_mode": { 389 | "name": "ipython", 390 | "version": 3 391 | }, 392 | "file_extension": ".py", 393 | "mimetype": "text/x-python", 394 | "name": "python", 395 | "nbconvert_exporter": "python", 396 | "pygments_lexer": "ipython3", 397 | "version": "3.5.1" 398 | } 399 | }, 
400 | "nbformat": 4, 401 | "nbformat_minor": 2 402 | } 403 | -------------------------------------------------------------------------------- /code/train_model.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 35, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import pickle\n", 12 | "from datetime import datetime\n", 13 | "\n", 14 | "import numpy as np\n", 15 | "import scipy as sp\n", 16 | "import pandas as pd\n", 17 | "\n", 18 | "from sklearn.ensemble import RandomForestRegressor\n", 19 | "from sklearn.metrics import r2_score, roc_auc_score\n", 20 | "from sklearn import cross_validation\n", 21 | "from sklearn.model_selection import GridSearchCV\n", 22 | "from sklearn.svm import SVR\n", 23 | "import mlxtend" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": 36, 29 | "metadata": { 30 | "collapsed": true 31 | }, 32 | "outputs": [], 33 | "source": [ 34 | "from mlxtend.regressor import StackingRegressor\n", 35 | "from mlxtend.data import boston_housing_data\n", 36 | "from sklearn.linear_model import LinearRegression\n", 37 | "from sklearn.linear_model import Ridge\n", 38 | "from sklearn.svm import SVR\n", 39 | "from sklearn.ensemble import ExtraTreesRegressor" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": 37, 45 | "metadata": { 46 | "collapsed": true 47 | }, 48 | "outputs": [], 49 | "source": [ 50 | "def loss_score(predict, real):\n", 51 | " f = (real - predict)/real\n", 52 | " n = len(f)\n", 53 | " f = f.replace([np.nan, -np.nan], 0)\n", 54 | " score = 1 - np.abs(f).sum()/n\n", 55 | " return score " 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": 38, 61 | "metadata": { 62 | "collapsed": true 63 | }, 64 | "outputs": [], 65 | "source": [ 66 | "model_path = '../dataset/fetures/model.pkl'" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | 
"execution_count": 39, 72 | "metadata": { 73 | "collapsed": true 74 | }, 75 | "outputs": [], 76 | "source": [ 77 | "fetures = pd.read_csv('../dataset/fetures/train_feture.csv')" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": 40, 83 | "metadata": { 84 | "collapsed": false 85 | }, 86 | "outputs": [], 87 | "source": [ 88 | "train_x = fetures.drop(['predict_date', 'predict_power_consumption'], axis=1)" 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": 41, 94 | "metadata": { 95 | "collapsed": false 96 | }, 97 | "outputs": [], 98 | "source": [ 99 | "train_y = fetures['predict_power_consumption']" 100 | ] 101 | }, 102 | { 103 | "cell_type": "markdown", 104 | "metadata": {}, 105 | "source": [ 106 | "### 使用 gridsearch 调参" 107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "execution_count": 43, 112 | "metadata": { 113 | "collapsed": false 114 | }, 115 | "outputs": [], 116 | "source": [ 117 | "parameters = {\n", 118 | " 'n_estimators': [10, 100, 200, 300, 500],\n", 119 | "# 'criterion': ['mse', 'mae'],\n", 120 | " 'max_features': [10, 50, 100, 200, 'sqrt']\n", 121 | "}" 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": 44, 127 | "metadata": { 128 | "collapsed": true 129 | }, 130 | "outputs": [], 131 | "source": [ 132 | "# model = RandomForestRegressor(n_jobs=-1, oob_score=True)" 133 | ] 134 | }, 135 | { 136 | "cell_type": "code", 137 | "execution_count": 45, 138 | "metadata": { 139 | "collapsed": false 140 | }, 141 | "outputs": [], 142 | "source": [ 143 | "# cv = cross_validation.ShuffleSplit(train_x.shape[0], n_iter=8, test_size=0.1, random_state=0)\n", 144 | "# clf = GridSearchCV(model, parameters, cv=cv)" 145 | ] 146 | }, 147 | { 148 | "cell_type": "code", 149 | "execution_count": 46, 150 | "metadata": { 151 | "collapsed": true 152 | }, 153 | "outputs": [], 154 | "source": [ 155 | "# clf.fit(train_x.fillna(0), train_y)" 156 | ] 157 | }, 158 | { 159 | "cell_type": "code", 160 | 
"execution_count": 47, 161 | "metadata": { 162 | "collapsed": false 163 | }, 164 | "outputs": [], 165 | "source": [ 166 | "# model = RandomForestRegressor(n_jobs=-1, oob_score=True, random_state=666, **clf.best_params_)" 167 | ] 168 | }, 169 | { 170 | "cell_type": "code", 171 | "execution_count": 48, 172 | "metadata": { 173 | "collapsed": false 174 | }, 175 | "outputs": [], 176 | "source": [ 177 | "# cv = cross_validation.ShuffleSplit(train_x.shape[0], n_iter=5, test_size=0.1, random_state=666)\n", 178 | "# r2_score = cross_validation.cross_val_score(model, train_x.fillna(0), train_y, cv=cv)\n", 179 | "# r2_score" 180 | ] 181 | }, 182 | { 183 | "cell_type": "code", 184 | "execution_count": 288, 185 | "metadata": { 186 | "collapsed": true 187 | }, 188 | "outputs": [], 189 | "source": [ 190 | "cv = cross_validation.ShuffleSplit(train_x.shape[0], n_iter=10, test_size=0.2, random_state=666)" 191 | ] 192 | }, 193 | { 194 | "cell_type": "code", 195 | "execution_count": 290, 196 | "metadata": { 197 | "collapsed": true 198 | }, 199 | "outputs": [], 200 | "source": [ 201 | "forest = RandomForestRegressor(\n", 202 | " n_jobs=-1,\n", 203 | "# criterion='mae',\n", 204 | " random_state=1,\n", 205 | " oob_score=True,\n", 206 | " n_estimators=100,\n", 207 | " max_features='auto')\n", 208 | "fores = RandomForestRegressor(\n", 209 | " n_jobs=-1,\n", 210 | " random_state=2,\n", 211 | " oob_score=True,\n", 212 | " n_estimators=300,\n", 213 | " max_features=300)\n", 214 | "extract = ExtraTreesRegressor(\n", 215 | " n_jobs=-1,\n", 216 | " random_state=3,\n", 217 | "# criterion='mae',\n", 218 | " bootstrap=True,\n", 219 | " oob_score=True,\n", 220 | " n_estimators=100,\n", 221 | " max_features='auto')\n", 222 | "extrac = ExtraTreesRegressor(\n", 223 | " n_jobs=-1,\n", 224 | " random_state=4,\n", 225 | " bootstrap=True,\n", 226 | " oob_score=True,\n", 227 | " n_estimators=300,\n", 228 | " max_features=300)" 229 | ] 230 | }, 231 | { 232 | "cell_type": "code", 233 | "execution_count": 292, 
234 | "metadata": { 235 | "collapsed": true 236 | }, 237 | "outputs": [], 238 | "source": [ 239 | "clfs = [forest, fores, extract, extrac]" 240 | ] 241 | }, 242 | { 243 | "cell_type": "code", 244 | "execution_count": 293, 245 | "metadata": { 246 | "collapsed": true 247 | }, 248 | "outputs": [], 249 | "source": [ 250 | "params = {\n", 251 | " 'forest__n-estimators': [10, 50, 100],\n", 252 | " 'forest__max-features': [10, 50, 100],\n", 253 | " 'fores__n-estimators': [100, 200, 500],\n", 254 | " 'fores__max-features': [100, 200, 300],\n", 255 | " 'extract__n-estimators': [10, 50, 100],\n", 256 | " 'extract__max-features': [10, 50, 100],\n", 257 | " 'extrac__n-estimators': [100, 200, 500],\n", 258 | " 'extrac__max-features': [100, 200, 300],\n", 259 | "}" 260 | ] 261 | }, 262 | { 263 | "cell_type": "code", 264 | "execution_count": 294, 265 | "metadata": { 266 | "collapsed": false 267 | }, 268 | "outputs": [], 269 | "source": [ 270 | "meta = LinearRegression(n_jobs=-1)" 271 | ] 272 | }, 273 | { 274 | "cell_type": "code", 275 | "execution_count": 295, 276 | "metadata": { 277 | "collapsed": false 278 | }, 279 | "outputs": [], 280 | "source": [ 281 | "stacker = StackingRegressor(regressors=clfs, meta_regressor=meta, verbose=True)" 282 | ] 283 | }, 284 | { 285 | "cell_type": "code", 286 | "execution_count": 296, 287 | "metadata": { 288 | "collapsed": false 289 | }, 290 | "outputs": [], 291 | "source": [ 292 | "grid = GridSearchCV(estimator=stacker, param_grid=params, cv=cv)" 293 | ] 294 | }, 295 | { 296 | "cell_type": "code", 297 | "execution_count": 297, 298 | "metadata": { 299 | "collapsed": false 300 | }, 301 | "outputs": [ 302 | { 303 | "data": { 304 | "text/plain": [ 305 | "{'bootstrap': True,\n", 306 | " 'criterion': 'mse',\n", 307 | " 'max_depth': None,\n", 308 | " 'max_features': 'auto',\n", 309 | " 'max_leaf_nodes': None,\n", 310 | " 'min_impurity_split': 1e-07,\n", 311 | " 'min_samples_leaf': 1,\n", 312 | " 'min_samples_split': 2,\n", 313 | " 
'min_weight_fraction_leaf': 0.0,\n", 314 | " 'n_estimators': 100,\n", 315 | " 'n_jobs': -1,\n", 316 | " 'oob_score': True,\n", 317 | " 'random_state': 3,\n", 318 | " 'verbose': 0,\n", 319 | " 'warm_start': False}" 320 | ] 321 | }, 322 | "execution_count": 297, 323 | "metadata": {}, 324 | "output_type": "execute_result" 325 | } 326 | ], 327 | "source": [ 328 | "extract.get_params()" 329 | ] 330 | }, 331 | { 332 | "cell_type": "code", 333 | "execution_count": 298, 334 | "metadata": { 335 | "collapsed": true 336 | }, 337 | "outputs": [], 338 | "source": [ 339 | "# grid.fit(train_x.fillna(0), train_y)" 340 | ] 341 | }, 342 | { 343 | "cell_type": "code", 344 | "execution_count": 300, 345 | "metadata": { 346 | "collapsed": true 347 | }, 348 | "outputs": [ 349 | { 350 | "name": "stdout", 351 | "output_type": "stream", 352 | "text": [ 353 | "Fitting 4 regressors...\n", 354 | "Fitting regressor1: randomforestregressor (1/4)\n", 355 | "Fitting 4 regressors...\n", 356 | "Fitting regressor1: randomforestregressor (1/4)\n", 357 | "Fitting 4 regressors...\n", 358 | "Fitting regressor1: randomforestregressor (1/4)\n", 359 | "Fitting 4 regressors...\n", 360 | "Fitting regressor1: randomforestregressor (1/4)\n", 361 | "Fitting regressor2: randomforestregressor (2/4)\n", 362 | "Fitting regressor2: randomforestregressor (2/4)\n", 363 | "Fitting regressor2: randomforestregressor (2/4)\n", 364 | "Fitting regressor2: randomforestregressor (2/4)\n", 365 | "Fitting regressor3: extratreesregressor (3/4)\n", 366 | "Fitting regressor3: extratreesregressor (3/4)\n", 367 | "Fitting regressor3: extratreesregressor (3/4)\n", 368 | "Fitting regressor3: extratreesregressor (3/4)\n", 369 | "Fitting regressor4: extratreesregressor (4/4)\n", 370 | "Fitting regressor4: extratreesregressor (4/4)\n", 371 | "Fitting regressor4: extratreesregressor (4/4)\n", 372 | "Fitting regressor4: extratreesregressor (4/4)\n", 373 | "Fitting 4 regressors...\n", 374 | "Fitting regressor1: randomforestregressor 
(1/4)\n", 375 | "Fitting 4 regressors...\n", 376 | "Fitting regressor1: randomforestregressor (1/4)\n", 377 | "Fitting 4 regressors...\n", 378 | "Fitting regressor1: randomforestregressor (1/4)\n", 379 | "Fitting 4 regressors...\n", 380 | "Fitting regressor1: randomforestregressor (1/4)\n", 381 | "Fitting regressor2: randomforestregressor (2/4)\n", 382 | "Fitting regressor2: randomforestregressor (2/4)\n", 383 | "Fitting regressor2: randomforestregressor (2/4)\n", 384 | "Fitting regressor2: randomforestregressor (2/4)\n", 385 | "Fitting regressor3: extratreesregressor (3/4)\n", 386 | "Fitting regressor3: extratreesregressor (3/4)\n", 387 | "Fitting regressor3: extratreesregressor (3/4)\n", 388 | "Fitting regressor3: extratreesregressor (3/4)\n", 389 | "Fitting regressor4: extratreesregressor (4/4)\n", 390 | "Fitting regressor4: extratreesregressor (4/4)\n", 391 | "Fitting regressor4: extratreesregressor (4/4)\n", 392 | "Fitting regressor4: extratreesregressor (4/4)\n", 393 | "Fitting 4 regressors...\n", 394 | "Fitting regressor1: randomforestregressor (1/4)\n", 395 | "Fitting 4 regressors...\n", 396 | "Fitting regressor1: randomforestregressor (1/4)\n", 397 | "Fitting regressor2: randomforestregressor (2/4)\n", 398 | "Fitting regressor2: randomforestregressor (2/4)\n", 399 | "Fitting regressor3: extratreesregressor (3/4)\n", 400 | "Fitting regressor3: extratreesregressor (3/4)\n", 401 | "Fitting regressor4: extratreesregressor (4/4)\n", 402 | "Fitting regressor4: extratreesregressor (4/4)\n" 403 | ] 404 | }, 405 | { 406 | "name": "stderr", 407 | "output_type": "stream", 408 | "text": [ 409 | "[Parallel(n_jobs=-1)]: Done 10 out of 10 | elapsed: 2.1min finished\n" 410 | ] 411 | } 412 | ], 413 | "source": [ 414 | "r2_score = cross_validation.cross_val_score(stacker, train_x, train_y, cv=cv, verbose=True, n_jobs=-1)" 415 | ] 416 | }, 417 | { 418 | "cell_type": "code", 419 | "execution_count": 301, 420 | "metadata": { 421 | "collapsed": true 422 | }, 423 | "outputs": 
[], 424 | "source": [ 425 | "# grid.fit(train_x.fillna(0), train_y)" 426 | ] 427 | }, 428 | { 429 | "cell_type": "code", 430 | "execution_count": 302, 431 | "metadata": { 432 | "collapsed": false 433 | }, 434 | "outputs": [ 435 | { 436 | "data": { 437 | "text/plain": [ 438 | "(0.78898896165565757, 0.86008654041103993, 0.68182410751042843)" 439 | ] 440 | }, 441 | "execution_count": 302, 442 | "metadata": {}, 443 | "output_type": "execute_result" 444 | } 445 | ], 446 | "source": [ 447 | "r2_score.mean(), r2_score.max(), r2_score.min()" 448 | ] 449 | }, 450 | { 451 | "cell_type": "code", 452 | "execution_count": 303, 453 | "metadata": { 454 | "collapsed": false 455 | }, 456 | "outputs": [ 457 | { 458 | "name": "stdout", 459 | "output_type": "stream", 460 | "text": [ 461 | "Fitting 4 regressors...\n", 462 | "Fitting regressor1: randomforestregressor (1/4)\n", 463 | "Fitting regressor2: randomforestregressor (2/4)\n", 464 | "Fitting regressor3: extratreesregressor (3/4)\n", 465 | "Fitting regressor4: extratreesregressor (4/4)\n" 466 | ] 467 | }, 468 | { 469 | "data": { 470 | "text/plain": [ 471 | "StackingRegressor(meta_regressor=LinearRegression(copy_X=True, fit_intercept=True, n_jobs=-1, normalize=False),\n", 472 | " regressors=[RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=None,\n", 473 | " max_features='auto', max_leaf_nodes=None,\n", 474 | " min_impurity_split=1e-07, min_samples_leaf=1,\n", 475 | " min_samples_split=2, min_weight_fraction_leaf=0.0,\n", 476 | " n_estimators=100, n_jobs=-1, oob_s...n_estimators=300, n_jobs=-1,\n", 477 | " oob_score=True, random_state=4, verbose=0, warm_start=False)],\n", 478 | " verbose=True)" 479 | ] 480 | }, 481 | "execution_count": 303, 482 | "metadata": {}, 483 | "output_type": "execute_result" 484 | } 485 | ], 486 | "source": [ 487 | "stacker.fit(train_x.fillna(0), train_y)" 488 | ] 489 | }, 490 | { 491 | "cell_type": "code", 492 | "execution_count": 304, 493 | "metadata": { 494 | "collapsed": false 495 | }, 
496 | "outputs": [], 497 | "source": [ 498 | "r2_all_scorr = stacker.score(train_x.fillna(0), train_y)" 499 | ] 500 | }, 501 | { 502 | "cell_type": "code", 503 | "execution_count": null, 504 | "metadata": { 505 | "collapsed": true 506 | }, 507 | "outputs": [], 508 | "source": [ 509 | "# importance_df = pd.DataFrame(model.feature_importances_, index=train_x.columns)\n", 510 | "\n", 511 | "# importance_df.sort_values(0, ascending=False, inplace=True)\n", 512 | "\n", 513 | "# importance_top20 = ' , '.join(['{}:{} '.format(x, importance_df.loc[x].values[0]) for x in importance_df.index][:20])\n", 514 | "# importance_top20" 515 | ] 516 | }, 517 | { 518 | "cell_type": "code", 519 | "execution_count": 305, 520 | "metadata": { 521 | "collapsed": false 522 | }, 523 | "outputs": [ 524 | { 525 | "data": { 526 | "text/plain": [ 527 | "0.9876500688451734" 528 | ] 529 | }, 530 | "execution_count": 305, 531 | "metadata": {}, 532 | "output_type": "execute_result" 533 | } 534 | ], 535 | "source": [ 536 | "predict_result = stacker.predict(train_x.fillna(0))\n", 537 | "loss_s = loss_score(predict_result, train_y)\n", 538 | "loss_s" 539 | ] 540 | }, 541 | { 542 | "cell_type": "code", 543 | "execution_count": 306, 544 | "metadata": { 545 | "collapsed": false 546 | }, 547 | "outputs": [ 548 | { 549 | "data": { 550 | "text/plain": [ 551 | "(0.9876500688451734,\n", 552 | " 0.98078256144556675,\n", 553 | " 0.78898896165565757,\n", 554 | " 0.86008654041103993,\n", 555 | " 0.68182410751042843)" 556 | ] 557 | }, 558 | "execution_count": 306, 559 | "metadata": {}, 560 | "output_type": "execute_result" 561 | } 562 | ], 563 | "source": [ 564 | "result = loss_s, r2_all_scorr, r2_score.mean(), r2_score.max(), r2_score.min()\n", 565 | "result" 566 | ] 567 | }, 568 | { 569 | "cell_type": "code", 570 | "execution_count": 307, 571 | "metadata": { 572 | "collapsed": false 573 | }, 574 | "outputs": [ 575 | { 576 | "name": "stdout", 577 | "output_type": "stream", 578 | "text": [ 579 | "2017-06-13 
# coding: utf-8

import sys
from datetime import datetime, timedelta

import numpy as np
import pandas as pd
import scipy as sp


def loss_score(predict, real):
    """Return ``1 - mean(|real - predict| / real)``.

    Parameters
    ----------
    predict : predicted values.
    real : reference values; ``real - predict`` must yield a pandas object
        because the pandas ``replace``/``fillna`` API is used on it.

    Returns
    -------
    The score computed as ``1 - sum(|relative error|) / len``.  The sum is the
    pandas ``sum`` of the relative-error object, so DataFrame inputs give one
    score per column.

    Rows whose relative error is undefined (``0/0`` -> NaN) or infinite
    (``x/0`` -> +/-inf) contribute an error of 0.  The previous code listed NaN
    twice and never handled the infinities, so a single ``real == 0`` row
    turned the whole score into ``-inf``.
    """
    rel_err = (real - predict) / real
    n = len(rel_err)
    # Neutralise division-by-zero artefacts before averaging.
    rel_err = rel_err.replace([np.inf, -np.inf], np.nan).fillna(0)
    return 1 - np.abs(rel_err).sum() / n
# from sklearn.metrics import r2_score
# loss_score = r2_score


# Run mode: a first command-line argument equal to 'test' selects test mode,
# anything else selects training mode.  A missing argument now also selects
# training mode instead of raising IndexError.
is_train = not (len(sys.argv) > 1 and sys.argv[1] == 'test')
# Input files differ between the training run and the test run.
if is_train:
    train_path = '../dataset/fetures/train.csv'
    predict_path = '../dataset/fetures/train_predict.csv'
    feture_path = '../dataset/fetures/train_feture.csv'
else:
    train_path = '../dataset/fetures/test.csv'
    predict_path = '../dataset/fetures/test_predict.csv'
    feture_path = '../dataset/fetures/test_feture.csv'

train = pd.read_csv(train_path, parse_dates=['record_date'])
predict = pd.read_csv(predict_path, parse_dates=['predict_date'], index_col=['predict_date'])


# # Sliding-window rule
# Take the total consumption of every recorded day and shift it forward by
# `days` (the number of rows in `predict`), then pick the shifted values that
# fall on the dates of `predict`.
days = len(predict)
avg_df = train.groupby('record_date')[['power_consumption']].sum()
avg_df.index = avg_df.index + timedelta(days)
avg_df.index.name = 'predict_date'
avg_df['predict_power_consumption'] = avg_df.power_consumption.astype(int)
rule_predict = pd.DataFrame(avg_df.predict_power_consumption, index=predict.index)

if is_train:
    print('move windows score', loss_score(rule_predict, predict))


# # Mean value
# No-op outside a notebook: the shape was only displayed as cell output.
train.record_date.unique().shape


def avg_score(month, year='2016'):
    """Use the mean daily total consumption since ``year``/``month``/1 as a flat forecast.

    The mean is the sum of ``power_consumption`` over all rows of ``train``
    dated on or after the first day of the given month, divided by the number
    of distinct dates in that slice, truncated to ``np.int64``.  It is written
    to every row of the module-level ``predict`` frame as column ``avg``.

    In training mode the forecast is scored against
    ``predict.predict_power_consumption`` and the result is printed.

    Parameters
    ----------
    month : month number used to build the slice start ``'<year>/<month>/1'``.
    year : year used for the slice start (string or int; it is only formatted).

    Returns
    -------
    None.  Side effects: sets ``predict['avg']`` and, in training mode, prints.
    """
    # NOTE(review): label slicing on the date index presumably relies on
    # `train` being ordered by record_date -- confirm against the data.
    df = train.set_index('record_date').loc['{}/{}/1'.format(year, month):]
    days = df.index.unique().shape[0]
    mean_consumption = np.int64(df.power_consumption.sum()/days)

    predict['avg'] = mean_consumption

    if is_train:
        # loss_score(predict, real): the flat average is the prediction.
        # Previously this value was computed and then thrown away, while the
        # printed score came from a second call with the arguments swapped,
        # and the '{}' placeholder in the message was never filled in.
        loss = loss_score(predict.avg, predict.predict_power_consumption)
        print('from {}-{} '.format(year, month),
              'avg {} score'.format(mean_consumption), loss)


# Try every start month from 2015-01 through 2016-06.
for year in [2015, 2016]:
    for month in range(1, 13):
        if not(year == 2016 and month > 6):
            avg_score(month, year)


# # Use the mean from January 2015 up to now
# NOTE(review): the heading above is the original one, but the call below
# starts the window at 2016-06 (training) or 2016-09 (test) -- confirm which
# is intended.
avg_score(6 if is_train else 9, 2016)


# Test mode: write the flat-average forecast as the submission table.
if not is_train:
    predict['predict_power_consumption'] = predict.avg
    predict.index = predict.index.map(lambda x:x.strftime('%Y%m%d'))
    predict.index.name = 'predict_date'
    predict.to_csv('Tianchi_power_predict_table.csv', columns=['predict_power_consumption'])
# -*- coding:utf-8 -*-
"""
@author: zhanglun
@github: mrzhangboss
@date: 2017/06/09

Crawl monthly historical-weather pages from lishi.tianqi.com and append the
parsed rows to ``<province>.csv``.
"""
from io import StringIO


def create_start_urls(province, year, month):
    """Return the monthly history URL for ``province``; ``month`` is zero-padded to two digits."""
    return 'http://lishi.tianqi.com/{}/{}{:02}.html'.format(province, year, month)


def parse_ul(ul):
    """Flatten one ``<ul>`` row into a list of strings.

    The text of every ``<a>`` descendant comes first, followed by the text
    nodes of every ``<li>`` descendant.
    ``ul`` only needs an lxml-style ``xpath`` method returning lists.
    """
    # presumably the link text is the date and the <li> texts the readings --
    # verify against the live page markup
    link_texts = ul.xpath('.//a/text()')
    cell_texts = ul.xpath('.//li/text()')
    return link_texts + cell_texts


def parse_month(elements):
    """Parse every data row of a monthly page into a list of string lists.

    Rows are the ``<ul>`` children of ``div.tqtongji2``; the first one is
    skipped (presumably the table header -- TODO confirm).
    """
    rows = elements.xpath("//div[@class='tqtongji2']/ul")
    return [parse_ul(ul) for ul in rows[1:]]


if __name__ == '__main__':
    # Third-party crawler dependencies are imported here so the pure parsing
    # helpers above stay importable without them; as a script the resulting
    # module globals are the same as before.
    import requests
    from lxml import etree

    html_parser = etree.HTMLParser()

    province = 'yangzhong'
    filename = '{}.csv'.format(province)
    for year in range(2015, 2017):
        for month in range(1, 13):
            url = create_start_urls(province, year, month)
            # A timeout keeps one stalled request from hanging the whole crawl.
            res = requests.get(url, timeout=30)
            if res.status_code != 200:
                print('skip {}: HTTP {}'.format(url, res.status_code))
                continue
            elements = etree.parse(StringIO(res.text), parser=html_parser)
            result = parse_month(elements)
            print(len(result), result)
            if not result:
                # Nothing parsed: do not append an empty line to the CSV.
                continue
            # Rows contain Chinese text; do not depend on the platform default encoding.
            with open(filename, 'a', encoding='utf-8') as f:
                f.write('\n'.join(','.join(x) for x in result))
                f.write('\n')
-------------------------------------------------------------------------------- /dataset/holiday.csv: -------------------------------------------------------------------------------- 1 | date,holiday 2 | 20150102,2 3 | 20150224,2 4 | 20150322,1 5 | 20150101,2 6 | 20150329,1 7 | 20150112,0 8 | 20150122,0 9 | 20150124,1 10 | 20150119,0 11 | 20150222,2 12 | 20150221,2 13 | 20150206,0 14 | 20150214,1 15 | 20150131,1 16 | 20150207,1 17 | 20150315,1 18 | 20150108,0 19 | 20150312,0 20 | 20150305,0 21 | 20150111,1 22 | 20150113,0 23 | 20150307,1 24 | 20150103,2 25 | 20150227,0 26 | 20150319,0 27 | 20150406,2 28 | 20150212,0 29 | 20150127,0 30 | 20150218,2 31 | 20150317,0 32 | 20150311,0 33 | 20150318,0 34 | 20150405,2 35 | 20150208,1 36 | 20150302,0 37 | 20150410,0 38 | 20150316,0 39 | 20150301,1 40 | 20150219,2 41 | 20150216,0 42 | 20150106,0 43 | 20150202,0 44 | 20150120,0 45 | 20150201,1 46 | 20150130,0 47 | 20150105,0 48 | 20150320,0 49 | 20150115,0 50 | 20150225,0 51 | 20150321,1 52 | 20150121,0 53 | 20150223,2 54 | 20150331,0 55 | 20150114,0 56 | 20150117,1 57 | 20150403,0 58 | 20150110,1 59 | 20150107,0 60 | 20150330,0 61 | 20150116,0 62 | 20150213,0 63 | 20150215,0 64 | 20150203,0 65 | 20150125,1 66 | 20150404,2 67 | 20150104,0 68 | 20150314,1 69 | 20150220,2 70 | 20150205,0 71 | 20150306,0 72 | 20150327,0 73 | 20150409,0 74 | 20150226,0 75 | 20150129,0 76 | 20150109,0 77 | 20150128,0 78 | 20150401,0 79 | 20150304,0 80 | 20150310,0 81 | 20150210,0 82 | 20150118,1 83 | 20150204,0 84 | 20150308,1 85 | 20150313,0 86 | 20150123,0 87 | 20150217,0 88 | 20150309,0 89 | 20150325,0 90 | 20150402,0 91 | 20150211,0 92 | 20150328,1 93 | 20150408,0 94 | 20150323,0 95 | 20150209,0 96 | 20150303,0 97 | 20150228,0 98 | 20150324,0 99 | 20150407,0 100 | 20150326,0 101 | 20150126,0 102 | 20150504,0 103 | 20150512,0 104 | 20150423,0 105 | 20150519,0 106 | 20150603,0 107 | 20150529,0 108 | 20150412,1 109 | 20150413,0 110 | 20150427,0 111 | 20150717,0 112 | 20150613,1 113 | 20150522,0 
114 | 20150629,0 115 | 20150526,0 116 | 20150414,0 117 | 20150527,0 118 | 20150719,1 119 | 20150530,1 120 | 20150502,2 121 | 20150626,0 122 | 20150428,0 123 | 20150521,0 124 | 20150624,0 125 | 20150429,0 126 | 20150528,0 127 | 20150422,0 128 | 20150627,1 129 | 20150517,1 130 | 20150419,1 131 | 20150610,0 132 | 20150614,1 133 | 20150424,0 134 | 20150709,0 135 | 20150710,0 136 | 20150417,0 137 | 20150702,0 138 | 20150617,0 139 | 20150415,0 140 | 20150707,0 141 | 20150509,1 142 | 20150606,1 143 | 20150514,0 144 | 20150703,0 145 | 20150704,1 146 | 20150515,0 147 | 20150621,2 148 | 20150525,0 149 | 20150620,2 150 | 20150604,0 151 | 20150615,0 152 | 20150430,0 153 | 20150616,0 154 | 20150418,1 155 | 20150630,0 156 | 20150507,0 157 | 20150622,2 158 | 20150520,0 159 | 20150609,0 160 | 20150625,0 161 | 20150508,0 162 | 20150524,1 163 | 20150612,0 164 | 20150718,1 165 | 20150426,1 166 | 20150516,1 167 | 20150619,0 168 | 20150420,0 169 | 20150711,1 170 | 20150623,0 171 | 20150510,1 172 | 20150605,0 173 | 20150513,0 174 | 20150712,1 175 | 20150716,0 176 | 20150607,1 177 | 20150506,0 178 | 20150705,1 179 | 20150618,0 180 | 20150503,2 181 | 20150715,0 182 | 20150706,0 183 | 20150611,0 184 | 20150505,0 185 | 20150421,0 186 | 20150628,1 187 | 20150411,1 188 | 20150701,0 189 | 20150608,0 190 | 20150602,0 191 | 20150416,0 192 | 20150714,0 193 | 20150511,0 194 | 20150708,0 195 | 20150501,2 196 | 20150713,0 197 | 20150601,0 198 | 20150425,1 199 | 20150523,1 200 | 20150518,0 201 | 20150531,1 202 | 20151025,1 203 | 20150907,0 204 | 20150913,1 205 | 20151003,2 206 | 20150721,0 207 | 20150929,0 208 | 20150924,0 209 | 20150815,1 210 | 20150911,0 211 | 20150825,0 212 | 20150901,0 213 | 20150917,0 214 | 20151021,0 215 | 20150916,0 216 | 20151006,2 217 | 20150808,1 218 | 20150809,1 219 | 20150912,1 220 | 20150722,0 221 | 20151015,0 222 | 20150801,1 223 | 20150821,0 224 | 20150728,0 225 | 20151001,2 226 | 20151022,0 227 | 20150905,2 228 | 20150915,0 229 | 20150830,1 230 | 20151002,2 231 | 
20151011,1 232 | 20150816,1 233 | 20150807,0 234 | 20151016,0 235 | 20150903,2 236 | 20150914,0 237 | 20151014,0 238 | 20151004,2 239 | 20150829,1 240 | 20150720,0 241 | 20150930,0 242 | 20150806,0 243 | 20151024,1 244 | 20150902,0 245 | 20150817,0 246 | 20150730,0 247 | 20150921,0 248 | 20151009,0 249 | 20150810,0 250 | 20150920,1 251 | 20150724,0 252 | 20150805,0 253 | 20150824,0 254 | 20150822,1 255 | 20150922,0 256 | 20150909,0 257 | 20150731,0 258 | 20150812,0 259 | 20150727,0 260 | 20151012,0 261 | 20150831,0 262 | 20150928,0 263 | 20150729,0 264 | 20150906,0 265 | 20150919,1 266 | 20150814,0 267 | 20150726,1 268 | 20151005,2 269 | 20150827,0 270 | 20150823,1 271 | 20150923,0 272 | 20151017,1 273 | 20150904,2 274 | 20151027,0 275 | 20150927,2 276 | 20150820,0 277 | 20150804,0 278 | 20150918,0 279 | 20150826,0 280 | 20151018,1 281 | 20150819,0 282 | 20151026,0 283 | 20151023,0 284 | 20150802,1 285 | 20151020,0 286 | 20150723,0 287 | 20150818,0 288 | 20151013,0 289 | 20151010,0 290 | 20151007,2 291 | 20150811,0 292 | 20150925,0 293 | 20150926,1 294 | 20150803,0 295 | 20150908,0 296 | 20151008,0 297 | 20151019,0 298 | 20150725,1 299 | 20150813,0 300 | 20150828,0 301 | 20150910,0 302 | 20151103,0 303 | 20160104,0 304 | 20160103,2 305 | 20151106,0 306 | 20151127,0 307 | 20151219,1 308 | 20151128,1 309 | 20160121,0 310 | 20151110,0 311 | 20151109,0 312 | 20160124,1 313 | 20151107,1 314 | 20151031,1 315 | 20151210,0 316 | 20151115,1 317 | 20151123,0 318 | 20160204,0 319 | 20151126,0 320 | 20151229,0 321 | 20151118,0 322 | 20160128,0 323 | 20151101,1 324 | 20151028,0 325 | 20151224,0 326 | 20151207,0 327 | 20160118,0 328 | 20151102,0 329 | 20151112,0 330 | 20151216,0 331 | 20160108,0 332 | 20160127,0 333 | 20151120,0 334 | 20151121,1 335 | 20151203,0 336 | 20160201,0 337 | 20151108,1 338 | 20160126,0 339 | 20160110,1 340 | 20160114,0 341 | 20151105,0 342 | 20151030,0 343 | 20160102,2 344 | 20160105,0 345 | 20151220,1 346 | 20151221,0 347 | 20151029,0 348 | 20151201,0 
349 | 20151125,0 350 | 20160202,0 351 | 20160117,1 352 | 20160109,1 353 | 20160119,0 354 | 20160129,0 355 | 20151122,1 356 | 20151213,1 357 | 20151215,0 358 | 20151223,0 359 | 20151209,0 360 | 20160111,0 361 | 20160113,0 362 | 20151119,0 363 | 20151202,0 364 | 20151111,0 365 | 20151225,0 366 | 20151231,0 367 | 20151117,0 368 | 20160107,0 369 | 20151204,0 370 | 20160115,0 371 | 20160131,1 372 | 20151116,0 373 | 20151129,1 374 | 20151218,0 375 | 20151208,0 376 | 20160122,0 377 | 20151217,0 378 | 20151230,0 379 | 20151114,1 380 | 20160203,0 381 | 20160123,1 382 | 20160112,0 383 | 20151222,0 384 | 20151104,0 385 | 20151124,0 386 | 20151226,1 387 | 20151130,0 388 | 20151206,1 389 | 20151205,1 390 | 20151228,0 391 | 20160130,1 392 | 20160106,0 393 | 20160116,1 394 | 20151214,0 395 | 20160101,2 396 | 20151211,0 397 | 20151212,1 398 | 20151113,0 399 | 20160125,0 400 | 20160120,0 401 | 20151227,1 402 | 20160312,1 403 | 20160505,0 404 | 20160307,0 405 | 20160324,0 406 | 20160221,1 407 | 20160226,0 408 | 20160318,0 409 | 20160214,0 410 | 20160328,0 411 | 20160212,2 412 | 20160308,0 413 | 20160325,0 414 | 20160403,1 415 | 20160418,0 416 | 20160215,0 417 | 20160216,0 418 | 20160321,0 419 | 20160428,0 420 | 20160501,2 421 | 20160314,0 422 | 20160310,0 423 | 20160417,1 424 | 20160306,1 425 | 20160229,0 426 | 20160401,0 427 | 20160503,0 428 | 20160316,0 429 | 20160419,0 430 | 20160420,0 431 | 20160504,0 432 | 20160207,2 433 | 20160410,1 434 | 20160217,0 435 | 20160416,1 436 | 20160219,0 437 | 20160225,0 438 | 20160326,1 439 | 20160424,1 440 | 20160510,0 441 | 20160423,1 442 | 20160512,0 443 | 20160224,0 444 | 20160313,1 445 | 20160228,1 446 | 20160220,1 447 | 20160413,0 448 | 20160422,0 449 | 20160427,0 450 | 20160320,1 451 | 20160425,0 452 | 20160331,0 453 | 20160317,0 454 | 20160208,2 455 | 20160412,0 456 | 20160421,0 457 | 20160415,0 458 | 20160409,1 459 | 20160305,1 460 | 20160302,0 461 | 20160309,0 462 | 20160211,2 463 | 20160205,0 464 | 20160227,1 465 | 20160206,0 466 | 
20160329,0 467 | 20160507,1 468 | 20160323,0 469 | 20160223,0 470 | 20160311,0 471 | 20160405,0 472 | 20160430,1 473 | 20160218,0 474 | 20160327,1 475 | 20160315,0 476 | 20160209,2 477 | 20160330,0 478 | 20160304,0 479 | 20160411,0 480 | 20160210,2 481 | 20160513,0 482 | 20160514,1 483 | 20160408,0 484 | 20160319,1 485 | 20160429,0 486 | 20160502,2 487 | 20160303,0 488 | 20160414,0 489 | 20160407,0 490 | 20160426,0 491 | 20160404,2 492 | 20160222,0 493 | 20160508,1 494 | 20160506,0 495 | 20160509,0 496 | 20160301,0 497 | 20160406,0 498 | 20160511,0 499 | 20160402,1 500 | 20160322,0 501 | 20160213,2 502 | 20160701,0 503 | 20160810,0 504 | 20160804,0 505 | 20160623,0 506 | 20160606,0 507 | 20160525,0 508 | 20160705,0 509 | 20160718,0 510 | 20160727,0 511 | 20160814,1 512 | 20160723,1 513 | 20160526,0 514 | 20160519,0 515 | 20160708,0 516 | 20160521,1 517 | 20160607,0 518 | 20160816,0 519 | 20160712,0 520 | 20160610,2 521 | 20160617,0 522 | 20160703,1 523 | 20160717,1 524 | 20160603,0 525 | 20160820,1 526 | 20160624,0 527 | 20160621,0 528 | 20160520,0 529 | 20160729,0 530 | 20160627,0 531 | 20160614,0 532 | 20160806,1 533 | 20160801,0 534 | 20160714,0 535 | 20160528,1 536 | 20160805,0 537 | 20160702,1 538 | 20160626,1 539 | 20160517,0 540 | 20160615,0 541 | 20160523,0 542 | 20160710,1 543 | 20160731,1 544 | 20160721,0 545 | 20160609,2 546 | 20160619,1 547 | 20160724,1 548 | 20160628,0 549 | 20160605,1 550 | 20160630,0 551 | 20160819,0 552 | 20160715,0 553 | 20160713,0 554 | 20160527,0 555 | 20160813,1 556 | 20160807,1 557 | 20160704,0 558 | 20160821,1 559 | 20160613,0 560 | 20160725,0 561 | 20160604,1 562 | 20160802,0 563 | 20160611,2 564 | 20160612,0 565 | 20160531,0 566 | 20160515,1 567 | 20160719,0 568 | 20160815,0 569 | 20160728,0 570 | 20160524,0 571 | 20160811,0 572 | 20160808,0 573 | 20160709,1 574 | 20160822,0 575 | 20160716,1 576 | 20160726,0 577 | 20160620,0 578 | 20160818,0 579 | 20160608,0 580 | 20160616,0 581 | 20160711,0 582 | 20160706,0 583 | 20160622,0 
584 | 20160720,0 585 | 20160707,0 586 | 20160809,0 587 | 20160812,0 588 | 20160601,0 589 | 20160629,0 590 | 20160516,0 591 | 20160522,1 592 | 20160803,0 593 | 20160618,1 594 | 20160530,0 595 | 20160518,0 596 | 20160625,1 597 | 20160730,1 598 | 20160817,0 599 | 20160602,0 600 | 20160529,1 601 | 20160722,0 602 | 20160906,0 603 | 20160923,0 604 | 20161001,2 605 | 20161117,0 606 | 20161105,1 607 | 20161023,1 608 | 20161002,2 609 | 20161015,1 610 | 20160904,1 611 | 20160916,2 612 | 20161125,0 613 | 20160901,0 614 | 20161005,2 615 | 20161017,0 616 | 20161113,1 617 | 20160911,1 618 | 20161107,0 619 | 20161123,0 620 | 20161121,0 621 | 20161114,0 622 | 20161118,0 623 | 20161026,0 624 | 20161108,0 625 | 20161115,0 626 | 20160924,1 627 | 20161012,0 628 | 20160918,0 629 | 20160907,0 630 | 20161019,0 631 | 20160910,1 632 | 20161103,0 633 | 20161122,0 634 | 20160905,0 635 | 20161009,0 636 | 20160915,2 637 | 20161112,1 638 | 20161031,0 639 | 20160902,0 640 | 20161116,0 641 | 20160903,1 642 | 20161106,1 643 | 20160930,0 644 | 20161109,0 645 | 20160917,2 646 | 20161127,1 647 | 20161011,0 648 | 20160828,1 649 | 20160922,0 650 | 20161129,0 651 | 20161124,0 652 | 20160929,0 653 | 20161021,0 654 | 20161025,0 655 | 20161120,1 656 | 20161102,0 657 | 20161022,1 658 | 20161003,2 659 | 20161030,1 660 | 20161024,0 661 | 20161007,2 662 | 20160831,0 663 | 20160914,0 664 | 20160919,0 665 | 20160824,0 666 | 20160909,0 667 | 20160913,0 668 | 20160925,1 669 | 20160921,0 670 | 20161027,0 671 | 20160926,0 672 | 20161111,0 673 | 20161013,0 674 | 20160827,1 675 | 20160920,0 676 | 20160912,0 677 | 20161028,0 678 | 20161104,0 679 | 20160830,0 680 | 20161110,0 681 | 20161128,0 682 | 20160826,0 683 | 20161018,0 684 | 20161010,0 685 | 20161130,0 686 | 20161119,1 687 | 20161008,0 688 | 20161006,2 689 | 20161029,1 690 | 20160829,0 691 | 20161016,1 692 | 20160928,0 693 | 20160823,0 694 | 20161126,1 695 | 20161004,2 696 | 20160927,0 697 | 20161014,0 698 | 20161101,0 699 | 20161020,0 700 | 20160908,0 701 | 
20160825,0 702 | 20161213,0 703 | 20161223,0 704 | 20161208,0 705 | 20161210,1 706 | 20161203,1 707 | 20161226,0 708 | 20161218,1 709 | 20161207,0 710 | 20161201,0 711 | 20161222,0 712 | 20161231,1 713 | 20161220,0 714 | 20161214,0 715 | 20161227,0 716 | 20161229,0 717 | 20161230,0 718 | 20161204,1 719 | 20161202,0 720 | 20161209,0 721 | 20161228,0 722 | 20161217,1 723 | 20161221,0 724 | 20161211,1 725 | 20161215,0 726 | 20161219,0 727 | 20161205,0 728 | 20161212,0 729 | 20161216,0 730 | 20161206,0 731 | 20161224,1 732 | 20161225,1 733 | -------------------------------------------------------------------------------- /dataset/yangzhong.csv: -------------------------------------------------------------------------------- 1 | 2015-01-01,8,-4,晴,西南风,小于3级 2 | 2015-01-02,13,0,多云转晴,东南风,小于3级 3 | 2015-01-03,16,3,多云,南风,小于3级 4 | 2015-01-04,15,4,晴转阴,东风,3-4级 5 | 2015-01-05,10,2,小雨转雨夹雪,西北风,4-5级转3-4级 6 | 2015-01-06,7,-1,阴转多云,北风,小于3级 7 | 2015-01-07,7,-2,晴,西北风,小于3级 8 | 2015-01-08,12,-1,晴,北风,小于3级 9 | 2015-01-09,14,1,晴转多云,西南风,小于3级 10 | 2015-01-10,14,1,晴,西南风,小于3级 11 | 2015-01-11,10,1,多云,东北风,小于3级 12 | 2015-01-12,8,3,阴,东风,小于3级 13 | 2015-01-13,8,4,小到中雨转多云,东北风,小于3级 14 | 2015-01-14,10,4,阴转多云,西北风,小于3级 15 | 2015-01-15,11,3,多云,西南风,小于3级 16 | 2015-01-16,7,-1,多云转晴,西北风,3-4级 17 | 2015-01-17,9,-1,多云,东风,小于3级 18 | 2015-01-18,11,0,多云,西风,小于3级 19 | 2015-01-19,10,2,多云转阴,东风,小于3级 20 | 2015-01-20,11,4,多云,东南风,小于3级转3-4级 21 | 2015-01-21,7,0,多云,西北风,小于3级 22 | 2015-01-22,11,-2,晴,东北风,小于3级 23 | 2015-01-23,15,2,多云转阴,东南风,小于3级 24 | 2015-01-24,11,7,阴转小雨,东风,3-4级 25 | 2015-01-25,10,6,阴,西北风,小于3级转3-4级 26 | 2015-01-26,6,2,阴,东北风,4-5级 27 | 2015-01-27,3,0,小雪,东北风,4-5级转小于3级 28 | 2015-01-28,2,-1,大雪,东北风,3-4级 29 | 2015-01-29,3,-1,小雪转阴,西北风,小于3级 30 | 2015-01-30,4,-2,多云,北风,小于3级 31 | 2015-01-31,5,-1,阴转雨夹雪,东北风,小于3级 32 | 2015-02-01,7,1,阴,东风,小于3级 33 | 2015-02-02,7,2,阴转多云,东风,小于3级 34 | 2015-02-03,7,2,阴转多云,西北风,3-4级 35 | 2015-02-04,7,-2,多云转晴,西北风,3-4级 36 | 2015-02-05,9,-2,晴转多云,西南风,小于3级 37 | 2015-02-06,12,-1,多云,南风,小于3级转3-4级 38 | 
2015-02-07,6,0,多云转晴,西北风,4-5级 39 | 2015-02-08,6,-4,晴,东北风,小于3级 40 | 2015-02-09,15,0,晴,西南风,3-4级 41 | 2015-02-10,15,0,晴,西南风,3-4级 42 | 2015-02-11,15,0,晴,西南风,3-4级 43 | 2015-02-12,15,0,晴,西南风,3-4级 44 | 2015-02-13,19,3,晴转多云,东南风,小于3级 45 | 2015-02-14,15,10,阵雨,东南风,小于3级 46 | 2015-02-15,15,9,阴转多云,西北风,3-4级 47 | 2015-02-16,14,3,多云转晴,西南风,小于3级 48 | 2015-02-17,12,3,晴,东风,4-5级 49 | 2015-02-18,11,1,多云,东北风,小于3级 50 | 2015-02-19,10,4,阴转小到中雨,东风,3-4级转小于3级 51 | 2015-02-20,11,8,小雨转阴,西风,3-4级 52 | 2015-02-21,11,3,阴转多云,西北风,3-4级 53 | 2015-02-22,10,4,小雨转阴,东风,小于3级 54 | 2015-02-23,12,4,阴,东风,小于3级 55 | 2015-02-24,10,6,小到中雨转小雨,东南风,3-4级 56 | 2015-02-25,10,6,阴,暂无实况,小于3级转3-4级 57 | 2015-02-26,6,3,阴转小到中雨,东风,小于3级 58 | 2015-02-27,4,2,中雨,东北风,4-5级 59 | 2015-02-28,9,1,阴转多云,西北风,3-4级 60 | 2015-03-01,13,2,晴转多云,南风,3-4级 61 | 2015-03-02,12,6,小雨转多云,东南风,3-4级转4-5级 62 | 2015-03-03,6,1,多云,东北风,4-5级转3-4级 63 | 2015-03-04,5,1,阴转雨夹雪,东风,小于3级 64 | 2015-03-05,8,2,雨夹雪转阴,东北风,小于3级 65 | 2015-03-06,12,3,多云,东南风,小于3级 66 | 2015-03-07,10,5,阴转小雨,东风,3-4级 67 | 2015-03-08,9,4,阴转多云,北风,小于3级转4-5级 68 | 2015-03-09,7,0,多云,东北风,4-5级转3-4级 69 | 2015-03-10,11,-1,多云,东南风,小于3级 70 | 2015-03-11,16,2,晴,南风,小于3级 71 | 2015-03-12,19,6,晴转多云,南风,3-4级转小于3级 72 | 2015-03-13,15,7,多云转小雨,东北风,3-4级 73 | 2015-03-14,16,8,阴,东风,3-4级 74 | 2015-03-15,17,10,阴转多云,东风,小于3级 75 | 2015-03-16,16,11,阴转中雨,东南风,小于3级 76 | 2015-03-17,14,12,大雨转小到中雨,东风,3-4级 77 | 2015-03-18,15,9,小雨转阴,北风,3-4级 78 | 2015-03-19,14,9,小雨,暂无实况,3-4级 79 | 2015-03-20,20,9,阴转多云,西风,小于3级 80 | 2015-03-21,19,8,多云,西南风,小于3级 81 | 2015-03-22,16,7,多云转阴,东风,4-5级转3-4级 82 | 2015-03-23,15,6,多云,东风,小于3级 83 | 2015-03-24,17,7,多云,东风,3-4级 84 | 2015-03-25,15,6,多云转阴,东风,3-4级 85 | 2015-03-26,16,9,小雨转多云,东风,3-4级 86 | 2015-03-27,20,7,多云转晴,东南风,3-4级转小于3级 87 | 2015-03-28,23,10,多云,南风,小于3级 88 | 2015-03-29,20,14,阴转阵雨,东风,3-4级 89 | 2015-03-30,26,16,多云,东南风,3-4级 90 | 2015-03-31,23,17,多云转阴,东风,3-4级 91 | 2015-04-01,25,14,阴转中雨,东风,5-6级转4-5级 92 | 2015-04-02,19,11,阴,西北风,3-4级转4-5级 93 | 2015-04-03,14,10,雷阵雨,东风,3-4级转小于3级 94 | 2015-04-04,13,10,小雨转小到中雨,西北风,小于3级 95 | 
2015-04-05,11,7,中雨,北风,4-5级转3-4级 96 | 2015-04-06,7,4,小雨,东北风,4-5级转3-4级 97 | 2015-04-07,12,2,阴转多云,西风,小于3级 98 | 2015-04-08,15,3,多云,东风,3-4级 99 | 2015-04-09,17,6,晴转多云,东风,小于3级 100 | 2015-04-10,19,7,多云,北风,3-4级 101 | 2015-04-11,21,9,多云转雷阵雨,东风,3-4级 102 | 2015-04-12,13,7,多云转雷阵雨,西北风,5-6级 103 | 2015-04-13,19,6,阴转晴,西风,5-6级转3-4级 104 | 2015-04-14,23,10,晴,西南风,4-5级 105 | 2015-04-15,21,13,阴转多云,西南风,3-4级 106 | 2015-04-16,22,10,晴转多云,东南风,4-5级 107 | 2015-04-17,26,15,阴,东南风,4-5级转3-4级 108 | 2015-04-18,24,16,阴转雷阵雨,北风,3-4级 109 | 2015-04-19,17,10,中雨转阵雨,北风,4-5级转3-4级 110 | 2015-04-20,20,8,多云,暂无实况,小于3级 111 | 2015-04-21,24,8,多云转晴,南风,小于3级 112 | 2015-04-22,26,12,多云,东南风,3-4级 113 | 2015-04-23,27,13,多云,东南风,小于3级 114 | 2015-04-24,27,13,多云转晴,西南风,小于3级 115 | 2015-04-25,28,14,晴,暂无实况,小于3级 116 | 2015-04-26,28,15,晴转多云,东南风,4-5级 117 | 2015-04-27,29,18,阴转雷阵雨,南风,小于3级 118 | 2015-04-28,22,17,雷阵雨转多云,暂无实况,3-4级 119 | 2015-04-29,25,14,多云,西风,小于3级 120 | 2015-04-30,27,14,晴转多云,东风,小于3级 121 | 2015-05-01,20,16,小到中雨转中到大雨,东风,小于3级 122 | 2015-05-02,25,15,阴转多云,西北风,3-4级 123 | 2015-05-03,24,15,多云,暂无实况,小于3级 124 | 2015-05-04,24,12,阴转多云,东南风,3-4级转小于3级 125 | 2015-05-05,23,15,阴,东南风,3-4级 126 | 2015-05-06,27,16,多云,东南风,小于3级 127 | 2015-05-07,27,17,多云,北风,3-4级 128 | 2015-05-08,26,15,多云,西南风,3-4级 129 | 2015-05-09,26,15,多云,西南风,3-4级 130 | 2015-05-10,26,15,多云,西南风,3-4级 131 | 2015-05-11,26,15,多云,西南风,3-4级 132 | 2015-05-12,26,15,多云,西南风,3-4级 133 | 2015-05-13,29,20,晴~多云,东南风,3-4级 134 | 2015-05-14,31,22,多云~中雨,西南风,3-4级 135 | 2015-05-15,25,16,小到中雨~阴,东北风,3-4级 136 | 2015-05-16,24,15,多云,东南风,3-4级 137 | 2015-05-17,26,18,多云~雷雨,东南风,3-4级 138 | 2015-05-18,25,17,小雨~多云,东南风,3-4级 139 | 2015-05-19,29,19,多云,东南风,3-4级 140 | 2015-05-20,27,15,多云,东南风,3-4级 141 | 2015-05-21,25,15,晴~多云,东南风,3-4级 142 | 2015-05-22,28,18,晴,东北风,3-4级 143 | 2015-05-23,26,18,阴~,东北风,3-4级 144 | 2015-05-24,27,19,多云,东北风,3-4级 145 | 2015-05-25,29,19,晴~,东北风,3-4级 146 | 2015-05-26,27,20,多云~,东南风,3-4级 147 | 2015-05-27,22,17,阵雨,东北风,3-4级 148 | 2015-05-28,27,20,阴,东南风,3-4级 149 | 2015-05-29,23,20,中到大雨~中雨,东北风,3-4级 150 | 
2015-05-30,26,19,多云~晴,东北风,3-4级 151 | 2015-05-31,27,19,多云,东南风,3-4级 152 | 2015-06-01,31,23,多云~雷雨,东南风,3-4级 153 | 2015-06-02,25,19,暴雨~小到中雨,东北风,3-4级 154 | 2015-06-03,26,16,阴~晴,东南风,3-4级 155 | 2015-06-04,26,19,多云,西北风,3-4级 156 | 2015-06-05,28,19,晴,西南风,3-4级 157 | 2015-06-06,32,21,多云,东南风,3-4级 158 | 2015-06-07,27,21,雷雨~大雨,东北风,3-4级 159 | 2015-06-08,27,20,阴,西南风,3-4级 160 | 2015-06-09,29,22,阴,东南风,3-4级 161 | 2015-06-10,28,21,阴~多云,东南风,3-4级 162 | 2015-06-11,33,22,多云,西南风,3-4级 163 | 2015-06-12,34,23,晴~多云,西南风,3-4级 164 | 2015-06-13,33,25,多云,西南风,3-4级 165 | 2015-06-14,32,23,多云~阴,东北风,3-4级 166 | 2015-06-15,28,21,雷雨,东南风,3-4级 167 | 2015-06-16,21,18,大雨~暴雨,东北风,3-4级 168 | 2015-06-17,22,18,小雨~阴,东北风,3-4级 169 | 2015-06-18,28,19,多云,东南风,3-4级 170 | 2015-06-19,29,21,多云,东南风,3-4级 171 | 2015-06-20,30,21,多云~阴,东北风,3-4级 172 | 2015-06-21,26,21,阵雨,东南风,3-4级 173 | 2015-06-22,26,21,阴,东南风,3-4级 174 | 2015-06-23,25,23,阵雨~小雨,东南风,3-4级 175 | 2015-06-24,26,22,小雨~小到中雨,东北风,3-4级 176 | 2015-06-25,30,23,阴~中雨,西北风,3-4级 177 | 2015-06-26,26,21,雷雨~大雨,东南风,3-4级 178 | 2015-06-27,23,19,暴雨,东南风,3-4级 179 | 2015-06-28,24,21,中到大雨~小雨,东南风,3-4级 180 | 2015-06-29,24,21,大雨~雷雨,东南风,3-4级 181 | 2015-06-30,27,21,中到大雨~小雨,东南风,3-4级 182 | 2015-07-01,29,21,多云~晴,西南风,3-4级 183 | 2015-07-02,32,23,多云,西南风,3-4级 184 | 2015-07-03,30,21,多云,东南风,3-4级 185 | 2015-07-04,28,19,阴,东南风,3-4级 186 | 2015-07-05,28,19,阴,东南风,3-4级 187 | 2015-07-06,22,18,小雨~中到大雨,东北风,3-4级 188 | 2015-07-07,24,19,雷雨,东北风,3-4级 189 | 2015-07-08,24,20,小到中雨~小雨,东北风,3-4级 190 | 2015-07-09,27,22,阴,东北风,3-4级 191 | 2015-07-10,31,24,阴~阵雨,东北风,3-4级 192 | 2015-07-11,27,23,大雨~大到暴雨,东北风,3-4级 193 | 2015-07-12,29,23,阴~多云,东南风,3-4级 194 | 2015-07-13,35,25,多云,东南风,3-4级 195 | 2015-07-14,35,25,晴~多云,东南风,3-4级 196 | 2015-07-15,32,24,多云,东北风,3-4级 197 | 2015-07-16,29,23,雷阵雨~小到中雨,东南风,3-4级 198 | 2015-07-17,25,22,中雨,东南风,3-4级 199 | 2015-07-18,27,23,雷阵雨,东南风,3-4级 200 | 2015-07-19,26,24,大雨~小雨,东南风,3-4级 201 | 2015-07-20,28,24,阴,东南风,3-4级 202 | 2015-07-21,30,24,多云,东南风,3-4级 203 | 2015-07-22,31,25,多云~阴,东南风,3-4级 204 | 2015-07-23,31,25,雷阵雨,东南风,3-4级 
205 | 2015-07-24,28,24,中雨,西南风,3-4级 206 | 2015-07-25,29,25,中雨~小雨,东南风,3-4级 207 | 2015-07-26,32,26,雷阵雨~多云,西南风,3-4级 208 | 2015-07-27,35,27,多云,西南风,3-4级 209 | 2015-07-28,36,27,多云~晴,西南风,3-4级 210 | 2015-07-29,37,28,多云,西南风,3-4级 211 | 2015-07-30,35,27,晴~多云,西南风,3-4级 212 | 2015-07-31,35,27,晴,西北风,3-4级 213 | 2015-08-01,35,27,阴~多云,东南风,3-4级 214 | 2015-08-02,37,28,晴,西南风,3-4级 215 | 2015-08-03,37,28,晴,西南风,3-4级 216 | 2015-08-04,37,28,晴,西南风,3-4级 217 | 2015-08-05,37,28,多云,东南风,3-4级 218 | 2015-08-06,35,26,多云,东南风,3-4级 219 | 2015-08-07,34,26,多云,东南风,3-4级 220 | 2015-08-08,33,26,多云~雷阵雨,东北风,3-4级 221 | 2015-08-09,30,25,雷阵雨~小到中雨,东北风,3-4级 222 | 2015-08-10,28,24,暴雨~小到中雨,东南风,3-4级 223 | 2015-08-11,28,23,阴~多云,西北风,3-4级 224 | 2015-08-12,31,23,多云,东南风,3-4级 225 | 2015-08-13,30,23,多云,东南风,3-4级 226 | 2015-08-14,32,23,多云,东南风,3-4级 227 | 2015-08-15,30,24,阴,东南风,3-4级 228 | 2015-08-16,30,23,多云~晴,东南风,3-4级 229 | 2015-08-17,31,24,多云,东南风,3-4级 230 | 2015-08-18,30,23,多云,东北风,3-4级 231 | 2015-08-19,30,24,阴~大雨,东南风,3-4级 232 | 2015-08-20,26,23,阵雨~阴,无持续风向,3-4级 233 | 2015-08-21,29,23,阴~多云,东北风,3-4级 234 | 2015-08-22,28,21,阴,东南风,3-4级 235 | 2015-08-23,28,21,阴,东北风,3-4级 236 | 2015-08-24,24,20,小到中雨~多云,西北风,3-4级 237 | 2015-08-25,29,21,多云,东北风,3-4级 238 | 2015-08-26,30,22,多云,西南风,3-4级 239 | 2015-08-27,30,22,阵雨~多云,东南风,3-4级 240 | 2015-08-28,30,22,多云,东南风,3-4级 241 | 2015-08-29,31,22,多云,东南风,3-4级 242 | 2015-08-30,31,22,多云,东南风,3-4级 243 | 2015-08-31,30,22,多云,东北风,3-4级 244 | 2015-09-01,29,22,雷阵雨~多云,东北风,3-4级 245 | 2015-09-02,30,22,多云,西北风,3-4级 246 | 2015-09-03,29,22,多云,东南风,3-4级 247 | 2015-09-04,30,23,阴~中雨,东北风,3-4级 248 | 2015-09-05,28,22,阴~多云,西北风,3-4级 249 | 2015-09-06,28,21,多云,东北风,3-4级 250 | 2015-09-07,28,21,多云,东北风,3-4级 251 | 2015-09-08,28,20,多云,东北风,3-4级 252 | 2015-09-09,28,19,晴~多云,东北风,3-4级 253 | 2015-09-10,28,20,晴~多云,东南风,3-4级 254 | 2015-09-11,27,20,多云~阵雨,西北风,3-4级 255 | 2015-09-12,24,16,阴~多云,西北风,3-4级 256 | 2015-09-13,25,17,多云,东北风,3-4级 257 | 2015-09-14,26,20,多云,东南风,3-4级 258 | 2015-09-15,26,18,阴~多云,东南风,3-4级 259 | 2015-09-16,27,18,多云~晴,东北风,3-4级 260 | 
2015-09-17,26,18,多云,西北风,3-4级 261 | 2015-09-18,27,19,多云,东北风,3-4级 262 | 2015-09-19,28,20,多云,东南风,3-4级 263 | 2015-09-20,26,19,多云~晴,东南风,3-4级 264 | 2015-09-21,27,20,多云,东南风,3-4级 265 | 2015-09-22,26,21,阵雨~小雨,西南风,3-4级 266 | 2015-09-23,26,19,多云,西北风,3-4级 267 | 2015-09-24,26,20,多云~小雨,东北风,3-4级 268 | 2015-09-25,27,19,阴~多云,东北风,3-4级 269 | 2015-09-26,28,18,多云~晴,东北风,3-4级 270 | 2015-09-27,27,18,晴,东北风,3-4级 271 | 2015-09-28,27,20,多云~阵雨,东北风,3-4级 272 | 2015-09-29,25,21,阵雨,东北风,3-4级 273 | 2015-09-30,26,19,阵雨~中到大雨,东南风,3-4级 274 | 2015-10-01,23,14,多云~晴,西北风,3-4级 275 | 2015-10-02,24,15,晴,东南风,3-4级 276 | 2015-10-03,25,17,多云,东南风,3-4级 277 | 2015-10-04,25,19,多云~小雨,西南风,3-4级 278 | 2015-10-05,23,18,阴~多云,东南风,3-4级 279 | 2015-10-06,24,19,阴~阵雨,东南风,3-4级 280 | 2015-10-07,22,18,阵雨,东北风,3-4级 281 | 2015-10-08,19,15,阴~多云,东北风,3-4级 282 | 2015-10-09,22,12,多云~晴,西南风,3-4级 283 | 2015-10-10,23,10,晴~多云,西北风,3-4级 284 | 2015-10-11,20,10,晴,东南风,3-4级 285 | 2015-10-12,22,11,晴,东南风,3-4级 286 | 2015-10-13,24,13,晴~多云,东南风,3-4级 287 | 2015-10-14,24,13,多云,东南风,3-4级 288 | 2015-10-15,25,14,晴,西南风,3-4级 289 | 2015-10-16,26,15,晴,东南风,3-4级 290 | 2015-10-17,26,14,晴,东南风,3-4级 291 | 2015-10-18,25,14,晴~多云,东北风,3-4级 292 | 2015-10-19,24,15,多云,东南风,3-4级 293 | 2015-10-20,25,16,多云,东南风,3-4级 294 | 2015-10-21,25,16,多云,东北风,3-4级 295 | 2015-10-22,25,16,多云,东北风,3-4级 296 | 2015-10-23,25,17,多云,东北风,3-4级 297 | 2015-10-24,25,17,多云,东北风,3-4级 298 | 2015-10-25,23,16,多云~阴,东南风,3-4级 299 | 2015-10-26,24,14,小雨,西北风,3-4级 300 | 2015-10-27,19,11,多云,西南风,3-4级 301 | 2015-10-28,20,12,阴,东南风,3-4级 302 | 2015-10-29,18,12,阴~多云,西北风,3-4级 303 | 2015-10-30,18,10,多云,东北风,3-4级 304 | 2015-10-31,17,11,多云~阵雨,西北风,3-4级 305 | 2015-11-01,17,8,多云~晴,西南风,3-4级 306 | 2015-11-02,18,8,晴,东南风,3-4级 307 | 2015-11-03,20,11,多云,东南风,3-4级 308 | 2015-11-04,21,14,阴,东南风,3-4级 309 | 2015-11-05,23,16,阴~小雨,东南风,3-4级 310 | 2015-11-06,26,19,阵雨,东南风,3-4级 311 | 2015-11-07,17,12,阵雨~小雨,西北风,3-4级 312 | 2015-11-08,13,9,阵雨~多云,西北风,3-4级 313 | 2015-11-09,13,8,多云,东北风,3-4级 314 | 2015-11-10,15,9,多云,东南风,3-4级 315 | 2015-11-11,16,10,阴,东北风,3-4级 316 | 
2015-11-12,16,13,小雨,西南风,3-4级 317 | 2015-11-13,16,12,阴,西北风,3-4级 318 | 2015-11-14,17,10,多云,东南风,3-4级 319 | 2015-11-15,18,11,阴~中雨,东风,3-4级 320 | 2015-11-16,17,13,阴~小雨,东北风,3-4级 321 | 2015-11-17,15,12,小雨,东北风,3-4级 322 | 2015-11-18,15,11,小雨~小到中雨,东北风,3-4级 323 | 2015-11-19,14,11,小雨~阴,东北风,3-4级 324 | 2015-11-20,16,11,阴,东南风,3-4级 325 | 2015-11-21,16,13,阴~阵雨,东北风,3-4级 326 | 2015-11-22,17,14,小雨,东北风,3-4级 327 | 2015-11-23,16,10,小雨,东北风,3-4级 328 | 2015-11-24,11,3,小雨~小到中雨,西北风,3-4级 329 | 2015-11-25,5,0,多云~晴,西北风,3-4级 330 | 2015-11-26,3,-4,晴,西北风,3-4级 331 | 2015-11-27,5,-2,晴~多云,东南风,3-4级 332 | 2015-11-28,12,3,多云~阴,东北风,3-4级 333 | 2015-11-29,16,6,多云~晴,东南风,3-4级 334 | 2015-11-30,15,5,多云,东南风,3-4级 335 | 2015-12-01,17,10,多云~阴,西北风,3-4级 336 | 2015-12-02,12,3,多云~晴,西北风,3-4级 337 | 2015-12-03,10,0,晴,西北风,3-4级 338 | 2015-12-04,9,2,晴~多云,西南风,3-4级 339 | 2015-12-05,8,4,阵雨,西南风,3-4级 340 | 2015-12-06,10,2,多云,东南风,3-4级 341 | 2015-12-07,12,3,晴,东南风,3-4级 342 | 2015-12-08,13,6,多云,东南风,3-4级 343 | 2015-12-09,12,8,小雨,东北风,3-4级 344 | 2015-12-10,12,7,阴~多云,西北风,3-4级 345 | 2015-12-11,10,1,多云,东北风,3-4级 346 | 2015-12-12,10,2,晴~多云,东北风,3-4级 347 | 2015-12-13,9,3,多云~阴,西北风,3-4级 348 | 2015-12-14,10,4,阴,西北风,3-4级 349 | 2015-12-15,10,0,多云~晴,西北风,3-4级 350 | 2015-12-16,6,-3,晴,西北风,3-4级 351 | 2015-12-17,5,-3,晴,西南风,3-4级 352 | 2015-12-18,6,-2,多云,东南风,3-4级 353 | 2015-12-19,9,2,阴~小雨,东南风,3-4级 354 | 2015-12-20,8,2,小雨~阴,西北风,3-4级 355 | 2015-12-21,8,0,多云~阴,东南风,3-4级 356 | 2015-12-22,11,6,阴,西北风,3-4级 357 | 2015-12-23,10,7,阴~小雨,东北风,3-4级 358 | 2015-12-24,10,4,小雨~阴,西北风,3-4级 359 | 2015-12-25,10,2,多云~晴,西南风,3-4级 360 | 2015-12-26,14,4,晴~多云,西南风,3-4级 361 | 2015-12-27,9,1,阴~多云,东北风,3-4级 362 | 2015-12-28,6,-1,多云,东北风,3-4级 363 | 2015-12-29,9,0,多云,东南风,3-4级 364 | 2015-12-30,11,0,晴,西北风,3-4级 365 | 2015-12-31,9,-1,晴,东北风,3-4级 366 | 2016-01-01,11,2,多云,东南风,3-4级 367 | 2016-01-02,15,7,多云~阴,东南风,3-4级 368 | 2016-01-03,16,7,多云~阴,东南风,3-4级 369 | 2016-01-04,13,7,小雨~小到中雨,东北风,3-4级 370 | 2016-01-05,10,5,小雨~阴,东北风,3-4级 371 | 2016-01-06,8,3,阴~多云,西北风,3-4级 372 | 2016-01-07,6,-1,阴~多云,西北风,3-4级 373 | 
2016-01-08,7,-1,晴~多云,东北风,3-4级 374 | 2016-01-09,9,-1,多云~阴,东南风,3-4级 375 | 2016-01-10,10,4,小雨,东北风,3-4级 376 | 2016-01-11,8,1,雨夹雪,东北风,3-4级 377 | 2016-01-12,3,-3,小雪~阴,西北风,3-4级 378 | 2016-01-13,4,-2,多云,西南风,3-4级 379 | 2016-01-14,7,-2,多云,西南风,3-4级 380 | 2016-01-15,8,-2,晴,东南风,3-4级 381 | 2016-01-16,10,1,多云~小雨,东北风,3-4级 382 | 2016-01-17,8,0,小雨~多云,东北风,3-4级 383 | 2016-01-18,3,-4,晴,西北风,3-4级 384 | 2016-01-19,3,-4,多云,东北风,3-4级 385 | 2016-01-20,3,-2,阴~中到大雪,东北风,3-4级 386 | 2016-01-21,2,-2,雨夹雪~阴,东北风,3-4级 387 | 2016-01-22,3,-4,小雪,东北风,3-4级 388 | 2016-01-23,-3,-10,阴~多云,西北风,3-4级 389 | 2016-01-24,-6,-10,晴,西北风,3-4级 390 | 2016-01-25,2,-8,晴,西南风,3-4级 391 | 2016-01-26,6,-4,多云~阴,西南风,3-4级 392 | 2016-01-27,8,-1,多云~小雨,东南风,3-4级 393 | 2016-01-28,6,3,小雨,东北风,3-4级 394 | 2016-01-29,5,1,小雨~阴,东北风,3-4级 395 | 2016-01-30,5,-1,阴~多云,东北风,3-4级 396 | 2016-01-31,4,-2,雨夹雪~阴,东北风,3-4级 397 | 2016-02-01,2,-5,多云,西北风,3-4级 398 | 2016-02-02,5,-4,多云,东北风,3-4级 399 | 2016-02-03,7,-3,晴~多云,东北风,3-4级 400 | 2016-02-04,7,-2,多云,东南风,3-4级 401 | 2016-02-05,9,-2,多云~阴,西北风,3-4级 402 | 2016-02-06,7,-3,晴,西北风,3-4级 403 | 2016-02-07,10,-2,多云~晴,西北风,3-4级 404 | 2016-02-08,15,2,晴,西南风,3-4级 405 | 2016-02-09,17,2,晴~多云,东南风,3-4级 406 | 2016-02-10,20,7,多云~阴,东南风,3-4级 407 | 2016-02-11,18,11,雷阵雨~阴,东南风,3-4级 408 | 2016-02-12,18,11,阵雨~中雨,东北风,3-4级 409 | 2016-02-13,14,1,小雨~阴,西北风,3-4级 410 | 2016-02-14,6,-3,阴~多云,西北风,3-4级 411 | 2016-02-15,7,-2,晴,西北风,3-4级 412 | 2016-02-16,10,0,晴,西北风,3-4级 413 | 2016-02-17,12,2,多云,东南风,3-4级 414 | 2016-02-18,16,3,晴~多云,东南风,3-4级 415 | 2016-02-19,13,3,阴,东南风,3-4级 416 | 2016-02-20,11,-1,多云~晴,东南风,3-4级 417 | 2016-02-21,10,2,阴~小到中雨,东南风,3-4级 418 | 2016-02-22,7,4,小雨~阴,东北风,3-4级 419 | 2016-02-23,11,1,阴~多云,东北风,3-4级 420 | 2016-02-24,9,0,晴,东南风,3-4级 421 | 2016-02-25,12,2,多云,东南风,3-4级 422 | 2016-02-26,17,4,多云,西南风,3-4级 423 | 2016-02-27,19,8,晴~多云,西南风,3-4级 424 | 2016-02-28,17,2,阴~多云,东北风,3-4级 425 | 2016-02-29,10,-1,晴,东北风,3-4级 426 | 2016-03-01,13,1,晴~多云,东南风,3-4级 427 | 2016-03-02,16,7,多云,东南风,3-4级 428 | 2016-03-03,16,9,阵雨~阴,东南风,3-4级 429 | 
2016-03-04,19,12,阵雨~阴,东北风,3-4级 430 | 2016-03-05,21,11,阴~多云,东北风,3-4级 431 | 2016-03-06,19,10,多云,东南风,3-4级 432 | 2016-03-07,16,6,多云~小雨,东北风,3-4级 433 | 2016-03-08,9,1,中到大雨~小雨,东北风,3-4级 434 | 2016-03-09,7,1,阴,东北风,3-4级 435 | 2016-03-10,8,-1,多云~晴,东北风,3-4级 436 | 2016-03-11,11,1,多云~晴,西南风,3-4级 437 | 2016-03-12,13,4,小雨,东南风,3-4级 438 | 2016-03-13,11,4,阴~多云,东北风,3-4级 439 | 2016-03-14,13,3,多云,东南风,3-4级 440 | 2016-03-15,14,4,多云,东南风,3-4级 441 | 2016-03-16,18,6,多云~阴,东南风,3-4级 442 | 2016-03-17,15,9,小雨,东北风,3-4级 443 | 2016-03-18,18,11,阴,西北风,3-4级 444 | 2016-03-19,19,7,多云,东南风,3-4级 445 | 2016-03-20,17,6,多云,东北风,3-4级 446 | 2016-03-21,17,6,多云,东北风,3-4级 447 | 2016-03-22,16,9,多云~阴,东南风,3-4级 448 | 2016-03-23,14,4,多云,东北风,3-4级 449 | 2016-03-24,12,3,多云,东北风,3-4级 450 | 2016-03-25,15,3,多云,东北风,3-4级 451 | 2016-03-26,15,5,阴~多云,西北风,3-4级 452 | 2016-03-27,18,7,晴,西风,3-4级 453 | 2016-03-28,20,9,多云,西南风,3-4级 454 | 2016-03-29,20,13,多云,西南风,3-4级 455 | 2016-03-30,21,12,阴~小雨,西南风,3-4级 456 | 2016-03-31,20,10,多云~晴,西南风,3-4级 457 | 2016-04-01,24,14,多云,东南风,3-4级 458 | 2016-04-02,19,13,雷阵雨~中雨,东北风,3-4级 459 | 2016-04-03,14,9,阵雨~阴,东北风,3-4级 460 | 2016-04-04,18,10,多云,东北风,3-4级 461 | 2016-04-05,16,11,阵雨~中雨,东南风,3-4级 462 | 2016-04-06,14,12,中到大雨~中雨,西北风,3-4级 463 | 2016-04-07,20,10,阴~多云,西南风,3-4级 464 | 2016-04-08,23,13,多云,东南风,3-4级 465 | 2016-04-09,25,13,多云,东南风,3-4级 466 | 2016-04-10,26,11,多云,东北风,3-4级 467 | 2016-04-11,21,12,多云~阴,东南风,3-4级 468 | 2016-04-12,23,14,多云,东南风,3-4级 469 | 2016-04-13,27,16,多云,东南风,3-4级 470 | 2016-04-14,27,15,多云,东南风,3-4级 471 | 2016-04-15,25,16,多云~小到中雨,东南风,3-4级 472 | 2016-04-16,20,12,中到大雨~阴,西北风,3-4级 473 | 2016-04-17,20,11,阴,东北风,3-4级 474 | 2016-04-18,22,12,多云,西南风,3-4级 475 | 2016-04-19,24,15,多云~雷阵雨,东南风,3-4级 476 | 2016-04-20,18,14,大雨~小雨,西北风,3-4级 477 | 2016-04-21,25,15,多云,西南风,3-4级 478 | 2016-04-22,27,16,多云~雷阵雨,东南风,3-4级 479 | 2016-04-23,18,13,小雨~阴,东南风,3-4级 480 | 2016-04-24,21,13,阴~多云,东北风,3-4级 481 | 2016-04-25,20,13,阴,东南风,3-4级 482 | 2016-04-26,17,12,小雨~小到中雨,东北风,3-4级 483 | 2016-04-27,17,11,阵雨~多云,西北风,3-4级 484 | 
2016-04-28,23,12,多云,西北风,3-4级 485 | 2016-04-29,27,15,晴,西南风,3-4级 486 | 2016-04-30,29,17,晴~多云,东南风,3-4级 487 | 2016-05-01,30,20,多云~雷阵雨,东南风,3-4级 488 | 2016-05-02,26,16,中到大雨~小雨,东南风,3-4级 489 | 2016-05-03,25,14,晴,西南风,3-4级 490 | 2016-05-04,30,20,多云~阴,东南风,3-4级 491 | 2016-05-05,25,20,中雨~阴,西南风,3-4级 492 | 2016-05-06,28,17,多云,东北风,3-4级 493 | 2016-05-07,19,15,阴~小雨,东北风,3-4级 494 | 2016-05-08,17,13,阵雨,东南风,3-4级 495 | 2016-05-09,20,15,阵雨,东北风,3-4级 496 | 2016-05-10,21,13,多云~晴,西北风,3-4级 497 | 2016-05-11,27,16,晴,东南风,3-4级 498 | 2016-05-12,30,16,多云,东南风,3-4级 499 | 2016-05-13,20,15,阴~多云,东北风,3-4级 500 | 2016-05-14,26,16,多云~阴,东南风,3-4级 501 | 2016-05-15,18,13,中雨~多云,西北风,3-4级 502 | 2016-05-16,25,13,晴,东南风,3-4级 503 | 2016-05-17,26,14,多云,东南风,3-4级 504 | 2016-05-18,25,14,多云,东北风,3-4级 505 | 2016-05-19,27,17,多云,东南风,3-4级 506 | 2016-05-20,24,17,阴~小到中雨,东北风,3-4级 507 | 2016-05-21,19,16,中雨~小雨,东北风,3-4级 508 | 2016-05-22,19,15,小雨~阴,西北风,3-4级 509 | 2016-05-23,23,17,阴~小雨,西南风,3-4级 510 | 2016-05-24,26,17,阴~多云,西南风,3-4级 511 | 2016-05-25,29,21,多云~雷阵雨,西南风,3-4级 512 | 2016-05-26,26,20,雷阵雨~小到中雨,西南风,3-4级 513 | 2016-05-27,23,19,中雨~中到大雨,东北风,3-4级 514 | 2016-05-28,21,19,阵雨,东南风,3-4级 515 | 2016-05-29,24,17,阴~多云,东南风,3-4级 516 | 2016-05-30,27,20,多云~小雨,东南风,3-4级 517 | 2016-05-31,29,21,雷阵雨~大到暴雨,东北风,3-4级 518 | 2016-06-01,21,16,小雨~阴,东南风,3-4级 519 | 2016-06-02,22,18,多云~阵雨,东北风,3-4级 520 | 2016-06-03,23,18,阵雨,东北风,3-4级 521 | 2016-06-04,22,16,阴~多云,西北风,3-4级 522 | 2016-06-05,26,18,多云,东南风,3-4级 523 | 2016-06-06,27,19,雷阵雨~中雨,东北风,3-4级 524 | 2016-06-07,26,21,雷阵雨,东北风,3-4级 525 | 2016-06-08,24,19,阵雨~多云,东北风,3-4级 526 | 2016-06-09,25,19,阴,东北风,3-4级 527 | 2016-06-10,27,22,多云,东北风,3-4级 528 | 2016-06-11,30,23,阴~雷阵雨,西北风,3-4级 529 | 2016-06-12,26,20,雷阵雨~多云,东南风,3-4级 530 | 2016-06-13,31,22,多云,东南风,3-4级 531 | 2016-06-14,33,25,多云~阴,西南风,3-4级 532 | 2016-06-15,30,22,雷阵雨~阴,西北风,3-4级 533 | 2016-06-16,30,20,多云~晴,西南风,3-4级 534 | 2016-06-17,32,23,晴~多云,东南风,3-4级 535 | 2016-06-18,33,25,多云~雷阵雨,西南风,3-4级 536 | 2016-06-19,32,25,雷阵雨~中雨,东南风,3-4级 537 | 2016-06-20,29,25,雷阵雨,西南风,3-4级 538 | 
2016-06-21,29,24,小到中雨~小雨,东南风,3-4级 539 | 2016-06-22,31,25,大到暴雨~阴,西南风,3-4级 540 | 2016-06-23,33,27,雷阵雨~阴,西南风,3-4级 541 | 2016-06-24,27,21,中雨~小雨,西南风,3-4级 542 | 2016-06-25,27,20,阵雨~多云,西南风,3-4级 543 | 2016-06-26,29,21,多云~小雨,东南风,3-4级 544 | 2016-06-27,22,19,小到中雨~雷阵雨,东北风,3-4级 545 | 2016-06-28,25,21,小到中雨~大雨,西南风,3-4级 546 | 2016-06-29,28,22,阴~多云,东南风,3-4级 547 | 2016-06-30,33,25,多云~雷阵雨,东南风,3-4级 548 | 2016-07-01,26,23,大到暴雨,东南风,3-4级 549 | 2016-07-02,26,22,中到大雨~暴雨,东南风,3-4级 550 | 2016-07-03,25,22,暴雨~大雨,东南风,3-4级 551 | 2016-07-04,27,22,中雨~大雨,东南风,3-4级 552 | 2016-07-05,28,23,雷阵雨,3-4级 553 | 2016-07-06,28,25,中雨~小雨,3-4级 554 | 2016-07-07,30,24,雷阵雨~多云,3-4级 555 | 2016-07-08,32,24,阴~多云,3-4级 556 | 2016-07-09,31,24,多云~阴,3-4级 557 | 2016-07-10,28,24,雷阵雨~中雨,3-4级 558 | 2016-07-11,29,24,阵雨~阴,3-4级 559 | 2016-07-12,32,26,多云~阴,东北风,3-4级 560 | 2016-07-13,32,26,中雨~雷阵雨,西南风,3-4级 561 | 2016-07-14,29,26,中雨转雷阵雨,东风,3-4级 562 | 2016-07-15,33,22,中到大雨转小雨,西风,3-4级 563 | 2016-07-16,29,22,多云,东南风,3-4级 564 | 2016-07-17,32,24,多云转雷阵雨,东南风,3-4级 565 | 2016-07-18,32,26,阴,东风,3-4级 566 | 2016-07-19,32,26,雷阵雨,南风,3-4级 567 | 2016-07-20,36,28,晴,南风,3-4级 568 | 2016-07-21,37,29,多云,西风,3-4级 569 | 2016-07-22,37,29,晴,南风,3-4级 570 | 2016-07-23,38,30,晴,西南风,3-4级 571 | 2016-07-24,38,30,晴,西南风,3-4级 572 | 2016-07-25,38,29,晴,东南风,3-4级 573 | 2016-07-26,39,30,晴,东南风,3-4级 574 | 2016-07-27,38,30,多云,北风,3-4级 575 | 2016-07-28,38,29,晴,西南风,3-4级 576 | 2016-07-29,38,30,晴,东南风,3-4级 577 | 2016-07-30,37,28,晴,东风,3-4级 578 | 2016-07-31,36,27,多云,东南风,3-4级 579 | 2016-08-01,35,27,晴,东南风,2级 580 | 2016-08-02,35,27,雷阵雨,东南风,3级 581 | 2016-08-03,32,25,雷阵雨,西北风,1级 582 | 2016-08-04,33,26,雷阵雨,东南风,1级 583 | 2016-08-05,33,26,雷阵雨,东风,2级 584 | 2016-08-06,33,26,雷阵雨,东南风,2级 585 | 2016-08-07,34,26,多云,东风,2级 586 | 2016-08-08,35,26,雷阵雨,东风,3级 587 | 2016-08-09,34,27,多云,东风,3级 588 | 2016-08-10,33,27,阵雨,东风,3级 589 | 2016-08-11,35,27,多云,东风,2级 590 | 2016-08-12,35,27,多云,东风,3级 591 | 2016-08-13,36,28,多云,东风,3级 592 | 2016-08-14,35,27,晴,东风,2级 593 | 2016-08-15,35,27,晴,东风,3级 594 | 2016-08-16,34,26,多云,东风,2级 595 | 
2016-08-17,34,26,多云,东北风,3级 596 | 2016-08-18,35,28,晴,东风,2级 597 | 2016-08-19,36,28,晴,东风,2级 598 | 2016-08-20,36,27,雷阵雨,东南风,微风 599 | 2016-08-21,31,26,雷阵雨,东南风,1级 600 | 2016-08-22,34,27,多云,东北风,2级 601 | 2016-08-23,34,24,多云,东北风,2级 602 | 2016-08-24,33,23,晴,东风,2级 603 | 2016-08-25,34,25,多云,北风,1级 604 | 2016-08-26,28,23,阵雨,北风,2级 605 | 2016-08-27,29,20,多云,北风,3级 606 | 2016-08-28,30,19,晴,北风,1级 607 | 2016-08-29,30,21,晴,西北风,2级 608 | 2016-08-30,30,21,晴,西北风,1级 609 | 2016-08-31,31,22,多云,西南风,2级 610 | 2016-09-01,33,23,多云,南风,2级 611 | 2016-09-02,34,22,晴,东风,1级 612 | 2016-09-03,34,23,晴,东北风,1级 613 | 2016-09-04,31,22,多云,东南风,2级 614 | 2016-09-05,32,23,多云,东风,1级 615 | 2016-09-06,27,21,阵雨,西南风,1级 616 | 2016-09-07,27,20,阵雨,东南风,2级 617 | 2016-09-08,30,21,多云,东南风,1级 618 | 2016-09-09,33,23,多云,东风,1级 619 | 2016-09-10,30,21,阴,东南风,2级 620 | 2016-09-11,28,20,多云,东南风,1级 621 | 2016-09-12,30,20,多云,东风,2级 622 | 2016-09-13,31,21,多云,东风,2级 623 | 2016-09-14,29,22,多云,东北风,1级 624 | 2016-09-15,24,21,大雨,东风,2级 625 | 2016-09-16,24,22,大到暴雨,北风,3级 626 | 2016-09-17,27,21,多云,东北风,2级 627 | 2016-09-18,28,20,多云,北风,3级 628 | 2016-09-19,26,18,多云,北风,2级 629 | 2016-09-20,25,17,多云,西北风,1级 630 | 2016-09-21,24,17,阴,东风,2级 631 | 2016-09-22,27,18,多云,东北风,2级 632 | 2016-09-23,27,19,多云,东风,2级 633 | 2016-09-24,28,20,多云,东风,2级 634 | 2016-09-25,28,21,多云,东风,2级 635 | 2016-09-26,29,22,多云,东风,2级 636 | 2016-09-27,30,23,阵雨,东北风,4级 637 | 2016-09-28,24,19,小到中雨,东北风,4级 638 | 2016-09-29,21,17,中雨,北风,3级 639 | 2016-09-30,22,19,中雨,东北风,3级 640 | 2016-10-01,25,21,小雨,东南风,微风 641 | 2016-10-02,25,20,阵雨,西风,2级 642 | 2016-10-03,27,18,多云,东北风,2级 643 | 2016-10-04,26,20,多云,北风,2级 644 | 2016-10-05,26,20,多云,东南风,1级 645 | 2016-10-06,25,19,多云,东风,2级 646 | 2016-10-07,20,16,中雨,北风,2级 647 | 2016-10-08,23,15,多云,东北风,1级 648 | 2016-10-09,21,13,多云,东北风,2级 649 | 2016-10-10,22,15,多云,东风,3级 650 | 2016-10-11,21,16,阴,东北风,2级 651 | 2016-10-12,21,16,阴,东风,1级 652 | 2016-10-13,20,16,阴,北风,2级 653 | 2016-10-14,19,17,阴,东风,2级 654 | 2016-10-15,20,16,阵雨,东风,1级 655 | 2016-10-16,23,17,阴,西北风,1级 656 | 2016-10-17,23,18,阴,东风,2级 
657 | 2016-10-18,24,18,阴,东风,2级 658 | 2016-10-19,23,18,阴,东风,2级 659 | 2016-10-20,21,18,中雨,东风,2级 660 | 2016-10-21,22,19,中雨,东风,2级 661 | 2016-10-22,22,18,大雨,东北风,3级 662 | 2016-10-23,20,15,阵雨,东北风,2级 663 | 2016-10-24,21,15,阴,南风,1级 664 | 2016-10-25,19,16,阴,东风,1级 665 | 2016-10-26,20,16,大雨,东风,3级 666 | 2016-10-27,18,15,中雨,东南风,2级 667 | 2016-10-28,16,11,阵雨,北风,3级 668 | 2016-10-29,13,8,阵雨,西北风,2级 669 | 2016-10-30,18,12,阴,东风,1级 670 | 2016-10-31,15,10,阴,北风,4级 671 | 2016-11-01,15,5,多云,东风,2级 672 | 2016-11-02,16,8,晴,东南风,微风 673 | 2016-11-03,18,8,晴,南风,1级 674 | 2016-11-04,21,11,晴,东南风,2级 675 | 2016-11-05,23,11,晴,东南风,微风 676 | 2016-11-06,23,12,多云,东风,3级 677 | 2016-11-07,18,11,中雨,北风,5级 678 | 2016-11-08,12,8,阴,北风,2级 679 | 2016-11-09,12,8,阴,东北风,2级 680 | 2016-11-10,13,6,阴,西南风,1级 681 | 2016-11-11,18,8,多云,东南风,2级 682 | 2016-11-12,18,10,多云,北风,微风 683 | 2016-11-13,20,13,阴,东风,2级 684 | 2016-11-14,20,12,阴,东北风,3级 685 | 2016-11-15,17,7,多云,东风,2级 686 | 2016-11-16,17,11,阴,东风,1级 687 | 2016-11-17,16,14,阵雨,东风,1级 688 | 2016-11-18,19,15,小到中雨,东南风,1级 689 | 2016-11-19,19,15,阴,东北风,2级 690 | 2016-11-20,17,14,小雨,东风,2级 691 | 2016-11-21,17,10,小雨,东北风,2级 692 | 2016-11-22,9,2,小雨,北风,3级 693 | 2016-11-23,3,-2,中雪,西北风,2级 694 | 2016-11-24,5,0,多云,北风,1级 695 | 2016-11-25,8,3,阴,东北风,1级 696 | 2016-11-26,8,1,小雨,北风,微风 697 | 2016-11-27,13,2,晴,北风,微风 698 | 2016-11-28,13,5,多云,北风,2级 699 | 2016-11-29,12,7,多云,东风,2级 700 | 2016-11-30,13,4,阴,西南风,2级 701 | 2016-12-01,14,5,晴,东风,2级 702 | 2016-12-02,14,5,多云,东风,1级 703 | 2016-12-03,15,8,多云,北风,微风 704 | 2016-12-04,17,7,多云,西南风,1级 705 | 2016-12-05,19,5,多云,北风,4级 706 | 2016-12-06,10,1,多云,西风,1级 707 | 2016-12-07,15,5,多云,东南风,1级 708 | 2016-12-08,16,5,晴,西南风,1级 709 | 2016-12-09,13,5,晴,东北风,3级 710 | 2016-12-10,11,6,多云,东风,3级 711 | 2016-12-11,12,6,多云,东风,2级 712 | 2016-12-12,12,9,阵雨,西北风,1级 713 | 2016-12-13,11,3,小到中雨,北风,2级 714 | 2016-12-14,6,-2,多云,北风,3级 715 | 2016-12-15,8,-1,晴,西风,1级 716 | 2016-12-16,9,-1,晴,东南风,1级 717 | 2016-12-17,12,2,多云,东南风,2级 718 | 2016-12-18,14,5,多云,东南风,微风 719 | 2016-12-19,16,6,多云,东北风,2级 720 | 
2016-12-20,12,9,小雨,东风,3级 721 | 2016-12-21,13,6,中雨,西北风,3级 722 | 2016-12-22,9,2,阴,北风,1级 723 | 2016-12-23,9,2,多云,北风,2级 724 | 2016-12-24,8,2,多云,东风,3级 725 | 2016-12-25,11,8,小到中雨,东风,2级 726 | 2016-12-26,10,1,小雨,西北风,3级 727 | 2016-12-27,5,-2,多云,北风,2级 728 | 2016-12-28,6,-1,晴,东北风,1级 729 | 2016-12-29,6,-1,多云,西北风,1级 730 | 2016-12-30,9,0,晴,东南风,1级 731 | 2016-12-31,11,-1,晴,北风,微风 732 | --------------------------------------------------------------------------------