├── .gitignore ├── LICENSE ├── README.md └── working ├── 001_data.ipynb ├── 101_eda.ipynb └── 201_train_1.ipynb /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 
92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 
25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. 
For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. 
You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. 
(Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # tianchi_ship_2019 2 | 天池智慧海洋 2019 https://tianchi.aliyun.com/competition/entrance/231768/introduction?spm=5176.12281949.1003.1.493e5cfde2Jbke 3 | 4 | 5 | # score 6 | 0.85 7 | 8 | # 说明 9 | 1. 
先执行 working/001_data.ipynb,生成 ../input/train.h5 和 ../input/test.h5 10 | 11 | -------------------------------------------------------------------------------- /working/001_data.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import pandas as pd\n", 10 | "import numpy as np\n", 11 | "import os\n", 12 | "from tqdm import tqdm\n", 13 | "import lightgbm as lgb\n", 14 | "from sklearn.model_selection import StratifiedKFold\n", 15 | "from sklearn import metrics\n", 16 | "import warnings\n", 17 | "\n", 18 | "warnings.filterwarnings('ignore')\n", 19 | "train_path = '../input/hy_round1_train_20200102'\n", 20 | "test_path = '../input/hy_round1_testA_20200102'" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": 36, 26 | "metadata": {}, 27 | "outputs": [ 28 | { 29 | "name": "stdout", 30 | "output_type": "stream", 31 | "text": [ 32 | "7000 2000\n" 33 | ] 34 | } 35 | ], 36 | "source": [ 37 | "train_files = os.listdir(train_path)\n", 38 | "test_files = os.listdir(test_path)\n", 39 | "print(len(train_files), len(test_files))" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": 11, 45 | "metadata": {}, 46 | "outputs": [ 47 | { 48 | "data": { 49 | "text/plain": [ 50 | "['6966.csv', '545.csv', '223.csv']" 51 | ] 52 | }, 53 | "execution_count": 11, 54 | "metadata": {}, 55 | "output_type": "execute_result" 56 | } 57 | ], 58 | "source": [ 59 | "train_files[:3]" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": 12, 65 | "metadata": {}, 66 | "outputs": [ 67 | { 68 | "data": { 69 | "text/plain": [ 70 | "['8793.csv', '8787.csv', '8977.csv']" 71 | ] 72 | }, 73 | "execution_count": 12, 74 | "metadata": {}, 75 | "output_type": "execute_result" 76 | } 77 | ], 78 | "source": [ 79 | "test_files[:3]" 80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": 16, 85 | "metadata": {}, 86 | 
"outputs": [], 87 | "source": [ 88 | "df = pd.read_csv(f'{train_path}/6966.csv')" 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": 17, 94 | "metadata": {}, 95 | "outputs": [ 96 | { 97 | "data": { 98 | "text/html": [ 99 | "
\n", 100 | "\n", 113 | "\n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | "
渔船IDxy速度方向timetype
069666.265902e+065.279254e+060.113061106 23:58:16围网
169666.265902e+065.279254e+060.0001106 23:48:21围网
269666.265902e+065.279254e+060.0001106 23:38:19围网
369666.265902e+065.279254e+060.0001106 23:28:36围网
469666.265902e+065.279254e+060.321301106 23:08:17围网
\n", 179 | "
" 180 | ], 181 | "text/plain": [ 182 | " 渔船ID x y 速度 方向 time type\n", 183 | "0 6966 6.265902e+06 5.279254e+06 0.11 306 1106 23:58:16 围网\n", 184 | "1 6966 6.265902e+06 5.279254e+06 0.00 0 1106 23:48:21 围网\n", 185 | "2 6966 6.265902e+06 5.279254e+06 0.00 0 1106 23:38:19 围网\n", 186 | "3 6966 6.265902e+06 5.279254e+06 0.00 0 1106 23:28:36 围网\n", 187 | "4 6966 6.265902e+06 5.279254e+06 0.32 130 1106 23:08:17 围网" 188 | ] 189 | }, 190 | "execution_count": 17, 191 | "metadata": {}, 192 | "output_type": "execute_result" 193 | } 194 | ], 195 | "source": [ 196 | "df.head()" 197 | ] 198 | }, 199 | { 200 | "cell_type": "code", 201 | "execution_count": 18, 202 | "metadata": {}, 203 | "outputs": [ 204 | { 205 | "data": { 206 | "text/plain": [ 207 | "array(['围网'], dtype=object)" 208 | ] 209 | }, 210 | "execution_count": 18, 211 | "metadata": {}, 212 | "output_type": "execute_result" 213 | } 214 | ], 215 | "source": [ 216 | "df['type'].unique()" 217 | ] 218 | }, 219 | { 220 | "cell_type": "code", 221 | "execution_count": 19, 222 | "metadata": {}, 223 | "outputs": [ 224 | { 225 | "data": { 226 | "text/plain": [ 227 | "(389, 7)" 228 | ] 229 | }, 230 | "execution_count": 19, 231 | "metadata": {}, 232 | "output_type": "execute_result" 233 | } 234 | ], 235 | "source": [ 236 | "df.shape" 237 | ] 238 | }, 239 | { 240 | "cell_type": "code", 241 | "execution_count": 21, 242 | "metadata": {}, 243 | "outputs": [ 244 | { 245 | "name": "stderr", 246 | "output_type": "stream", 247 | "text": [ 248 | "100%|██████████| 7000/7000 [00:34<00:00, 260.00it/s]\n" 249 | ] 250 | } 251 | ], 252 | "source": [ 253 | "ret = []\n", 254 | "for file in tqdm(train_files):\n", 255 | " df = pd.read_csv(f'{train_path}/{file}')\n", 256 | " ret.append(df)\n", 257 | "df = pd.concat(ret)\n", 258 | "df.columns = ['ship','x','y','v','d','time','type']" 259 | ] 260 | }, 261 | { 262 | "cell_type": "code", 263 | "execution_count": null, 264 | "metadata": {}, 265 | "outputs": [], 266 | "source": [ 267 | 
"df.to_hdf('../input/train.h5', 'df', mode='w')" 268 | ] 269 | }, 270 | { 271 | "cell_type": "code", 272 | "execution_count": 37, 273 | "metadata": {}, 274 | "outputs": [ 275 | { 276 | "name": "stderr", 277 | "output_type": "stream", 278 | "text": [ 279 | "100%|██████████| 2000/2000 [00:08<00:00, 225.65it/s]\n" 280 | ] 281 | } 282 | ], 283 | "source": [ 284 | "ret = []\n", 285 | "for file in tqdm(test_files):\n", 286 | " df = pd.read_csv(f'{test_path}/{file}')\n", 287 | " ret.append(df)\n", 288 | "df = pd.concat(ret)\n", 289 | "df.columns = ['ship','x','y','v','d','time']" 290 | ] 291 | }, 292 | { 293 | "cell_type": "code", 294 | "execution_count": 41, 295 | "metadata": {}, 296 | "outputs": [], 297 | "source": [ 298 | "df.to_hdf('../input/test.h5', 'df', mode='w')" 299 | ] 300 | }, 301 | { 302 | "cell_type": "code", 303 | "execution_count": 40, 304 | "metadata": { 305 | "scrolled": true 306 | }, 307 | "outputs": [ 308 | { 309 | "data": { 310 | "text/plain": [ 311 | "(782378, 6)" 312 | ] 313 | }, 314 | "execution_count": 40, 315 | "metadata": {}, 316 | "output_type": "execute_result" 317 | } 318 | ], 319 | "source": [ 320 | "df.shape" 321 | ] 322 | }, 323 | { 324 | "cell_type": "code", 325 | "execution_count": 38, 326 | "metadata": {}, 327 | "outputs": [ 328 | { 329 | "data": { 330 | "text/html": [ 331 | "
\n", 332 | "\n", 345 | "\n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | "
渔船IDxy速度方向time
087936.102450e+065.112760e+060.0001106 23:56:34
187936.102450e+065.112760e+060.0001106 23:46:34
287936.102450e+065.112760e+060.0001106 23:37:31
387936.102450e+065.112760e+060.1601106 23:26:34
487936.102450e+065.112760e+060.0001106 23:16:34
\n", 405 | "
" 406 | ], 407 | "text/plain": [ 408 | " 渔船ID x y 速度 方向 time\n", 409 | "0 8793 6.102450e+06 5.112760e+06 0.00 0 1106 23:56:34\n", 410 | "1 8793 6.102450e+06 5.112760e+06 0.00 0 1106 23:46:34\n", 411 | "2 8793 6.102450e+06 5.112760e+06 0.00 0 1106 23:37:31\n", 412 | "3 8793 6.102450e+06 5.112760e+06 0.16 0 1106 23:26:34\n", 413 | "4 8793 6.102450e+06 5.112760e+06 0.00 0 1106 23:16:34" 414 | ] 415 | }, 416 | "execution_count": 38, 417 | "metadata": {}, 418 | "output_type": "execute_result" 419 | } 420 | ], 421 | "source": [ 422 | "df.head()" 423 | ] 424 | } 425 | ], 426 | "metadata": { 427 | "kernelspec": { 428 | "display_name": "Python 3", 429 | "language": "python", 430 | "name": "python3" 431 | }, 432 | "language_info": { 433 | "codemirror_mode": { 434 | "name": "ipython", 435 | "version": 3 436 | }, 437 | "file_extension": ".py", 438 | "mimetype": "text/x-python", 439 | "name": "python", 440 | "nbconvert_exporter": "python", 441 | "pygments_lexer": "ipython3", 442 | "version": "3.7.3" 443 | }, 444 | "toc": { 445 | "base_numbering": 1, 446 | "nav_menu": {}, 447 | "number_sections": true, 448 | "sideBar": true, 449 | "skip_h1_title": false, 450 | "title_cell": "Table of Contents", 451 | "title_sidebar": "Contents", 452 | "toc_cell": false, 453 | "toc_position": {}, 454 | "toc_section_display": true, 455 | "toc_window_display": false 456 | } 457 | }, 458 | "nbformat": 4, 459 | "nbformat_minor": 2 460 | } 461 | -------------------------------------------------------------------------------- /working/201_train_1.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 52, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "%matplotlib inline\n", 10 | "\n", 11 | "import pandas as pd\n", 12 | "import numpy as np\n", 13 | "import os\n", 14 | "from tqdm import tqdm\n", 15 | "import lightgbm as lgb\n", 16 | "from sklearn.model_selection import 
StratifiedKFold\n", 17 | "from sklearn import metrics\n", 18 | "import warnings\n", 19 | "import matplotlib.pyplot as plt\n", 20 | "\n", 21 | "pd.set_option('display.max_columns', 100)\n", 22 | "warnings.filterwarnings('ignore')" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": 97, 28 | "metadata": {}, 29 | "outputs": [], 30 | "source": [ 31 | "def group_feature(df, key, target, aggs): \n", 32 | " agg_dict = {}\n", 33 | " for ag in aggs:\n", 34 | " agg_dict[f'{target}_{ag}'] = ag\n", 35 | " print(agg_dict)\n", 36 | " t = df.groupby(key)[target].agg(agg_dict).reset_index()\n", 37 | " return t\n", 38 | "\n", 39 | "def extract_feature(df, train):\n", 40 | " t = group_feature(df, 'ship','x',['max','min','mean','std','skew','sum'])\n", 41 | " train = pd.merge(train, t, on='ship', how='left')\n", 42 | " t = group_feature(df, 'ship','x',['count'])\n", 43 | " train = pd.merge(train, t, on='ship', how='left')\n", 44 | " t = group_feature(df, 'ship','y',['max','min','mean','std','skew','sum'])\n", 45 | " train = pd.merge(train, t, on='ship', how='left')\n", 46 | " t = group_feature(df, 'ship','v',['max','min','mean','std','skew','sum'])\n", 47 | " train = pd.merge(train, t, on='ship', how='left')\n", 48 | " t = group_feature(df, 'ship','d',['max','min','mean','std','skew','sum'])\n", 49 | " train = pd.merge(train, t, on='ship', how='left')\n", 50 | " train['x_max_x_min'] = train['x_max'] - train['x_min']\n", 51 | " train['y_max_y_min'] = train['y_max'] - train['y_min']\n", 52 | " train['y_max_x_min'] = train['y_max'] - train['x_min']\n", 53 | " train['x_max_y_min'] = train['x_max'] - train['y_min']\n", 54 | " train['slope'] = train['y_max_y_min'] / np.where(train['x_max_x_min']==0, 0.001, train['x_max_x_min'])\n", 55 | " train['area'] = train['x_max_x_min'] * train['y_max_y_min']\n", 56 | " \n", 57 | " mode_hour = df.groupby('ship')['hour'].agg(lambda x:x.value_counts().index[0]).to_dict()\n", 58 | " train['mode_hour'] = 
train['ship'].map(mode_hour)\n", 59 | " \n", 60 | " t = group_feature(df, 'ship','hour',['max','min'])\n", 61 | " train = pd.merge(train, t, on='ship', how='left')\n", 62 | " \n", 63 | " hour_nunique = df.groupby('ship')['hour'].nunique().to_dict()\n", 64 | " date_nunique = df.groupby('ship')['date'].nunique().to_dict()\n", 65 | " train['hour_nunique'] = train['ship'].map(hour_nunique)\n", 66 | " train['date_nunique'] = train['ship'].map(date_nunique)\n", 67 | "\n", 68 | " t = df.groupby('ship')['time'].agg({'diff_time':lambda x:np.max(x)-np.min(x)}).reset_index()\n", 69 | " t['diff_day'] = t['diff_time'].dt.days\n", 70 | " t['diff_second'] = t['diff_time'].dt.seconds\n", 71 | " train = pd.merge(train, t, on='ship', how='left')\n", 72 | " return train\n", 73 | "\n", 74 | "def extract_dt(df):\n", 75 | " df['time'] = pd.to_datetime(df['time'], format='%m%d %H:%M:%S')\n", 76 | " # df['month'] = df['time'].dt.month\n", 77 | " # df['day'] = df['time'].dt.day\n", 78 | " df['date'] = df['time'].dt.date\n", 79 | " df['hour'] = df['time'].dt.hour\n", 80 | " # df = df.drop_duplicates(['ship','month'])\n", 81 | " df['weekday'] = df['time'].dt.weekday\n", 82 | " return df" 83 | ] 84 | }, 85 | { 86 | "cell_type": "code", 87 | "execution_count": 70, 88 | "metadata": {}, 89 | "outputs": [], 90 | "source": [ 91 | "train = pd.read_hdf('../input/train.h5')\n", 92 | "# train = df.drop_duplicates(['ship','type'])" 93 | ] 94 | }, 95 | { 96 | "cell_type": "code", 97 | "execution_count": 71, 98 | "metadata": {}, 99 | "outputs": [], 100 | "source": [ 101 | "test = pd.read_hdf('../input/test.h5')" 102 | ] 103 | }, 104 | { 105 | "cell_type": "code", 106 | "execution_count": 72, 107 | "metadata": {}, 108 | "outputs": [], 109 | "source": [ 110 | "train = extract_dt(train)" 111 | ] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "execution_count": 73, 116 | "metadata": {}, 117 | "outputs": [], 118 | "source": [ 119 | "test = extract_dt(test)" 120 | ] 121 | }, 122 | { 123 | "cell_type": 
"code", 124 | "execution_count": 107, 125 | "metadata": {}, 126 | "outputs": [], 127 | "source": [ 128 | "train_label = train.drop_duplicates('ship')\n", 129 | "test_label = test.drop_duplicates('ship')" 130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": 108, 135 | "metadata": {}, 136 | "outputs": [ 137 | { 138 | "data": { 139 | "text/plain": [ 140 | "拖网 0.623000\n", 141 | "围网 0.231571\n", 142 | "刺网 0.145429\n", 143 | "Name: type, dtype: float64" 144 | ] 145 | }, 146 | "execution_count": 108, 147 | "metadata": {}, 148 | "output_type": "execute_result" 149 | } 150 | ], 151 | "source": [ 152 | "train_label['type'].value_counts(1)" 153 | ] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "execution_count": 99, 158 | "metadata": {}, 159 | "outputs": [], 160 | "source": [ 161 | "type_map = dict(zip(train_label['type'].unique(), np.arange(3)))\n", 162 | "type_map_rev = {v:k for k,v in type_map.items()}\n", 163 | "train_label['type'] = train_label['type'].map(type_map)\n" 164 | ] 165 | }, 166 | { 167 | "cell_type": "code", 168 | "execution_count": 100, 169 | "metadata": {}, 170 | "outputs": [ 171 | { 172 | "name": "stdout", 173 | "output_type": "stream", 174 | "text": [ 175 | "{'x_max': 'max', 'x_min': 'min', 'x_mean': 'mean', 'x_std': 'std', 'x_sum': 'sum'}\n", 176 | "{'x_count': 'count'}\n", 177 | "{'y_max': 'max', 'y_min': 'min', 'y_mean': 'mean', 'y_std': 'std', 'y_sum': 'sum'}\n", 178 | "{'v_max': 'max', 'v_min': 'min', 'v_mean': 'mean', 'v_std': 'std', 'v_sum': 'sum'}\n", 179 | "{'d_max': 'max', 'd_min': 'min', 'd_mean': 'mean', 'd_std': 'std', 'd_sum': 'sum'}\n", 180 | "{'hour_max': 'max', 'hour_min': 'min'}\n" 181 | ] 182 | } 183 | ], 184 | "source": [ 185 | "train_label = extract_feature(train, train_label)" 186 | ] 187 | }, 188 | { 189 | "cell_type": "code", 190 | "execution_count": 101, 191 | "metadata": {}, 192 | "outputs": [ 193 | { 194 | "name": "stdout", 195 | "output_type": "stream", 196 | "text": [ 197 | "{'x_max': 'max', 
'x_min': 'min', 'x_mean': 'mean', 'x_std': 'std', 'x_sum': 'sum'}\n", 198 | "{'x_count': 'count'}\n", 199 | "{'y_max': 'max', 'y_min': 'min', 'y_mean': 'mean', 'y_std': 'std', 'y_sum': 'sum'}\n", 200 | "{'v_max': 'max', 'v_min': 'min', 'v_mean': 'mean', 'v_std': 'std', 'v_sum': 'sum'}\n", 201 | "{'d_max': 'max', 'd_min': 'min', 'd_mean': 'mean', 'd_std': 'std', 'd_sum': 'sum'}\n", 202 | "{'hour_max': 'max', 'hour_min': 'min'}\n" 203 | ] 204 | } 205 | ], 206 | "source": [ 207 | "test_label = extract_feature(test, test_label)" 208 | ] 209 | }, 210 | { 211 | "cell_type": "code", 212 | "execution_count": 102, 213 | "metadata": {}, 214 | "outputs": [], 215 | "source": [ 216 | "\n", 217 | "features = [x for x in train_label.columns if x not in ['ship','type','time','diff_time','date']]\n", 218 | "target = 'type'" 219 | ] 220 | }, 221 | { 222 | "cell_type": "code", 223 | "execution_count": 103, 224 | "metadata": {}, 225 | "outputs": [ 226 | { 227 | "name": "stdout", 228 | "output_type": "stream", 229 | "text": [ 230 | "40 x,y,v,d,hour,weekday,x_max,x_min,x_mean,x_std,x_sum,x_count,y_max,y_min,y_mean,y_std,y_sum,v_max,v_min,v_mean,v_std,v_sum,d_max,d_min,d_mean,d_std,d_sum,x_max_x_min,y_max_y_min,y_max_x_min,x_max_y_min,slope,area,mode_hour,hour_max,hour_min,hour_nunique,date_nunique,diff_day,diff_second\n" 231 | ] 232 | } 233 | ], 234 | "source": [ 235 | "print(len(features), ','.join(features))" 236 | ] 237 | }, 238 | { 239 | "cell_type": "code", 240 | "execution_count": 104, 241 | "metadata": {}, 242 | "outputs": [], 243 | "source": [ 244 | "params = {\n", 245 | " 'n_estimators': 5000,\n", 246 | " 'boosting_type': 'gbdt',\n", 247 | " 'objective': 'multiclass',\n", 248 | " 'num_class': 3,\n", 249 | " 'early_stopping_rounds': 100,\n", 250 | "}" 251 | ] 252 | }, 253 | { 254 | "cell_type": "code", 255 | "execution_count": 105, 256 | "metadata": {}, 257 | "outputs": [ 258 | { 259 | "name": "stdout", 260 | "output_type": "stream", 261 | "text": [ 262 | "Training until 
validation scores don't improve for 100 rounds\n", 263 | "[100]\ttraining's multi_logloss: 0.0828667\tvalid_1's multi_logloss: 0.269078\n", 264 | "[200]\ttraining's multi_logloss: 0.022058\tvalid_1's multi_logloss: 0.264574\n", 265 | "Early stopping, best iteration is:\n", 266 | "[180]\ttraining's multi_logloss: 0.0284972\tvalid_1's multi_logloss: 0.262031\n", 267 | "0 val f1 0.8744567161504285\n", 268 | "Training until validation scores don't improve for 100 rounds\n", 269 | "[100]\ttraining's multi_logloss: 0.085238\tvalid_1's multi_logloss: 0.274897\n", 270 | "[200]\ttraining's multi_logloss: 0.0222402\tvalid_1's multi_logloss: 0.272668\n", 271 | "Early stopping, best iteration is:\n", 272 | "[153]\ttraining's multi_logloss: 0.0416896\tvalid_1's multi_logloss: 0.268232\n", 273 | "1 val f1 0.8570390224496975\n", 274 | "Training until validation scores don't improve for 100 rounds\n", 275 | "[100]\ttraining's multi_logloss: 0.0839062\tvalid_1's multi_logloss: 0.266458\n", 276 | "[200]\ttraining's multi_logloss: 0.0228758\tvalid_1's multi_logloss: 0.25578\n", 277 | "Early stopping, best iteration is:\n", 278 | "[164]\ttraining's multi_logloss: 0.0363628\tvalid_1's multi_logloss: 0.254512\n", 279 | "2 val f1 0.8808118299909231\n", 280 | "Training until validation scores don't improve for 100 rounds\n", 281 | "[100]\ttraining's multi_logloss: 0.0845035\tvalid_1's multi_logloss: 0.272673\n", 282 | "[200]\ttraining's multi_logloss: 0.0225549\tvalid_1's multi_logloss: 0.277392\n", 283 | "Early stopping, best iteration is:\n", 284 | "[108]\ttraining's multi_logloss: 0.0758342\tvalid_1's multi_logloss: 0.270036\n", 285 | "3 val f1 0.8629486588985998\n", 286 | "Training until validation scores don't improve for 100 rounds\n", 287 | "[100]\ttraining's multi_logloss: 0.0815182\tvalid_1's multi_logloss: 0.296271\n", 288 | "[200]\ttraining's multi_logloss: 0.0211976\tvalid_1's multi_logloss: 0.295628\n", 289 | "Early stopping, best iteration is:\n", 290 | "[160]\ttraining's 
multi_logloss: 0.0357663\tvalid_1's multi_logloss: 0.290207\n", 291 | "4 val f1 0.8549111545740181\n" 292 | ] 293 | } 294 | ], 295 | "source": [ 296 | "fold = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)\n", 297 | "\n", 298 | "X = train_label[features].copy()\n", 299 | "y = train_label[target]\n", 300 | "models = []\n", 301 | "pred = np.zeros((len(test_label),3))\n", 302 | "oof = np.zeros((len(X), 3))\n", 303 | "for index, (train_idx, val_idx) in enumerate(fold.split(X, y)):\n", 304 | "\n", 305 | " train_set = lgb.Dataset(X.iloc[train_idx], y.iloc[train_idx])\n", 306 | " val_set = lgb.Dataset(X.iloc[val_idx], y.iloc[val_idx])\n", 307 | "\n", 308 | " model = lgb.train(params, train_set, valid_sets=[train_set, val_set], verbose_eval=100)\n", 309 | " models.append(model)\n", 310 | " val_pred = model.predict(X.iloc[val_idx])\n", 311 | " oof[val_idx] = val_pred\n", 312 | " val_y = y.iloc[val_idx]\n", 313 | " val_pred = np.argmax(val_pred, axis=1)\n", 314 | " print(index, 'val f1', metrics.f1_score(val_y, val_pred, average='macro'))\n", 315 | " # 0.8695539641133697\n", 316 | " # 0.8866211724839532\n", 317 | "\n", 318 | " test_pred = model.predict(test_label[features])\n", 319 | " pred += test_pred/5" 320 | ] 321 | }, 322 | { 323 | "cell_type": "code", 324 | "execution_count": 106, 325 | "metadata": {}, 326 | "outputs": [ 327 | { 328 | "name": "stdout", 329 | "output_type": "stream", 330 | "text": [ 331 | "oof f1 0.8660762740409558\n" 332 | ] 333 | } 334 | ], 335 | "source": [ 336 | "oof = np.argmax(oof, axis=1)\n", 337 | "print('oof f1', metrics.f1_score(oof, y, average='macro'))\n", 338 | "# 0.8701544575329372" 339 | ] 340 | }, 341 | { 342 | "cell_type": "code", 343 | "execution_count": 152, 344 | "metadata": {}, 345 | "outputs": [ 346 | { 347 | "name": "stdout", 348 | "output_type": "stream", 349 | "text": [ 350 | "1 0.6325\n", 351 | "0 0.2390\n", 352 | "2 0.1285\n", 353 | "Name: pred, dtype: float64\n" 354 | ] 355 | } 356 | ], 357 | "source": [ 358 | 
"pred = np.argmax(pred, axis=1)\n", 359 | "sub = test_label[['ship']]\n", 360 | "sub['pred'] = pred\n", 361 | "\n", 362 | "print(sub['pred'].value_counts(1))\n", 363 | "sub['pred'] = sub['pred'].map(type_map_rev)\n", 364 | "sub.to_csv('result.csv', index=None, header=None)" 365 | ] 366 | }, 367 | { 368 | "cell_type": "code", 369 | "execution_count": 84, 370 | "metadata": {}, 371 | "outputs": [], 372 | "source": [ 373 | "ret = []\n", 374 | "for index, model in enumerate(models):\n", 375 | " df = pd.DataFrame()\n", 376 | " df['name'] = model.feature_name()\n", 377 | " df['score'] = model.feature_importance()\n", 378 | " df['fold'] = index\n", 379 | " ret.append(df)\n", 380 | " \n", 381 | "df = pd.concat(ret)" 382 | ] 383 | }, 384 | { 385 | "cell_type": "code", 386 | "execution_count": 85, 387 | "metadata": {}, 388 | "outputs": [], 389 | "source": [ 390 | "df = df.groupby('name', as_index=False)['score'].mean()\n", 391 | "df = df.sort_values(['score'], ascending=False)" 392 | ] 393 | }, 394 | { 395 | "cell_type": "code", 396 | "execution_count": 86, 397 | "metadata": {}, 398 | "outputs": [ 399 | { 400 | "data": { 401 | "text/html": [ 402 | "
\n", 403 | "\n", 416 | "\n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | " \n", 467 | " \n", 468 | " \n", 469 | " \n", 470 | " \n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | " \n", 490 | " \n", 491 | " \n", 492 | " \n", 493 | " \n", 494 | " \n", 495 | " \n", 496 | " \n", 497 | " \n", 498 | " \n", 499 | " \n", 500 | " \n", 501 | " \n", 502 | " \n", 503 | " \n", 504 | " \n", 505 | " \n", 506 | " \n", 507 | " \n", 508 | " \n", 509 | " \n", 510 | " \n", 511 | " \n", 512 | " \n", 513 | " \n", 514 | " \n", 515 | " \n", 516 | " \n", 517 | " \n", 518 | " \n", 519 | " \n", 520 | " \n", 521 | " \n", 522 | " \n", 523 | " \n", 524 | " \n", 525 | " \n", 526 | " \n", 527 | " \n", 528 | " \n", 529 | " \n", 530 | " \n", 531 | " \n", 532 | " \n", 533 | " \n", 534 | " \n", 535 | " \n", 536 | " \n", 537 | " \n", 538 | " \n", 539 | " \n", 540 | " \n", 541 | " \n", 542 | " \n", 543 | " \n", 544 | " \n", 545 | " \n", 546 | " \n", 547 | " \n", 548 | " \n", 549 | " \n", 550 | " \n", 551 | " \n", 552 | " \n", 553 | " \n", 554 | " \n", 555 | " \n", 556 | " \n", 557 | " \n", 558 | " \n", 559 | " \n", 560 | " \n", 561 | " \n", 562 | " \n", 563 | " \n", 564 | " \n", 565 | " \n", 566 | " \n", 567 | " \n", 568 | " 
\n", 569 | " \n", 570 | " \n", 571 | " \n", 572 | " \n", 573 | " \n", 574 | " \n", 575 | " \n", 576 | " \n", 577 | " \n", 578 | " \n", 579 | " \n", 580 | " \n", 581 | " \n", 582 | " \n", 583 | " \n", 584 | " \n", 585 | " \n", 586 | " \n", 587 | " \n", 588 | " \n", 589 | " \n", 590 | " \n", 591 | " \n", 592 | " \n", 593 | " \n", 594 | " \n", 595 | " \n", 596 | " \n", 597 | " \n", 598 | " \n", 599 | " \n", 600 | " \n", 601 | " \n", 602 | " \n", 603 | " \n", 604 | " \n", 605 | " \n", 606 | " \n", 607 | " \n", 608 | " \n", 609 | " \n", 610 | " \n", 611 | " \n", 612 | " \n", 613 | " \n", 614 | " \n", 615 | " \n", 616 | " \n", 617 | " \n", 618 | " \n", 619 | " \n", 620 | " \n", 621 | " \n", 622 | " \n", 623 | " \n", 624 | " \n", 625 | " \n", 626 | " \n", 627 | " \n", 628 | " \n", 629 | " \n", 630 | " \n", 631 | " \n", 632 | " \n", 633 | " \n", 634 | " \n", 635 | " \n", 636 | " \n", 637 | " \n", 638 | " \n", 639 | " \n", 640 | " \n", 641 | " \n", 642 | " \n", 643 | " \n", 644 | " \n", 645 | " \n", 646 | "
namescore
37y_max_x_min676.4
36y_max624.6
31x_min611.4
29x_max_y_min568.0
22v_std535.6
35y512.0
25x458.8
21v_skew445.4
40y_min422.6
32x_skew419.2
41y_skew416.4
16slope398.2
27x_max373.4
10diff_second368.6
33x_std343.8
3d_mean342.2
17v341.2
18v_max338.6
19v_mean331.8
6d_std331.4
39y_mean320.4
30x_mean319.0
42y_std285.4
23v_sum271.4
26x_count265.0
7d_sum262.0
28x_max_x_min258.4
1d252.2
34x_sum241.8
5d_skew239.4
38y_max_y_min233.2
0area225.6
43y_sum204.4
15mode_hour177.8
2d_max155.6
20v_min61.0
11hour26.0
8date_nunique25.6
24weekday23.2
9diff_day20.4
4d_min15.2
14hour_nunique1.4
13hour_min0.0
12hour_max0.0
\n", 647 | "
" 648 | ], 649 | "text/plain": [ 650 | " name score\n", 651 | "37 y_max_x_min 676.4\n", 652 | "36 y_max 624.6\n", 653 | "31 x_min 611.4\n", 654 | "29 x_max_y_min 568.0\n", 655 | "22 v_std 535.6\n", 656 | "35 y 512.0\n", 657 | "25 x 458.8\n", 658 | "21 v_skew 445.4\n", 659 | "40 y_min 422.6\n", 660 | "32 x_skew 419.2\n", 661 | "41 y_skew 416.4\n", 662 | "16 slope 398.2\n", 663 | "27 x_max 373.4\n", 664 | "10 diff_second 368.6\n", 665 | "33 x_std 343.8\n", 666 | "3 d_mean 342.2\n", 667 | "17 v 341.2\n", 668 | "18 v_max 338.6\n", 669 | "19 v_mean 331.8\n", 670 | "6 d_std 331.4\n", 671 | "39 y_mean 320.4\n", 672 | "30 x_mean 319.0\n", 673 | "42 y_std 285.4\n", 674 | "23 v_sum 271.4\n", 675 | "26 x_count 265.0\n", 676 | "7 d_sum 262.0\n", 677 | "28 x_max_x_min 258.4\n", 678 | "1 d 252.2\n", 679 | "34 x_sum 241.8\n", 680 | "5 d_skew 239.4\n", 681 | "38 y_max_y_min 233.2\n", 682 | "0 area 225.6\n", 683 | "43 y_sum 204.4\n", 684 | "15 mode_hour 177.8\n", 685 | "2 d_max 155.6\n", 686 | "20 v_min 61.0\n", 687 | "11 hour 26.0\n", 688 | "8 date_nunique 25.6\n", 689 | "24 weekday 23.2\n", 690 | "9 diff_day 20.4\n", 691 | "4 d_min 15.2\n", 692 | "14 hour_nunique 1.4\n", 693 | "13 hour_min 0.0\n", 694 | "12 hour_max 0.0" 695 | ] 696 | }, 697 | "execution_count": 86, 698 | "metadata": {}, 699 | "output_type": "execute_result" 700 | } 701 | ], 702 | "source": [ 703 | "df" 704 | ] 705 | }, 706 | { 707 | "cell_type": "code", 708 | "execution_count": null, 709 | "metadata": {}, 710 | "outputs": [], 711 | "source": [] 712 | } 713 | ], 714 | "metadata": { 715 | "kernelspec": { 716 | "display_name": "Python 3", 717 | "language": "python", 718 | "name": "python3" 719 | }, 720 | "language_info": { 721 | "codemirror_mode": { 722 | "name": "ipython", 723 | "version": 3 724 | }, 725 | "file_extension": ".py", 726 | "mimetype": "text/x-python", 727 | "name": "python", 728 | "nbconvert_exporter": "python", 729 | "pygments_lexer": "ipython3", 730 | "version": "3.7.3" 731 | }, 732 | "toc": { 733 
| "base_numbering": 1, 734 | "nav_menu": {}, 735 | "number_sections": true, 736 | "sideBar": true, 737 | "skip_h1_title": false, 738 | "title_cell": "Table of Contents", 739 | "title_sidebar": "Contents", 740 | "toc_cell": false, 741 | "toc_position": {}, 742 | "toc_section_display": true, 743 | "toc_window_display": false 744 | } 745 | }, 746 | "nbformat": 4, 747 | "nbformat_minor": 2 748 | } 749 | --------------------------------------------------------------------------------