├── .gitignore
├── LICENSE
├── README.md
└── working
    ├── 001_data.ipynb
    ├── 101_eda.ipynb
    └── 201_train_1.ipynb
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | pip-wheel-metadata/
24 | share/python-wheels/
25 | *.egg-info/
26 | .installed.cfg
27 | *.egg
28 | MANIFEST
29 |
30 | # PyInstaller
31 | # Usually these files are written by a python script from a template
32 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
33 | *.manifest
34 | *.spec
35 |
36 | # Installer logs
37 | pip-log.txt
38 | pip-delete-this-directory.txt
39 |
40 | # Unit test / coverage reports
41 | htmlcov/
42 | .tox/
43 | .nox/
44 | .coverage
45 | .coverage.*
46 | .cache
47 | nosetests.xml
48 | coverage.xml
49 | *.cover
50 | *.py,cover
51 | .hypothesis/
52 | .pytest_cache/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | target/
76 |
77 | # Jupyter Notebook
78 | .ipynb_checkpoints
79 |
80 | # IPython
81 | profile_default/
82 | ipython_config.py
83 |
84 | # pyenv
85 | .python-version
86 |
87 | # pipenv
88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
91 | # install all needed dependencies.
92 | #Pipfile.lock
93 |
94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
95 | __pypackages__/
96 |
97 | # Celery stuff
98 | celerybeat-schedule
99 | celerybeat.pid
100 |
101 | # SageMath parsed files
102 | *.sage.py
103 |
104 | # Environments
105 | .env
106 | .venv
107 | env/
108 | venv/
109 | ENV/
110 | env.bak/
111 | venv.bak/
112 |
113 | # Spyder project settings
114 | .spyderproject
115 | .spyproject
116 |
117 | # Rope project settings
118 | .ropeproject
119 |
120 | # mkdocs documentation
121 | /site
122 |
123 | # mypy
124 | .mypy_cache/
125 | .dmypy.json
126 | dmypy.json
127 |
128 | # Pyre type checker
129 | .pyre/
130 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright [yyyy] [name of copyright owner]
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # tianchi_ship_2019
2 | Tianchi "Smart Ocean" Competition 2019: https://tianchi.aliyun.com/competition/entrance/231768/introduction?spm=5176.12281949.1003.1.493e5cfde2Jbke
3 |
4 |
5 | # score
6 | 0.85
7 |
8 | # Usage
9 | 1. Run working/001_data.ipynb first to generate train.h5 and test.h5.
10 | 2. Then run working/201_train_1.ipynb to train the LightGBM model and write result.csv (a quick sanity check is sketched below).
11 |
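12 | A minimal sanity check after step 1 (a sketch only, run from the repo root; the
13 | notebooks write to ../input/ relative to working/, i.e. input/ at the root):
14 |
15 | ```python
16 | # Load the HDF5 files written by 001_data.ipynb and confirm their shapes.
17 | import pandas as pd
18 |
19 | train = pd.read_hdf('input/train.h5')  # columns: ship, x, y, v, d, time, type
20 | test = pd.read_hdf('input/test.h5')    # same columns, minus the type label
21 | print(train.shape, test.shape)         # test was (782378, 6) in the original run
22 | ```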
--------------------------------------------------------------------------------
/working/001_data.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import pandas as pd\n",
10 | "import numpy as np\n",
11 | "import os\n",
12 | "from tqdm import tqdm\n",
13 | "import lightgbm as lgb\n",
14 | "from sklearn.model_selection import StratifiedKFold\n",
15 | "from sklearn import metrics\n",
16 | "import warnings\n",
17 | "\n",
18 | "warnings.filterwarnings('ignore')\n",
19 | "train_path = '../input/hy_round1_train_20200102'\n",
20 | "test_path = '../input/hy_round1_testA_20200102'"
21 | ]
22 | },
23 | {
24 | "cell_type": "code",
25 | "execution_count": 36,
26 | "metadata": {},
27 | "outputs": [
28 | {
29 | "name": "stdout",
30 | "output_type": "stream",
31 | "text": [
32 | "7000 2000\n"
33 | ]
34 | }
35 | ],
36 | "source": [
37 | "train_files = os.listdir(train_path)\n",
38 | "test_files = os.listdir(test_path)\n",
39 | "print(len(train_files), len(test_files))"
40 | ]
41 | },
42 | {
43 | "cell_type": "code",
44 | "execution_count": 11,
45 | "metadata": {},
46 | "outputs": [
47 | {
48 | "data": {
49 | "text/plain": [
50 | "['6966.csv', '545.csv', '223.csv']"
51 | ]
52 | },
53 | "execution_count": 11,
54 | "metadata": {},
55 | "output_type": "execute_result"
56 | }
57 | ],
58 | "source": [
59 | "train_files[:3]"
60 | ]
61 | },
62 | {
63 | "cell_type": "code",
64 | "execution_count": 12,
65 | "metadata": {},
66 | "outputs": [
67 | {
68 | "data": {
69 | "text/plain": [
70 | "['8793.csv', '8787.csv', '8977.csv']"
71 | ]
72 | },
73 | "execution_count": 12,
74 | "metadata": {},
75 | "output_type": "execute_result"
76 | }
77 | ],
78 | "source": [
79 | "test_files[:3]"
80 | ]
81 | },
82 | {
83 | "cell_type": "code",
84 | "execution_count": 16,
85 | "metadata": {},
86 | "outputs": [],
87 | "source": [
88 | "df = pd.read_csv(f'{train_path}/6966.csv')"
89 | ]
90 | },
91 | {
92 | "cell_type": "code",
93 | "execution_count": 17,
94 | "metadata": {},
95 | "outputs": [
96 | {
97 | "data": {
98 | "text/html": [
99 | "
\n",
100 | "\n",
113 | "
\n",
114 | " \n",
115 | " \n",
116 | " | \n",
117 | " 渔船ID | \n",
118 | " x | \n",
119 | " y | \n",
120 | " 速度 | \n",
121 | " 方向 | \n",
122 | " time | \n",
123 | " type | \n",
124 | "
\n",
125 | " \n",
126 | " \n",
127 | " \n",
128 | " 0 | \n",
129 | " 6966 | \n",
130 | " 6.265902e+06 | \n",
131 | " 5.279254e+06 | \n",
132 | " 0.11 | \n",
133 | " 306 | \n",
134 | " 1106 23:58:16 | \n",
135 | " 围网 | \n",
136 | "
\n",
137 | " \n",
138 | " 1 | \n",
139 | " 6966 | \n",
140 | " 6.265902e+06 | \n",
141 | " 5.279254e+06 | \n",
142 | " 0.00 | \n",
143 | " 0 | \n",
144 | " 1106 23:48:21 | \n",
145 | " 围网 | \n",
146 | "
\n",
147 | " \n",
148 | " 2 | \n",
149 | " 6966 | \n",
150 | " 6.265902e+06 | \n",
151 | " 5.279254e+06 | \n",
152 | " 0.00 | \n",
153 | " 0 | \n",
154 | " 1106 23:38:19 | \n",
155 | " 围网 | \n",
156 | "
\n",
157 | " \n",
158 | " 3 | \n",
159 | " 6966 | \n",
160 | " 6.265902e+06 | \n",
161 | " 5.279254e+06 | \n",
162 | " 0.00 | \n",
163 | " 0 | \n",
164 | " 1106 23:28:36 | \n",
165 | " 围网 | \n",
166 | "
\n",
167 | " \n",
168 | " 4 | \n",
169 | " 6966 | \n",
170 | " 6.265902e+06 | \n",
171 | " 5.279254e+06 | \n",
172 | " 0.32 | \n",
173 | " 130 | \n",
174 | " 1106 23:08:17 | \n",
175 | " 围网 | \n",
176 | "
\n",
177 | " \n",
178 | "
\n",
179 | "
"
180 | ],
181 | "text/plain": [
182 | " 渔船ID x y 速度 方向 time type\n",
183 | "0 6966 6.265902e+06 5.279254e+06 0.11 306 1106 23:58:16 围网\n",
184 | "1 6966 6.265902e+06 5.279254e+06 0.00 0 1106 23:48:21 围网\n",
185 | "2 6966 6.265902e+06 5.279254e+06 0.00 0 1106 23:38:19 围网\n",
186 | "3 6966 6.265902e+06 5.279254e+06 0.00 0 1106 23:28:36 围网\n",
187 | "4 6966 6.265902e+06 5.279254e+06 0.32 130 1106 23:08:17 围网"
188 | ]
189 | },
190 | "execution_count": 17,
191 | "metadata": {},
192 | "output_type": "execute_result"
193 | }
194 | ],
195 | "source": [
196 | "df.head()"
197 | ]
198 | },
199 | {
200 | "cell_type": "code",
201 | "execution_count": 18,
202 | "metadata": {},
203 | "outputs": [
204 | {
205 | "data": {
206 | "text/plain": [
207 | "array(['围网'], dtype=object)"
208 | ]
209 | },
210 | "execution_count": 18,
211 | "metadata": {},
212 | "output_type": "execute_result"
213 | }
214 | ],
215 | "source": [
216 | "df['type'].unique()"
217 | ]
218 | },
219 | {
220 | "cell_type": "code",
221 | "execution_count": 19,
222 | "metadata": {},
223 | "outputs": [
224 | {
225 | "data": {
226 | "text/plain": [
227 | "(389, 7)"
228 | ]
229 | },
230 | "execution_count": 19,
231 | "metadata": {},
232 | "output_type": "execute_result"
233 | }
234 | ],
235 | "source": [
236 | "df.shape"
237 | ]
238 | },
239 | {
240 | "cell_type": "code",
241 | "execution_count": 21,
242 | "metadata": {},
243 | "outputs": [
244 | {
245 | "name": "stderr",
246 | "output_type": "stream",
247 | "text": [
248 | "100%|██████████| 7000/7000 [00:34<00:00, 260.00it/s]\n"
249 | ]
250 | }
251 | ],
252 | "source": [
253 | "ret = []\n",
254 | "for file in tqdm(train_files):\n",
255 | " df = pd.read_csv(f'{train_path}/{file}')\n",
256 | " ret.append(df)\n",
257 | "df = pd.concat(ret)\n",
258 | "df.columns = ['ship','x','y','v','d','time','type']"
259 | ]
260 | },
261 | {
262 | "cell_type": "code",
263 | "execution_count": null,
264 | "metadata": {},
265 | "outputs": [],
266 | "source": [
267 | "df.to_hdf('../input/train.h5', 'df', mode='w')"
268 | ]
269 | },
270 | {
271 | "cell_type": "code",
272 | "execution_count": 37,
273 | "metadata": {},
274 | "outputs": [
275 | {
276 | "name": "stderr",
277 | "output_type": "stream",
278 | "text": [
279 | "100%|██████████| 2000/2000 [00:08<00:00, 225.65it/s]\n"
280 | ]
281 | }
282 | ],
283 | "source": [
284 | "ret = []\n",
285 | "for file in tqdm(test_files):\n",
286 | " df = pd.read_csv(f'{test_path}/{file}')\n",
287 | " ret.append(df)\n",
288 | "df = pd.concat(ret)\n",
289 | "df.columns = ['ship','x','y','v','d','time']"
290 | ]
291 | },
292 | {
293 | "cell_type": "code",
294 | "execution_count": 41,
295 | "metadata": {},
296 | "outputs": [],
297 | "source": [
298 | "df.to_hdf('../input/test.h5', 'df', mode='w')"
299 | ]
300 | },
301 | {
302 | "cell_type": "code",
303 | "execution_count": 40,
304 | "metadata": {
305 | "scrolled": true
306 | },
307 | "outputs": [
308 | {
309 | "data": {
310 | "text/plain": [
311 | "(782378, 6)"
312 | ]
313 | },
314 | "execution_count": 40,
315 | "metadata": {},
316 | "output_type": "execute_result"
317 | }
318 | ],
319 | "source": [
320 | "df.shape"
321 | ]
322 | },
323 | {
324 | "cell_type": "code",
325 | "execution_count": 38,
326 | "metadata": {},
327 | "outputs": [
328 | {
329 | "data": {
330 | "text/html": [
331 | "\n",
332 | "\n",
345 | "
\n",
346 | " \n",
347 | " \n",
348 | " | \n",
349 | " 渔船ID | \n",
350 | " x | \n",
351 | " y | \n",
352 | " 速度 | \n",
353 | " 方向 | \n",
354 | " time | \n",
355 | "
\n",
356 | " \n",
357 | " \n",
358 | " \n",
359 | " 0 | \n",
360 | " 8793 | \n",
361 | " 6.102450e+06 | \n",
362 | " 5.112760e+06 | \n",
363 | " 0.00 | \n",
364 | " 0 | \n",
365 | " 1106 23:56:34 | \n",
366 | "
\n",
367 | " \n",
368 | " 1 | \n",
369 | " 8793 | \n",
370 | " 6.102450e+06 | \n",
371 | " 5.112760e+06 | \n",
372 | " 0.00 | \n",
373 | " 0 | \n",
374 | " 1106 23:46:34 | \n",
375 | "
\n",
376 | " \n",
377 | " 2 | \n",
378 | " 8793 | \n",
379 | " 6.102450e+06 | \n",
380 | " 5.112760e+06 | \n",
381 | " 0.00 | \n",
382 | " 0 | \n",
383 | " 1106 23:37:31 | \n",
384 | "
\n",
385 | " \n",
386 | " 3 | \n",
387 | " 8793 | \n",
388 | " 6.102450e+06 | \n",
389 | " 5.112760e+06 | \n",
390 | " 0.16 | \n",
391 | " 0 | \n",
392 | " 1106 23:26:34 | \n",
393 | "
\n",
394 | " \n",
395 | " 4 | \n",
396 | " 8793 | \n",
397 | " 6.102450e+06 | \n",
398 | " 5.112760e+06 | \n",
399 | " 0.00 | \n",
400 | " 0 | \n",
401 | " 1106 23:16:34 | \n",
402 | "
\n",
403 | " \n",
404 | "
\n",
405 | "
"
406 | ],
407 | "text/plain": [
408 | " 渔船ID x y 速度 方向 time\n",
409 | "0 8793 6.102450e+06 5.112760e+06 0.00 0 1106 23:56:34\n",
410 | "1 8793 6.102450e+06 5.112760e+06 0.00 0 1106 23:46:34\n",
411 | "2 8793 6.102450e+06 5.112760e+06 0.00 0 1106 23:37:31\n",
412 | "3 8793 6.102450e+06 5.112760e+06 0.16 0 1106 23:26:34\n",
413 | "4 8793 6.102450e+06 5.112760e+06 0.00 0 1106 23:16:34"
414 | ]
415 | },
416 | "execution_count": 38,
417 | "metadata": {},
418 | "output_type": "execute_result"
419 | }
420 | ],
421 | "source": [
422 | "df.head()"
423 | ]
424 | }
425 | ],
426 | "metadata": {
427 | "kernelspec": {
428 | "display_name": "Python 3",
429 | "language": "python",
430 | "name": "python3"
431 | },
432 | "language_info": {
433 | "codemirror_mode": {
434 | "name": "ipython",
435 | "version": 3
436 | },
437 | "file_extension": ".py",
438 | "mimetype": "text/x-python",
439 | "name": "python",
440 | "nbconvert_exporter": "python",
441 | "pygments_lexer": "ipython3",
442 | "version": "3.7.3"
443 | },
444 | "toc": {
445 | "base_numbering": 1,
446 | "nav_menu": {},
447 | "number_sections": true,
448 | "sideBar": true,
449 | "skip_h1_title": false,
450 | "title_cell": "Table of Contents",
451 | "title_sidebar": "Contents",
452 | "toc_cell": false,
453 | "toc_position": {},
454 | "toc_section_display": true,
455 | "toc_window_display": false
456 | }
457 | },
458 | "nbformat": 4,
459 | "nbformat_minor": 2
460 | }
461 |
--------------------------------------------------------------------------------
/working/201_train_1.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 52,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "%matplotlib inline\n",
10 | "\n",
11 | "import pandas as pd\n",
12 | "import numpy as np\n",
13 | "import os\n",
14 | "from tqdm import tqdm\n",
15 | "import lightgbm as lgb\n",
16 | "from sklearn.model_selection import StratifiedKFold\n",
17 | "from sklearn import metrics\n",
18 | "import warnings\n",
19 | "import matplotlib.pyplot as plt\n",
20 | "\n",
21 | "pd.set_option('display.max_columns', 100)\n",
22 | "warnings.filterwarnings('ignore')"
23 | ]
24 | },
25 | {
26 | "cell_type": "code",
27 | "execution_count": 97,
28 | "metadata": {},
29 | "outputs": [],
30 | "source": [
31 | "def group_feature(df, key, target, aggs): \n",
32 | " agg_dict = {}\n",
33 | " for ag in aggs:\n",
34 | " agg_dict[f'{target}_{ag}'] = ag\n",
35 | " print(agg_dict)\n",
36 | " t = df.groupby(key)[target].agg(agg_dict).reset_index()\n",
37 | " return t\n",
38 | "\n",
39 | "def extract_feature(df, train):\n",
40 | " t = group_feature(df, 'ship','x',['max','min','mean','std','skew','sum'])\n",
41 | " train = pd.merge(train, t, on='ship', how='left')\n",
42 | " t = group_feature(df, 'ship','x',['count'])\n",
43 | " train = pd.merge(train, t, on='ship', how='left')\n",
44 | " t = group_feature(df, 'ship','y',['max','min','mean','std','skew','sum'])\n",
45 | " train = pd.merge(train, t, on='ship', how='left')\n",
46 | " t = group_feature(df, 'ship','v',['max','min','mean','std','skew','sum'])\n",
47 | " train = pd.merge(train, t, on='ship', how='left')\n",
48 | " t = group_feature(df, 'ship','d',['max','min','mean','std','skew','sum'])\n",
49 | " train = pd.merge(train, t, on='ship', how='left')\n",
50 | " train['x_max_x_min'] = train['x_max'] - train['x_min']\n",
51 | " train['y_max_y_min'] = train['y_max'] - train['y_min']\n",
52 | " train['y_max_x_min'] = train['y_max'] - train['x_min']\n",
53 | " train['x_max_y_min'] = train['x_max'] - train['y_min']\n",
54 | " train['slope'] = train['y_max_y_min'] / np.where(train['x_max_x_min']==0, 0.001, train['x_max_x_min'])\n",
55 | " train['area'] = train['x_max_x_min'] * train['y_max_y_min']\n",
56 | " \n",
57 | " mode_hour = df.groupby('ship')['hour'].agg(lambda x:x.value_counts().index[0]).to_dict()\n",
58 | " train['mode_hour'] = train['ship'].map(mode_hour)\n",
59 | " \n",
60 | " t = group_feature(df, 'ship','hour',['max','min'])\n",
61 | " train = pd.merge(train, t, on='ship', how='left')\n",
62 | " \n",
63 | " hour_nunique = df.groupby('ship')['hour'].nunique().to_dict()\n",
64 | " date_nunique = df.groupby('ship')['date'].nunique().to_dict()\n",
65 | " train['hour_nunique'] = train['ship'].map(hour_nunique)\n",
66 | " train['date_nunique'] = train['ship'].map(date_nunique)\n",
67 | "\n",
68 | " t = df.groupby('ship')['time'].agg({'diff_time':lambda x:np.max(x)-np.min(x)}).reset_index()\n",
69 | " t['diff_day'] = t['diff_time'].dt.days\n",
70 | " t['diff_second'] = t['diff_time'].dt.seconds\n",
71 | " train = pd.merge(train, t, on='ship', how='left')\n",
72 | " return train\n",
73 | "\n",
74 | "def extract_dt(df):\n",
75 | " df['time'] = pd.to_datetime(df['time'], format='%m%d %H:%M:%S')\n",
76 | " # df['month'] = df['time'].dt.month\n",
77 | " # df['day'] = df['time'].dt.day\n",
78 | " df['date'] = df['time'].dt.date\n",
79 | " df['hour'] = df['time'].dt.hour\n",
80 | " # df = df.drop_duplicates(['ship','month'])\n",
81 | " df['weekday'] = df['time'].dt.weekday\n",
82 | " return df"
83 | ]
84 | },
85 | {
86 | "cell_type": "code",
87 | "execution_count": 70,
88 | "metadata": {},
89 | "outputs": [],
90 | "source": [
91 | "train = pd.read_hdf('../input/train.h5')\n",
92 | "# train = df.drop_duplicates(['ship','type'])"
93 | ]
94 | },
95 | {
96 | "cell_type": "code",
97 | "execution_count": 71,
98 | "metadata": {},
99 | "outputs": [],
100 | "source": [
101 | "test = pd.read_hdf('../input/test.h5')"
102 | ]
103 | },
104 | {
105 | "cell_type": "code",
106 | "execution_count": 72,
107 | "metadata": {},
108 | "outputs": [],
109 | "source": [
110 | "train = extract_dt(train)"
111 | ]
112 | },
113 | {
114 | "cell_type": "code",
115 | "execution_count": 73,
116 | "metadata": {},
117 | "outputs": [],
118 | "source": [
119 | "test = extract_dt(test)"
120 | ]
121 | },
122 | {
123 | "cell_type": "code",
124 | "execution_count": 107,
125 | "metadata": {},
126 | "outputs": [],
127 | "source": [
128 | "train_label = train.drop_duplicates('ship')\n",
129 | "test_label = test.drop_duplicates('ship')"
130 | ]
131 | },
132 | {
133 | "cell_type": "code",
134 | "execution_count": 108,
135 | "metadata": {},
136 | "outputs": [
137 | {
138 | "data": {
139 | "text/plain": [
140 | "拖网 0.623000\n",
141 | "围网 0.231571\n",
142 | "刺网 0.145429\n",
143 | "Name: type, dtype: float64"
144 | ]
145 | },
146 | "execution_count": 108,
147 | "metadata": {},
148 | "output_type": "execute_result"
149 | }
150 | ],
151 | "source": [
152 | "train_label['type'].value_counts(1)"
153 | ]
154 | },
155 | {
156 | "cell_type": "code",
157 | "execution_count": 99,
158 | "metadata": {},
159 | "outputs": [],
160 | "source": [
161 | "type_map = dict(zip(train_label['type'].unique(), np.arange(3)))\n",
162 | "type_map_rev = {v:k for k,v in type_map.items()}\n",
163 | "train_label['type'] = train_label['type'].map(type_map)\n"
164 | ]
165 | },
166 | {
167 | "cell_type": "code",
168 | "execution_count": 100,
169 | "metadata": {},
170 | "outputs": [
171 | {
172 | "name": "stdout",
173 | "output_type": "stream",
174 | "text": [
175 | "{'x_max': 'max', 'x_min': 'min', 'x_mean': 'mean', 'x_std': 'std', 'x_sum': 'sum'}\n",
176 | "{'x_count': 'count'}\n",
177 | "{'y_max': 'max', 'y_min': 'min', 'y_mean': 'mean', 'y_std': 'std', 'y_sum': 'sum'}\n",
178 | "{'v_max': 'max', 'v_min': 'min', 'v_mean': 'mean', 'v_std': 'std', 'v_sum': 'sum'}\n",
179 | "{'d_max': 'max', 'd_min': 'min', 'd_mean': 'mean', 'd_std': 'std', 'd_sum': 'sum'}\n",
180 | "{'hour_max': 'max', 'hour_min': 'min'}\n"
181 | ]
182 | }
183 | ],
184 | "source": [
185 | "train_label = extract_feature(train, train_label)"
186 | ]
187 | },
188 | {
189 | "cell_type": "code",
190 | "execution_count": 101,
191 | "metadata": {},
192 | "outputs": [
193 | {
194 | "name": "stdout",
195 | "output_type": "stream",
196 | "text": [
197 | "{'x_max': 'max', 'x_min': 'min', 'x_mean': 'mean', 'x_std': 'std', 'x_sum': 'sum'}\n",
198 | "{'x_count': 'count'}\n",
199 | "{'y_max': 'max', 'y_min': 'min', 'y_mean': 'mean', 'y_std': 'std', 'y_sum': 'sum'}\n",
200 | "{'v_max': 'max', 'v_min': 'min', 'v_mean': 'mean', 'v_std': 'std', 'v_sum': 'sum'}\n",
201 | "{'d_max': 'max', 'd_min': 'min', 'd_mean': 'mean', 'd_std': 'std', 'd_sum': 'sum'}\n",
202 | "{'hour_max': 'max', 'hour_min': 'min'}\n"
203 | ]
204 | }
205 | ],
206 | "source": [
207 | "test_label = extract_feature(test, test_label)"
208 | ]
209 | },
210 | {
211 | "cell_type": "code",
212 | "execution_count": 102,
213 | "metadata": {},
214 | "outputs": [],
215 | "source": [
216 | "\n",
217 | "features = [x for x in train_label.columns if x not in ['ship','type','time','diff_time','date']]\n",
218 | "target = 'type'"
219 | ]
220 | },
221 | {
222 | "cell_type": "code",
223 | "execution_count": 103,
224 | "metadata": {},
225 | "outputs": [
226 | {
227 | "name": "stdout",
228 | "output_type": "stream",
229 | "text": [
230 | "40 x,y,v,d,hour,weekday,x_max,x_min,x_mean,x_std,x_sum,x_count,y_max,y_min,y_mean,y_std,y_sum,v_max,v_min,v_mean,v_std,v_sum,d_max,d_min,d_mean,d_std,d_sum,x_max_x_min,y_max_y_min,y_max_x_min,x_max_y_min,slope,area,mode_hour,hour_max,hour_min,hour_nunique,date_nunique,diff_day,diff_second\n"
231 | ]
232 | }
233 | ],
234 | "source": [
235 | "print(len(features), ','.join(features))"
236 | ]
237 | },
238 | {
239 | "cell_type": "code",
240 | "execution_count": 104,
241 | "metadata": {},
242 | "outputs": [],
243 | "source": [
244 | "params = {\n",
245 | " 'n_estimators': 5000,\n",
246 | " 'boosting_type': 'gbdt',\n",
247 | " 'objective': 'multiclass',\n",
248 | " 'num_class': 3,\n",
249 | " 'early_stopping_rounds': 100,\n",
250 | "}"
251 | ]
252 | },
253 | {
254 | "cell_type": "code",
255 | "execution_count": 105,
256 | "metadata": {},
257 | "outputs": [
258 | {
259 | "name": "stdout",
260 | "output_type": "stream",
261 | "text": [
262 | "Training until validation scores don't improve for 100 rounds\n",
263 | "[100]\ttraining's multi_logloss: 0.0828667\tvalid_1's multi_logloss: 0.269078\n",
264 | "[200]\ttraining's multi_logloss: 0.022058\tvalid_1's multi_logloss: 0.264574\n",
265 | "Early stopping, best iteration is:\n",
266 | "[180]\ttraining's multi_logloss: 0.0284972\tvalid_1's multi_logloss: 0.262031\n",
267 | "0 val f1 0.8744567161504285\n",
268 | "Training until validation scores don't improve for 100 rounds\n",
269 | "[100]\ttraining's multi_logloss: 0.085238\tvalid_1's multi_logloss: 0.274897\n",
270 | "[200]\ttraining's multi_logloss: 0.0222402\tvalid_1's multi_logloss: 0.272668\n",
271 | "Early stopping, best iteration is:\n",
272 | "[153]\ttraining's multi_logloss: 0.0416896\tvalid_1's multi_logloss: 0.268232\n",
273 | "1 val f1 0.8570390224496975\n",
274 | "Training until validation scores don't improve for 100 rounds\n",
275 | "[100]\ttraining's multi_logloss: 0.0839062\tvalid_1's multi_logloss: 0.266458\n",
276 | "[200]\ttraining's multi_logloss: 0.0228758\tvalid_1's multi_logloss: 0.25578\n",
277 | "Early stopping, best iteration is:\n",
278 | "[164]\ttraining's multi_logloss: 0.0363628\tvalid_1's multi_logloss: 0.254512\n",
279 | "2 val f1 0.8808118299909231\n",
280 | "Training until validation scores don't improve for 100 rounds\n",
281 | "[100]\ttraining's multi_logloss: 0.0845035\tvalid_1's multi_logloss: 0.272673\n",
282 | "[200]\ttraining's multi_logloss: 0.0225549\tvalid_1's multi_logloss: 0.277392\n",
283 | "Early stopping, best iteration is:\n",
284 | "[108]\ttraining's multi_logloss: 0.0758342\tvalid_1's multi_logloss: 0.270036\n",
285 | "3 val f1 0.8629486588985998\n",
286 | "Training until validation scores don't improve for 100 rounds\n",
287 | "[100]\ttraining's multi_logloss: 0.0815182\tvalid_1's multi_logloss: 0.296271\n",
288 | "[200]\ttraining's multi_logloss: 0.0211976\tvalid_1's multi_logloss: 0.295628\n",
289 | "Early stopping, best iteration is:\n",
290 | "[160]\ttraining's multi_logloss: 0.0357663\tvalid_1's multi_logloss: 0.290207\n",
291 | "4 val f1 0.8549111545740181\n"
292 | ]
293 | }
294 | ],
295 | "source": [
296 | "fold = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)\n",
297 | "\n",
298 | "X = train_label[features].copy()\n",
299 | "y = train_label[target]\n",
300 | "models = []\n",
301 | "pred = np.zeros((len(test_label),3))\n",
302 | "oof = np.zeros((len(X), 3))\n",
303 | "for index, (train_idx, val_idx) in enumerate(fold.split(X, y)):\n",
304 | "\n",
305 | " train_set = lgb.Dataset(X.iloc[train_idx], y.iloc[train_idx])\n",
306 | " val_set = lgb.Dataset(X.iloc[val_idx], y.iloc[val_idx])\n",
307 | "\n",
308 | " model = lgb.train(params, train_set, valid_sets=[train_set, val_set], verbose_eval=100)\n",
309 | " models.append(model)\n",
310 | " val_pred = model.predict(X.iloc[val_idx])\n",
311 | " oof[val_idx] = val_pred\n",
312 | " val_y = y.iloc[val_idx]\n",
313 | " val_pred = np.argmax(val_pred, axis=1)\n",
314 | " print(index, 'val f1', metrics.f1_score(val_y, val_pred, average='macro'))\n",
315 | " # 0.8695539641133697\n",
316 | " # 0.8866211724839532\n",
317 | "\n",
318 | " test_pred = model.predict(test_label[features])\n",
319 | " pred += test_pred/5"
320 | ]
321 | },
322 | {
323 | "cell_type": "code",
324 | "execution_count": 106,
325 | "metadata": {},
326 | "outputs": [
327 | {
328 | "name": "stdout",
329 | "output_type": "stream",
330 | "text": [
331 | "oof f1 0.8660762740409558\n"
332 | ]
333 | }
334 | ],
335 | "source": [
336 | "oof = np.argmax(oof, axis=1)\n",
337 | "print('oof f1', metrics.f1_score(oof, y, average='macro'))\n",
338 | "# 0.8701544575329372"
339 | ]
340 | },
341 | {
342 | "cell_type": "code",
343 | "execution_count": 152,
344 | "metadata": {},
345 | "outputs": [
346 | {
347 | "name": "stdout",
348 | "output_type": "stream",
349 | "text": [
350 | "1 0.6325\n",
351 | "0 0.2390\n",
352 | "2 0.1285\n",
353 | "Name: pred, dtype: float64\n"
354 | ]
355 | }
356 | ],
357 | "source": [
358 | "pred = np.argmax(pred, axis=1)\n",
359 | "sub = test_label[['ship']]\n",
360 | "sub['pred'] = pred\n",
361 | "\n",
362 | "print(sub['pred'].value_counts(1))\n",
363 | "sub['pred'] = sub['pred'].map(type_map_rev)\n",
364 | "sub.to_csv('result.csv', index=None, header=None)"
365 | ]
366 | },
367 | {
368 | "cell_type": "code",
369 | "execution_count": 84,
370 | "metadata": {},
371 | "outputs": [],
372 | "source": [
373 | "ret = []\n",
374 | "for index, model in enumerate(models):\n",
375 | " df = pd.DataFrame()\n",
376 | " df['name'] = model.feature_name()\n",
377 | " df['score'] = model.feature_importance()\n",
378 | " df['fold'] = index\n",
379 | " ret.append(df)\n",
380 | " \n",
381 | "df = pd.concat(ret)"
382 | ]
383 | },
384 | {
385 | "cell_type": "code",
386 | "execution_count": 85,
387 | "metadata": {},
388 | "outputs": [],
389 | "source": [
390 | "df = df.groupby('name', as_index=False)['score'].mean()\n",
391 | "df = df.sort_values(['score'], ascending=False)"
392 | ]
393 | },
394 | {
395 | "cell_type": "code",
396 | "execution_count": 86,
397 | "metadata": {},
398 | "outputs": [
399 | {
400 | "data": {
401 | "text/html": [
402 | "\n",
403 | "\n",
416 | "
\n",
417 | " \n",
418 | " \n",
419 | " | \n",
420 | " name | \n",
421 | " score | \n",
422 | "
\n",
423 | " \n",
424 | " \n",
425 | " \n",
426 | " 37 | \n",
427 | " y_max_x_min | \n",
428 | " 676.4 | \n",
429 | "
\n",
430 | " \n",
431 | " 36 | \n",
432 | " y_max | \n",
433 | " 624.6 | \n",
434 | "
\n",
435 | " \n",
436 | " 31 | \n",
437 | " x_min | \n",
438 | " 611.4 | \n",
439 | "
\n",
440 | " \n",
441 | " 29 | \n",
442 | " x_max_y_min | \n",
443 | " 568.0 | \n",
444 | "
\n",
445 | " \n",
446 | " 22 | \n",
447 | " v_std | \n",
448 | " 535.6 | \n",
449 | "
\n",
450 | " \n",
451 | " 35 | \n",
452 | " y | \n",
453 | " 512.0 | \n",
454 | "
\n",
455 | " \n",
456 | " 25 | \n",
457 | " x | \n",
458 | " 458.8 | \n",
459 | "
\n",
460 | " \n",
461 | " 21 | \n",
462 | " v_skew | \n",
463 | " 445.4 | \n",
464 | "
\n",
465 | " \n",
466 | " 40 | \n",
467 | " y_min | \n",
468 | " 422.6 | \n",
469 | "
\n",
470 | " \n",
471 | " 32 | \n",
472 | " x_skew | \n",
473 | " 419.2 | \n",
474 | "
\n",
475 | " \n",
476 | " 41 | \n",
477 | " y_skew | \n",
478 | " 416.4 | \n",
479 | "
\n",
480 | " \n",
481 | " 16 | \n",
482 | " slope | \n",
483 | " 398.2 | \n",
484 | "
\n",
485 | " \n",
486 | " 27 | \n",
487 | " x_max | \n",
488 | " 373.4 | \n",
489 | "
\n",
490 | " \n",
491 | " 10 | \n",
492 | " diff_second | \n",
493 | " 368.6 | \n",
494 | "
\n",
495 | " \n",
496 | " 33 | \n",
497 | " x_std | \n",
498 | " 343.8 | \n",
499 | "
\n",
500 | " \n",
501 | " 3 | \n",
502 | " d_mean | \n",
503 | " 342.2 | \n",
504 | "
\n",
505 | " \n",
506 | " 17 | \n",
507 | " v | \n",
508 | " 341.2 | \n",
509 | "
\n",
510 | " \n",
511 | " 18 | \n",
512 | " v_max | \n",
513 | " 338.6 | \n",
514 | "
\n",
515 | " \n",
516 | " 19 | \n",
517 | " v_mean | \n",
518 | " 331.8 | \n",
519 | "
\n",
520 | " \n",
521 | " 6 | \n",
522 | " d_std | \n",
523 | " 331.4 | \n",
524 | "
\n",
525 | " \n",
526 | " 39 | \n",
527 | " y_mean | \n",
528 | " 320.4 | \n",
529 | "
\n",
530 | " \n",
531 | " 30 | \n",
532 | " x_mean | \n",
533 | " 319.0 | \n",
534 | "
\n",
535 | " \n",
536 | " 42 | \n",
537 | " y_std | \n",
538 | " 285.4 | \n",
539 | "
\n",
540 | " \n",
541 | " 23 | \n",
542 | " v_sum | \n",
543 | " 271.4 | \n",
544 | "
\n",
545 | " \n",
546 | " 26 | \n",
547 | " x_count | \n",
548 | " 265.0 | \n",
549 | "
\n",
550 | " \n",
551 | " 7 | \n",
552 | " d_sum | \n",
553 | " 262.0 | \n",
554 | "
\n",
555 | " \n",
556 | " 28 | \n",
557 | " x_max_x_min | \n",
558 | " 258.4 | \n",
559 | "
\n",
560 | " \n",
561 | " 1 | \n",
562 | " d | \n",
563 | " 252.2 | \n",
564 | "
\n",
565 | " \n",
566 | " 34 | \n",
567 | " x_sum | \n",
568 | " 241.8 | \n",
569 | "
\n",
570 | " \n",
571 | " 5 | \n",
572 | " d_skew | \n",
573 | " 239.4 | \n",
574 | "
\n",
575 | " \n",
576 | " 38 | \n",
577 | " y_max_y_min | \n",
578 | " 233.2 | \n",
579 | "
\n",
580 | " \n",
581 | " 0 | \n",
582 | " area | \n",
583 | " 225.6 | \n",
584 | "
\n",
585 | " \n",
586 | " 43 | \n",
587 | " y_sum | \n",
588 | " 204.4 | \n",
589 | "
\n",
590 | " \n",
591 | " 15 | \n",
592 | " mode_hour | \n",
593 | " 177.8 | \n",
594 | "
\n",
595 | " \n",
596 | " 2 | \n",
597 | " d_max | \n",
598 | " 155.6 | \n",
599 | "
\n",
600 | " \n",
601 | " 20 | \n",
602 | " v_min | \n",
603 | " 61.0 | \n",
604 | "
\n",
605 | " \n",
606 | " 11 | \n",
607 | " hour | \n",
608 | " 26.0 | \n",
609 | "
\n",
610 | " \n",
611 | " 8 | \n",
612 | " date_nunique | \n",
613 | " 25.6 | \n",
614 | "
\n",
615 | " \n",
616 | " 24 | \n",
617 | " weekday | \n",
618 | " 23.2 | \n",
619 | "
\n",
620 | " \n",
621 | " 9 | \n",
622 | " diff_day | \n",
623 | " 20.4 | \n",
624 | "
\n",
625 | " \n",
626 | " 4 | \n",
627 | " d_min | \n",
628 | " 15.2 | \n",
629 | "
\n",
630 | " \n",
631 | " 14 | \n",
632 | " hour_nunique | \n",
633 | " 1.4 | \n",
634 | "
\n",
635 | " \n",
636 | " 13 | \n",
637 | " hour_min | \n",
638 | " 0.0 | \n",
639 | "
\n",
640 | " \n",
641 | " 12 | \n",
642 | " hour_max | \n",
643 | " 0.0 | \n",
644 | "
\n",
645 | " \n",
646 | "
\n",
647 | "
"
648 | ],
649 | "text/plain": [
650 | " name score\n",
651 | "37 y_max_x_min 676.4\n",
652 | "36 y_max 624.6\n",
653 | "31 x_min 611.4\n",
654 | "29 x_max_y_min 568.0\n",
655 | "22 v_std 535.6\n",
656 | "35 y 512.0\n",
657 | "25 x 458.8\n",
658 | "21 v_skew 445.4\n",
659 | "40 y_min 422.6\n",
660 | "32 x_skew 419.2\n",
661 | "41 y_skew 416.4\n",
662 | "16 slope 398.2\n",
663 | "27 x_max 373.4\n",
664 | "10 diff_second 368.6\n",
665 | "33 x_std 343.8\n",
666 | "3 d_mean 342.2\n",
667 | "17 v 341.2\n",
668 | "18 v_max 338.6\n",
669 | "19 v_mean 331.8\n",
670 | "6 d_std 331.4\n",
671 | "39 y_mean 320.4\n",
672 | "30 x_mean 319.0\n",
673 | "42 y_std 285.4\n",
674 | "23 v_sum 271.4\n",
675 | "26 x_count 265.0\n",
676 | "7 d_sum 262.0\n",
677 | "28 x_max_x_min 258.4\n",
678 | "1 d 252.2\n",
679 | "34 x_sum 241.8\n",
680 | "5 d_skew 239.4\n",
681 | "38 y_max_y_min 233.2\n",
682 | "0 area 225.6\n",
683 | "43 y_sum 204.4\n",
684 | "15 mode_hour 177.8\n",
685 | "2 d_max 155.6\n",
686 | "20 v_min 61.0\n",
687 | "11 hour 26.0\n",
688 | "8 date_nunique 25.6\n",
689 | "24 weekday 23.2\n",
690 | "9 diff_day 20.4\n",
691 | "4 d_min 15.2\n",
692 | "14 hour_nunique 1.4\n",
693 | "13 hour_min 0.0\n",
694 | "12 hour_max 0.0"
695 | ]
696 | },
697 | "execution_count": 86,
698 | "metadata": {},
699 | "output_type": "execute_result"
700 | }
701 | ],
702 | "source": [
703 | "df"
704 | ]
705 | },
706 | {
707 | "cell_type": "code",
708 | "execution_count": null,
709 | "metadata": {},
710 | "outputs": [],
711 | "source": []
712 | }
713 | ],
714 | "metadata": {
715 | "kernelspec": {
716 | "display_name": "Python 3",
717 | "language": "python",
718 | "name": "python3"
719 | },
720 | "language_info": {
721 | "codemirror_mode": {
722 | "name": "ipython",
723 | "version": 3
724 | },
725 | "file_extension": ".py",
726 | "mimetype": "text/x-python",
727 | "name": "python",
728 | "nbconvert_exporter": "python",
729 | "pygments_lexer": "ipython3",
730 | "version": "3.7.3"
731 | },
732 | "toc": {
733 | "base_numbering": 1,
734 | "nav_menu": {},
735 | "number_sections": true,
736 | "sideBar": true,
737 | "skip_h1_title": false,
738 | "title_cell": "Table of Contents",
739 | "title_sidebar": "Contents",
740 | "toc_cell": false,
741 | "toc_position": {},
742 | "toc_section_display": true,
743 | "toc_window_display": false
744 | }
745 | },
746 | "nbformat": 4,
747 | "nbformat_minor": 2
748 | }
749 |
--------------------------------------------------------------------------------