├── LICENSE
├── README.md
├── code
    ├── README.md
    ├── base_features_006_filter_error_6_std.ipynb
    ├── bazin_003.ipynb
    ├── celerite_003.ipynb
    ├── eda_024_bazin.ipynb
    ├── eda_026_newling.ipynb
    ├── eda_031_celerite.ipynb
    ├── lgb_best.ipynb
    ├── newling_003.ipynb
    └── test_chunks.ipynb
├── data
    └── README.md
├── input
    └── README.md
└── submissions
    └── README.md


/LICENSE:
--------------------------------------------------------------------------------
  1 |                                  Apache License
  2 |                            Version 2.0, January 2004
  3 |                         http://www.apache.org/licenses/
  4 | 
  5 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  6 | 
  7 |    1. Definitions.
  8 | 
  9 |       "License" shall mean the terms and conditions for use, reproduction,
 10 |       and distribution as defined by Sections 1 through 9 of this document.
 11 | 
 12 |       "Licensor" shall mean the copyright owner or entity authorized by
 13 |       the copyright owner that is granting the License.
 14 | 
 15 |       "Legal Entity" shall mean the union of the acting entity and all
 16 |       other entities that control, are controlled by, or are under common
 17 |       control with that entity. For the purposes of this definition,
 18 |       "control" means (i) the power, direct or indirect, to cause the
 19 |       direction or management of such entity, whether by contract or
 20 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 21 |       outstanding shares, or (iii) beneficial ownership of such entity.
 22 | 
 23 |       "You" (or "Your") shall mean an individual or Legal Entity
 24 |       exercising permissions granted by this License.
 25 | 
 26 |       "Source" form shall mean the preferred form for making modifications,
 27 |       including but not limited to software source code, documentation
 28 |       source, and configuration files.
 29 | 
 30 |       "Object" form shall mean any form resulting from mechanical
 31 |       transformation or translation of a Source form, including but
 32 |       not limited to compiled object code, generated documentation,
 33 |       and conversions to other media types.
 34 | 
 35 |       "Work" shall mean the work of authorship, whether in Source or
 36 |       Object form, made available under the License, as indicated by a
 37 |       copyright notice that is included in or attached to the work
 38 |       (an example is provided in the Appendix below).
 39 | 
 40 |       "Derivative Works" shall mean any work, whether in Source or Object
 41 |       form, that is based on (or derived from) the Work and for which the
 42 |       editorial revisions, annotations, elaborations, or other modifications
 43 |       represent, as a whole, an original work of authorship. For the purposes
 44 |       of this License, Derivative Works shall not include works that remain
 45 |       separable from, or merely link (or bind by name) to the interfaces of,
 46 |       the Work and Derivative Works thereof.
 47 | 
 48 |       "Contribution" shall mean any work of authorship, including
 49 |       the original version of the Work and any modifications or additions
 50 |       to that Work or Derivative Works thereof, that is intentionally
 51 |       submitted to Licensor for inclusion in the Work by the copyright owner
 52 |       or by an individual or Legal Entity authorized to submit on behalf of
 53 |       the copyright owner. For the purposes of this definition, "submitted"
 54 |       means any form of electronic, verbal, or written communication sent
 55 |       to the Licensor or its representatives, including but not limited to
 56 |       communication on electronic mailing lists, source code control systems,
 57 |       and issue tracking systems that are managed by, or on behalf of, the
 58 |       Licensor for the purpose of discussing and improving the Work, but
 59 |       excluding communication that is conspicuously marked or otherwise
 60 |       designated in writing by the copyright owner as "Not a Contribution."
 61 | 
 62 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 63 |       on behalf of whom a Contribution has been received by Licensor and
 64 |       subsequently incorporated within the Work.
 65 | 
 66 |    2. Grant of Copyright License. Subject to the terms and conditions of
 67 |       this License, each Contributor hereby grants to You a perpetual,
 68 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 69 |       copyright license to reproduce, prepare Derivative Works of,
 70 |       publicly display, publicly perform, sublicense, and distribute the
 71 |       Work and such Derivative Works in Source or Object form.
 72 | 
 73 |    3. Grant of Patent License. Subject to the terms and conditions of
 74 |       this License, each Contributor hereby grants to You a perpetual,
 75 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 76 |       (except as stated in this section) patent license to make, have made,
 77 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 78 |       where such license applies only to those patent claims licensable
 79 |       by such Contributor that are necessarily infringed by their
 80 |       Contribution(s) alone or by combination of their Contribution(s)
 81 |       with the Work to which such Contribution(s) was submitted. If You
 82 |       institute patent litigation against any entity (including a
 83 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 84 |       or a Contribution incorporated within the Work constitutes direct
 85 |       or contributory patent infringement, then any patent licenses
 86 |       granted to You under this License for that Work shall terminate
 87 |       as of the date such litigation is filed.
 88 | 
 89 |    4. Redistribution. You may reproduce and distribute copies of the
 90 |       Work or Derivative Works thereof in any medium, with or without
 91 |       modifications, and in Source or Object form, provided that You
 92 |       meet the following conditions:
 93 | 
 94 |       (a) You must give any other recipients of the Work or
 95 |           Derivative Works a copy of this License; and
 96 | 
 97 |       (b) You must cause any modified files to carry prominent notices
 98 |           stating that You changed the files; and
 99 | 
100 |       (c) You must retain, in the Source form of any Derivative Works
101 |           that You distribute, all copyright, patent, trademark, and
102 |           attribution notices from the Source form of the Work,
103 |           excluding those notices that do not pertain to any part of
104 |           the Derivative Works; and
105 | 
106 |       (d) If the Work includes a "NOTICE" text file as part of its
107 |           distribution, then any Derivative Works that You distribute must
108 |           include a readable copy of the attribution notices contained
109 |           within such NOTICE file, excluding those notices that do not
110 |           pertain to any part of the Derivative Works, in at least one
111 |           of the following places: within a NOTICE text file distributed
112 |           as part of the Derivative Works; within the Source form or
113 |           documentation, if provided along with the Derivative Works; or,
114 |           within a display generated by the Derivative Works, if and
115 |           wherever such third-party notices normally appear. The contents
116 |           of the NOTICE file are for informational purposes only and
117 |           do not modify the License. You may add Your own attribution
118 |           notices within Derivative Works that You distribute, alongside
119 |           or as an addendum to the NOTICE text from the Work, provided
120 |           that such additional attribution notices cannot be construed
121 |           as modifying the License.
122 | 
123 |       You may add Your own copyright statement to Your modifications and
124 |       may provide additional or different license terms and conditions
125 |       for use, reproduction, or distribution of Your modifications, or
126 |       for any such Derivative Works as a whole, provided Your use,
127 |       reproduction, and distribution of the Work otherwise complies with
128 |       the conditions stated in this License.
129 | 
130 |    5. Submission of Contributions. Unless You explicitly state otherwise,
131 |       any Contribution intentionally submitted for inclusion in the Work
132 |       by You to the Licensor shall be under the terms and conditions of
133 |       this License, without any additional terms or conditions.
134 |       Notwithstanding the above, nothing herein shall supersede or modify
135 |       the terms of any separate license agreement you may have executed
136 |       with Licensor regarding such Contributions.
137 | 
138 |    6. Trademarks. This License does not grant permission to use the trade
139 |       names, trademarks, service marks, or product names of the Licensor,
140 |       except as required for reasonable and customary use in describing the
141 |       origin of the Work and reproducing the content of the NOTICE file.
142 | 
143 |    7. Disclaimer of Warranty. Unless required by applicable law or
144 |       agreed to in writing, Licensor provides the Work (and each
145 |       Contributor provides its Contributions) on an "AS IS" BASIS,
146 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 |       implied, including, without limitation, any warranties or conditions
148 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 |       PARTICULAR PURPOSE. You are solely responsible for determining the
150 |       appropriateness of using or redistributing the Work and assume any
151 |       risks associated with Your exercise of permissions under this License.
152 | 
153 |    8. Limitation of Liability. In no event and under no legal theory,
154 |       whether in tort (including negligence), contract, or otherwise,
155 |       unless required by applicable law (such as deliberate and grossly
156 |       negligent acts) or agreed to in writing, shall any Contributor be
157 |       liable to You for damages, including any direct, indirect, special,
158 |       incidental, or consequential damages of any character arising as a
159 |       result of this License or out of the use or inability to use the
160 |       Work (including but not limited to damages for loss of goodwill,
161 |       work stoppage, computer failure or malfunction, or any and all
162 |       other commercial damages or losses), even if such Contributor
163 |       has been advised of the possibility of such damages.
164 | 
165 |    9. Accepting Warranty or Additional Liability. While redistributing
166 |       the Work or Derivative Works thereof, You may choose to offer,
167 |       and charge a fee for, acceptance of support, warranty, indemnity,
168 |       or other liability obligations and/or rights consistent with this
169 |       License. However, in accepting such obligations, You may act only
170 |       on Your own behalf and on Your sole responsibility, not on behalf
171 |       of any other Contributor, and only if You agree to indemnify,
172 |       defend, and hold each Contributor harmless for any liability
173 |       incurred by, or claims asserted against, such Contributor by reason
174 |       of your accepting any such warranty or additional liability.
175 | 
176 |    END OF TERMS AND CONDITIONS
177 | 
178 |    APPENDIX: How to apply the Apache License to your work.
179 | 
180 |       To apply the Apache License to your work, attach the following
181 |       boilerplate notice, with the fields enclosed by brackets "[]"
182 |       replaced with your own identifying information. (Don't include
183 |       the brackets!)  The text should be enclosed in the appropriate
184 |       comment syntax for the file format. We also recommend that a
185 |       file or class name and description of purpose be included on the
186 |       same "printed page" as the copyright notice for easier
187 |       identification within third-party archives.
188 | 
189 |    Copyright [yyyy] [name of copyright owner]
190 | 
191 |    Licensed under the Apache License, Version 2.0 (the "License");
192 |    you may not use this file except in compliance with the License.
193 |    You may obtain a copy of the License at
194 | 
195 |        http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 |    Unless required by applicable law or agreed to in writing, software
198 |    distributed under the License is distributed on an "AS IS" BASIS,
199 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 |    See the License for the specific language governing permissions and
201 |    limitations under the License.
202 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Kaggle_PLAsTiCC
 2 | Part of our solution to PLAsTiCC Kaggle challenge
 3 | 
 4 | I was part of a team that finished 5th in this very challenging Kaggle competition: https://www.kaggle.com/c/PLAsTiCC-2018 . It was challenging because we had to classify unevenly spaced time series.  All time series problems I worked on before were regularly sampled time series. Sure, some values could be missing, but nothing like what we have here.  Moreover, it was an open classification problem, with more classes in the test data than in the train data.
 5 | 
 6 | I describe my part of the solution here: https://www.kaggle.com/c/PLAsTiCC-2018/discussion/75050 (mostly feature engineering and LightGBM models).
 7 | 
 8 | A teammate, Kun Hao Yeh, describes his part here: https://www.kaggle.com/c/PLAsTiCC-2018/discussion/75040 (mostly RNNs).
 9 | 
10 | What is missing is a description of how we stacked the models, which was the work of our third team member, SomethingIsWrong.
11 | 
12 | The code in the code directory assumes that the competition data is in the input directory.  The data directory is used to store additional data.
13 | 
14 | The submissions directory contains files ready for submission.
15 | 


--------------------------------------------------------------------------------
/code/README.md:
--------------------------------------------------------------------------------
 1 | Notebooks should be executed in the following order:
 2 | 
 3 | * `test_chunks.ipynb`  Splits test data into chunks and stores them.
 4 | * `base_features_006_filter_error_6_std.ipynb`  Base features used for lightgbm models.
 5 | * `bazin_003.ipynb` Fits Bazin curves for all extragalactic sources.
 6 | * `newling_003.ipynb` Fits Newling curves for all extragalactic sources.
 7 | * `lgb_best.ipynb` Computes a submission file using the above features.  It is derived from the best model we had, which scored 0.752 on the public leaderboard.  The difference is that our best model also used out-of-fold predictions from other models, mostly RNN and MLP models produced by my teammates.  I removed that part to make the code self-contained.
 8 | 
 9 | These notebooks aren't used for the competition solutions, but they contain pretty graphics:
10 | * `eda_031_celerite.ipynb` Fits a Gaussian process, generates curves from it, and displays them.
11 | * `eda_024_bazin.ipynb`  Fits and displays Bazin curves.
12 | * `eda_026_newling.ipynb` Fits and displays Newling curves.
13 | 


--------------------------------------------------------------------------------
/code/base_features_006_filter_error_6_std.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "fname='base_006'\n",
 10 |     "\n",
 11 |     "n_tta = 6\n",
 12 |     "\n",
 13 |     "seed = 0"
 14 |    ]
 15 |   },
 16 |   {
 17 |    "cell_type": "code",
 18 |    "execution_count": 2,
 19 |    "metadata": {},
 20 |    "outputs": [],
 21 |    "source": [
 22 |     "import numpy as np\n",
 23 |     "import pandas as pd\n",
 24 |     "from sklearn.model_selection import StratifiedKFold\n",
 25 |     "from sklearn.metrics import confusion_matrix\n",
 26 |     "from scipy.optimize import curve_fit\n",
 27 |     "import gc\n",
 28 |     "import matplotlib.pyplot as plt\n",
 29 |     "import seaborn as sns\n",
 30 |     "import lightgbm as lgb\n",
 31 |     "import xgboost as xgb\n",
 32 |     "import logging\n",
 33 |     "from tqdm import tqdm_notebook\n",
 34 |     "import itertools\n",
 35 |     "import pickle as pkl\n",
 36 |     "\n",
 37 |     "from multiprocessing import Pool"
 38 |    ]
 39 |   },
 40 |   {
 41 |    "cell_type": "code",
 42 |    "execution_count": 3,
 43 |    "metadata": {},
 44 |    "outputs": [],
 45 |    "source": [
 46 |     "import random as rn\n",
 47 |     "def init_seeds(seed):\n",
 48 |     "\n",
 49 |     "    # The below is necessary for starting Numpy generated random numbers\n",
 50 |     "    # in a well-defined initial state.\n",
 51 |     "\n",
 52 |     "    np.random.seed(seed)\n",
 53 |     "\n",
 54 |     "    # The below is necessary for starting core Python generated random numbers\n",
 55 |     "    # in a well-defined state.\n",
 56 |     "\n",
 57 |     "    rn.seed(seed)\n",
 58 |     "\n",
 59 |     "\n",
 60 |     "init_seeds(seed)"
 61 |    ]
 62 |   },
 63 |   {
 64 |    "cell_type": "code",
 65 |    "execution_count": 5,
 66 |    "metadata": {},
 67 |    "outputs": [],
 68 |    "source": [
 69 |     "def get_aggregations():\n",
 70 |     "    aggs = {\n",
 71 |     "        'flux' : ['min', 'max', 'mean', 'std', 'skew'],\n",
 72 |     "        'flux_delta' : ['mean', 'median', 'std'],\n",
 73 |     "        'flux_err' : ['min', 'max', 'mean', 'median', 'std'],\n",
 74 |     "        'detected' : ['mean'],  # ''min', 'max', 'mean', 'median', 'std'],\n",
 75 |     "        'flux_ratio_sq' : ['sum'],\n",
 76 |     "        'flux_by_flux_ratio_sq' : ['sum'],\n",
 77 |     "        'mjd_detected' : ['min', 'max'],\n",
 78 |     "        'mjd_detected_std' : ['min', 'max'],\n",
 79 |     "        'flux_detected' : ['mean'],  # ''min', 'max', 'mean', 'median', 'std'],\n",
 80 |     "        'flux_slope_change' : ['mean'],\n",
 81 |     "        'scale':['mean'],\n",
 82 |     "        'magnitude':['mean'],\n",
 83 |     "    }\n",
 84 |     "    \n",
 85 |     "    for pb in range(6):\n",
 86 |     "        flux_pb = 'flux_%d' % pb\n",
 87 |     "        aggs[flux_pb] = ['min', 'max', 'mean', 'median', 'std', 'skew']\n",
 88 |     "        flux_delta_pb = 'flux_delta_%d' % pb\n",
 89 |     "        aggs[flux_delta_pb] = ['std']\n",
 90 |     "        detected_pb = 'detected_%d' % pb\n",
 91 |     "        aggs[detected_pb] = ['mean']\n",
 92 |     "        flux_pb_detected = 'flux_%d_detected' % pb\n",
 93 |     "        aggs[flux_pb_detected] = ['mean']\n",
 94 |     "        flux_ratio_sq_pb = 'flux_ratio_sq_%d' % pb\n",
 95 |     "        aggs[flux_ratio_sq_pb] = ['sum']\n",
 96 |     "        flux_by_flux_ratio_sq_pb = 'flux_by_flux_ratio_sq_%d' % pb\n",
 97 |     "        aggs[flux_by_flux_ratio_sq_pb] = ['sum']\n",
 98 |     "    return aggs\n",
 99 |     "\n",
100 |     "\n",
101 |     "def get_new_columns(aggs):\n",
102 |     "    return [k + '_' + agg for k in aggs.keys() for agg in aggs[k]]\n",
103 |     "\n",
104 |     "def apply_kurt(df):\n",
105 |     "    cols = ['flux'] + ['flux_%d' % pb for pb in range(6)]\n",
106 |     "    agg =  df.groupby('object_id')[cols].apply(pd.DataFrame.kurt)\n",
107 |     "    agg.columns = [c+'_kurt' for c in agg.columns]\n",
108 |     "    return agg\n",
109 |     "\n",
110 |     "def apply_kurt_delta(df):\n",
111 |     "    cols = ['flux_delta'] \n",
112 |     "    agg =  df.groupby('object_id')[cols].apply(pd.DataFrame.kurt)\n",
113 |     "    agg.columns = [c+'_kurt' for c in agg.columns]\n",
114 |     "    return agg\n",
115 |     "\n",
116 |     "def add_features_to_agg(df):\n",
117 |     "    df['mjd_detected_diff'] = df['mjd_detected_max'] - df['mjd_detected_min']\n",
118 |     "    del df['mjd_detected_max'], df['mjd_detected_min']\n",
119 |     "    df['mjd_detected_std_diff'] = df['mjd_detected_std_max'] - df['mjd_detected_std_min']\n",
120 |     "    del df['mjd_detected_std_max'], df['mjd_detected_std_min']\n",
121 |     "    df['flux_diff'] = df['flux_max'] - df['flux_min']\n",
122 |     "    df['flux_dif2'] = (df['flux_max'] - df['flux_min']) / df['flux_mean']\n",
123 |     "    df['flux_w_mean'] = df['flux_by_flux_ratio_sq_sum'] / df['flux_ratio_sq_sum']\n",
124 |     "    df['flux_dif3'] = (df['flux_max'] - df['flux_min']) / df['flux_w_mean']\n",
125 |     "    df['flux_detected_ratio'] = df['flux_detected_mean'] / df['flux_mean']\n",
126 |     "    df['flux_delta_mean_ratio'] = df['flux_delta_mean'] / df['flux_mean']\n",
127 |     "    df['flux_delta_std_ratio'] = df['flux_delta_std'] / df['flux_std']\n",
128 |     "    #df['flux_delta_skew_ratio'] = df['flux_delta_skew'] / df['flux_skew']\n",
129 |     "    #del df['flux_delta_skew']\n",
130 |     "    #df['flux_delta_kurt_ratio'] = df['flux_delta_kurt'] / df['flux_kurt']\n",
131 |     "    for pb in range(6):\n",
132 |     "        #df['flux_%d_diff' % pb] = df['flux_%d_max' % pb] - df['flux_%d_min' % pb]\n",
133 |     "        #df['flux_%d_diff_2' % pb] = (df['flux_%d_max' % pb] - df['flux_%d_min' % pb]) / df['flux_%d_mean' % pb]\n",
134 |     "\n",
135 |     "        df['flux_%d_detected_ratio' % pb] = df['flux_%d_detected_mean' % pb] / df['flux_%d_mean' % pb]\n",
136 |     "        df['flux_%d_mean' % pb] /= df.flux_mean\n",
137 |     "        df['flux_%d_detected_mean' % pb] /= df.flux_detected_mean\n",
138 |     "        df['flux_%d_max_ratio' % pb] = df['flux_%d_max' % pb] / df['flux_max']\n",
139 |     "        #df['flux_%d_min_ratio' % pb] = df['flux_%d_min' % pb] / df['flux_min']\n",
140 |     "        #df['flux_delta_%d_std_ratio' % pb] = df['flux_delta_%d_std' % pb] / df['flux_delta_std']\n",
141 |     "        df['flux_delta_%d_std_ratio_2' % pb] = df['flux_delta_%d_std' % pb] / df['flux_std']\n",
142 |     "        #df['flux_%d_std_ratio' % pb] = df['flux_%d_std' % pb] / df['flux_std']\n",
143 |     "        df['flux_%d_w_mean' % pb] = df['flux_by_flux_ratio_sq_%d_sum' % pb] / df['flux_ratio_sq_%d_sum' % pb]\n",
144 |     "        df['flux_%d_dif3'] = (df['flux_%d_max' % pb] - df['flux_%d_min' % pb]) / df['flux_%d_w_mean' % pb]\n",
145 |     "        df['flux_%d_w_mean' % pb] /= df['flux_w_mean']\n",
146 |     "        #df['flux_ratio_sq_%d_sum' % pb] /= df['flux_ratio_sq_sum']\n",
147 |     "        del df['flux_delta_%d_std' % pb], df['flux_by_flux_ratio_sq_%d_sum' % pb]\n",
148 |     "        #del df['flux_ratio_sq_%d_sum' % pb]\n",
149 |     "    del df['flux_by_flux_ratio_sq_sum']\n",
150 |     "    return df"
151 |    ]
152 |   },
153 |   {
154 |    "cell_type": "code",
155 |    "execution_count": 6,
156 |    "metadata": {},
157 |    "outputs": [],
158 |    "source": [
159 |     "def add_features_before_agg(df):\n",
160 |     "    \n",
161 |     "    df['flux_ratio_sq'] = np.power(df['flux'] / df['flux_err'], 2.0)\n",
162 |     "    df['flux_by_flux_ratio_sq'] = df['flux'] * df['flux_ratio_sq']\n",
163 |     "    \n",
164 |     "    df['mjd_detected'] = np.NaN\n",
165 |     "    df.loc[df.detected == 1, 'mjd_detected'] = df.loc[df.detected == 1, 'mjd']\n",
166 |     "    \n",
167 |     "    df['flux_detected'] = np.NaN\n",
168 |     "    df.loc[df.detected == 1, 'flux_detected'] = df.loc[df.detected == 1, 'flux']\n",
169 |     "\n",
170 |     "    df['mjd_detected_std'] = np.NaN\n",
171 |     "    df.loc[df.detected_std == 1, 'mjd_detected_std'] = df.loc[df.detected_std == 1, 'mjd']\n",
172 |     "    \n",
173 |     "    gr = df.groupby(['object_id', 'passband'])\n",
174 |     "    df['flux_prev'] = gr.flux.shift(1)\n",
175 |     "    df['mjd_prev'] = gr.mjd.shift(1)\n",
176 |     "    \n",
177 |     "    df['flux_delta'] = (df.flux - df.flux_prev) \n",
178 |     "    df['flux_delta_abs'] = np.abs(df.flux_delta)\n",
179 |     "\n",
180 |     "    df.loc[df.flux_delta_abs * df.scale < 10, 'flux_delta'] = np.NaN\n",
181 |     "    df.loc[(df.mjd - df.mjd_prev) > 100, 'flux_delta'] = np.NaN\n",
182 |     "    df['flux_slope'] = np.sign(df.flux_delta) \n",
183 |     "                               \n",
184 |     "    df['flux_slope_prev'] = gr.flux_slope.shift(1).fillna('prev')\n",
185 |     "    df['flux_slope_change'] = 1*(df['flux_slope'] != df['flux_slope_prev'])                               \n",
186 |     "    del df['flux_prev'], df['flux_slope_prev'], df['flux_slope'], df['mjd_prev']\n",
187 |     "\n",
188 |     "    for pb in range(6):\n",
189 |     "        filter_p = (df.passband == pb)\n",
190 |     "        \n",
191 |     "        flux_pb = 'flux_%d' % pb\n",
192 |     "        df[flux_pb] = np.NaN\n",
193 |     "        df.loc[filter_p, flux_pb] = df.loc[filter_p, 'flux']\n",
194 |     "\n",
195 |     "        flux_delta_pb = 'flux_delta_%d' % pb\n",
196 |     "        df[flux_delta_pb] = np.NaN\n",
197 |     "        df.loc[filter_p, flux_delta_pb] = df.loc[filter_p, 'flux_delta']\n",
198 |     "        \n",
199 |     "        detected_pb = 'detected_%d' % pb\n",
200 |     "        df[detected_pb] = 0\n",
201 |     "        df.loc[filter_p, detected_pb] = df.loc[filter_p, 'detected']\n",
202 |     "        \n",
203 |     "        flux_pb_detected = 'flux_%d_detected' % pb\n",
204 |     "        df[flux_pb_detected] = np.NaN\n",
205 |     "        df.loc[filter_p, flux_pb_detected] = df.loc[filter_p, 'flux_detected']\n",
206 |     "        \n",
207 |     "        flux_ratio_sq_pb = 'flux_ratio_sq_%d' % pb\n",
208 |     "        df[flux_ratio_sq_pb] = np.NaN\n",
209 |     "        df.loc[filter_p, flux_ratio_sq_pb] = df.loc[filter_p, 'flux_ratio_sq']\n",
210 |     "\n",
211 |     "        flux_by_flux_ratio_sq_pb = 'flux_by_flux_ratio_sq_%d' % pb\n",
212 |     "        df[flux_by_flux_ratio_sq_pb] = np.NaN\n",
213 |     "        df.loc[filter_p, flux_by_flux_ratio_sq_pb] = df.loc[filter_p, 'flux_by_flux_ratio_sq']\n",
214 |     "\n",
215 |     "   "
216 |    ]
217 |   },
218 |   {
219 |    "cell_type": "code",
220 |    "execution_count": 8,
221 |    "metadata": {},
222 |    "outputs": [],
223 |    "source": [
224 |     "def add_features(df_, meta_, throughputs=throughputs):\n",
225 |     "    df_ = df_.copy()\n",
226 |     "    \n",
227 |     "    #df_['scale'] = 1\n",
228 |     "    gr = df_.groupby('object_id')\n",
229 |     "    df_['scale'] = gr.flux.transform('max')\n",
230 |     "    df_['magnitude'] = df_['scale'] - gr.flux.transform('min')\n",
231 |     "    \n",
232 |     "    df_.flux /= df_.scale\n",
233 |     "    df_.flux_err /= df_.scale\n",
234 |     "    \n",
235 |     "    gr = df_.groupby(['object_id', 'passband'])  \n",
236 |     "    flux_err_mean = gr.flux_err.transform('mean')\n",
237 |     "    flux_err_std = gr.flux_err.transform('std')\n",
238 |     "    df_ = df_[df_.flux_err <= flux_err_mean + 6*flux_err_std].copy()\n",
239 |     "\n",
240 |     "    gr = df_.groupby(['object_id', 'passband'])  \n",
241 |     "    flux_std = gr.flux.transform('std')\n",
242 |     "    flux_mean = gr.flux.transform('mean')\n",
243 |     "    df_['detected_std'] = df_.detected * (df_.flux > flux_mean + 1*flux_std)\n",
244 |     "    \n",
245 |     "    add_features_before_agg(df_)\n",
246 |     "\n",
247 |     "    aggs = get_aggregations()\n",
248 |     "    new_columns = get_new_columns(aggs)\n",
249 |     "\n",
250 |     "    agg_ = df_.groupby('object_id').agg(aggs)\n",
251 |     "    agg_.columns = new_columns\n",
252 |     "\n",
253 |     "    agg_ = add_features_to_agg(df=agg_)\n",
254 |     "    \n",
255 |     "    agg_kurt = apply_kurt(df_)\n",
256 |     "    \n",
257 |     "    #agg_kurt_delta = apply_kurt_delta(df_)\n",
258 |     "    \n",
259 |     "    agg_ = pd.concat([agg_, agg_kurt], axis=1).reset_index()\n",
260 |     "    #agg_ = agg_.merge(agg_bazin, how='left', on='object_id')\n",
261 |     "\n",
262 |     "    # Merge with meta data\n",
263 |     "    full_df = agg_.merge(\n",
264 |     "        right=meta_,\n",
265 |     "        how='left',\n",
266 |     "        on='object_id'\n",
267 |     "    )\n",
268 |     "    full_df['magnitude_mean'] *= (full_df.hostgal_photoz ** 2)\n",
269 |     "\n",
270 |     "    del agg_\n",
271 |     "    return full_df"
272 |    ]
273 |   },
274 |   {
275 |    "cell_type": "code",
276 |    "execution_count": 9,
277 |    "metadata": {},
278 |    "outputs": [],
279 |    "source": [
280 |     "# training time data augmentation\n",
281 |     "\n",
282 |     "def get_tta(train, meta_train, i):\n",
283 |     "    df = train.copy()\n",
284 |     "    init_seeds(i)\n",
285 |     "    if i > 0:\n",
286 |     "        df['flux'] += df['flux_err'] * np.random.randn(*df['flux_err'].shape)\n",
287 |     "    df = add_features(df, meta_train)\n",
288 |     "    \n",
289 |     "    return df"
290 |    ]
291 |   },
292 |   {
293 |    "cell_type": "code",
294 |    "execution_count": 10,
295 |    "metadata": {},
296 |    "outputs": [
297 |     {
298 |      "data": {
299 |       "text/html": [
300 |        "<div>\n",
301 |        "<style scoped>\n",
302 |        "    .dataframe tbody tr th:only-of-type {\n",
303 |        "        vertical-align: middle;\n",
304 |        "    }\n",
305 |        "\n",
306 |        "    .dataframe tbody tr th {\n",
307 |        "        vertical-align: top;\n",
308 |        "    }\n",
309 |        "\n",
310 |        "    .dataframe thead th {\n",
311 |        "        text-align: right;\n",
312 |        "    }\n",
313 |        "</style>\n",
314 |        "<table border=\"1\" class=\"dataframe\">\n",
315 |        "  <thead>\n",
316 |        "    <tr style=\"text-align: right;\">\n",
317 |        "      <th></th>\n",
318 |        "      <th>object_id</th>\n",
319 |        "      <th>mjd</th>\n",
320 |        "      <th>passband</th>\n",
321 |        "      <th>flux</th>\n",
322 |        "      <th>flux_err</th>\n",
323 |        "      <th>detected</th>\n",
324 |        "    </tr>\n",
325 |        "  </thead>\n",
326 |        "  <tbody>\n",
327 |        "    <tr>\n",
328 |        "      <th>0</th>\n",
329 |        "      <td>615</td>\n",
330 |        "      <td>59750.4229</td>\n",
331 |        "      <td>2</td>\n",
332 |        "      <td>-544.810303</td>\n",
333 |        "      <td>3.622952</td>\n",
334 |        "      <td>1</td>\n",
335 |        "    </tr>\n",
336 |        "    <tr>\n",
337 |        "      <th>1</th>\n",
338 |        "      <td>615</td>\n",
339 |        "      <td>59750.4306</td>\n",
340 |        "      <td>1</td>\n",
341 |        "      <td>-816.434326</td>\n",
342 |        "      <td>5.553370</td>\n",
343 |        "      <td>1</td>\n",
344 |        "    </tr>\n",
345 |        "    <tr>\n",
346 |        "      <th>2</th>\n",
347 |        "      <td>615</td>\n",
348 |        "      <td>59750.4383</td>\n",
349 |        "      <td>3</td>\n",
350 |        "      <td>-471.385529</td>\n",
351 |        "      <td>3.801213</td>\n",
352 |        "      <td>1</td>\n",
353 |        "    </tr>\n",
354 |        "    <tr>\n",
355 |        "      <th>3</th>\n",
356 |        "      <td>615</td>\n",
357 |        "      <td>59750.4450</td>\n",
358 |        "      <td>4</td>\n",
359 |        "      <td>-388.984985</td>\n",
360 |        "      <td>11.395031</td>\n",
361 |        "      <td>1</td>\n",
362 |        "    </tr>\n",
363 |        "    <tr>\n",
364 |        "      <th>4</th>\n",
365 |        "      <td>615</td>\n",
366 |        "      <td>59752.4070</td>\n",
367 |        "      <td>2</td>\n",
368 |        "      <td>-681.858887</td>\n",
369 |        "      <td>4.041204</td>\n",
370 |        "      <td>1</td>\n",
371 |        "    </tr>\n",
372 |        "  </tbody>\n",
373 |        "</table>\n",
374 |        "</div>"
375 |       ],
376 |       "text/plain": [
377 |        "   object_id         mjd  passband        flux   flux_err  detected\n",
378 |        "0        615  59750.4229         2 -544.810303   3.622952         1\n",
379 |        "1        615  59750.4306         1 -816.434326   5.553370         1\n",
380 |        "2        615  59750.4383         3 -471.385529   3.801213         1\n",
381 |        "3        615  59750.4450         4 -388.984985  11.395031         1\n",
382 |        "4        615  59752.4070         2 -681.858887   4.041204         1"
383 |       ]
384 |      },
385 |      "execution_count": 10,
386 |      "metadata": {},
387 |      "output_type": "execute_result"
388 |     }
389 |    ],
390 |    "source": [
391 |     "train = pd.read_csv('../input/training_set.csv')\n",
392 |     "train.head()"
393 |    ]
394 |   },
395 |   {
396 |    "cell_type": "code",
397 |    "execution_count": 11,
398 |    "metadata": {},
399 |    "outputs": [
400 |     {
401 |      "data": {
402 |       "text/html": [
403 |        "<div>\n",
404 |        "<style scoped>\n",
405 |        "    .dataframe tbody tr th:only-of-type {\n",
406 |        "        vertical-align: middle;\n",
407 |        "    }\n",
408 |        "\n",
409 |        "    .dataframe tbody tr th {\n",
410 |        "        vertical-align: top;\n",
411 |        "    }\n",
412 |        "\n",
413 |        "    .dataframe thead th {\n",
414 |        "        text-align: right;\n",
415 |        "    }\n",
416 |        "</style>\n",
417 |        "<table border=\"1\" class=\"dataframe\">\n",
418 |        "  <thead>\n",
419 |        "    <tr style=\"text-align: right;\">\n",
420 |        "      <th></th>\n",
421 |        "      <th>object_id</th>\n",
422 |        "      <th>ddf</th>\n",
423 |        "      <th>hostgal_photoz</th>\n",
424 |        "      <th>target</th>\n",
425 |        "    </tr>\n",
426 |        "  </thead>\n",
427 |        "  <tbody>\n",
428 |        "    <tr>\n",
429 |        "      <th>0</th>\n",
430 |        "      <td>615</td>\n",
431 |        "      <td>1</td>\n",
432 |        "      <td>0.0000</td>\n",
433 |        "      <td>92</td>\n",
434 |        "    </tr>\n",
435 |        "    <tr>\n",
436 |        "      <th>1</th>\n",
437 |        "      <td>713</td>\n",
438 |        "      <td>1</td>\n",
439 |        "      <td>1.6267</td>\n",
440 |        "      <td>88</td>\n",
441 |        "    </tr>\n",
442 |        "    <tr>\n",
443 |        "      <th>2</th>\n",
444 |        "      <td>730</td>\n",
445 |        "      <td>1</td>\n",
446 |        "      <td>0.2262</td>\n",
447 |        "      <td>42</td>\n",
448 |        "    </tr>\n",
449 |        "    <tr>\n",
450 |        "      <th>3</th>\n",
451 |        "      <td>745</td>\n",
452 |        "      <td>1</td>\n",
453 |        "      <td>0.2813</td>\n",
454 |        "      <td>90</td>\n",
455 |        "    </tr>\n",
456 |        "    <tr>\n",
457 |        "      <th>4</th>\n",
458 |        "      <td>1124</td>\n",
459 |        "      <td>1</td>\n",
460 |        "      <td>0.2415</td>\n",
461 |        "      <td>90</td>\n",
462 |        "    </tr>\n",
463 |        "  </tbody>\n",
464 |        "</table>\n",
465 |        "</div>"
466 |       ],
467 |       "text/plain": [
468 |        "   object_id  ddf  hostgal_photoz  target\n",
469 |        "0        615    1          0.0000      92\n",
470 |        "1        713    1          1.6267      88\n",
471 |        "2        730    1          0.2262      42\n",
472 |        "3        745    1          0.2813      90\n",
473 |        "4       1124    1          0.2415      90"
474 |       ]
475 |      },
476 |      "execution_count": 11,
477 |      "metadata": {},
478 |      "output_type": "execute_result"
479 |     }
480 |    ],
481 |    "source": [
482 |     "meta_cols = ['object_id', 'ddf', 'hostgal_photoz', 'target']\n",
483 |     "meta_train = pd.read_csv('../input/training_set_metadata.csv')[meta_cols]\n",
484 |     "meta_train.head()"
485 |    ]
486 |   },
487 |   {
488 |    "cell_type": "code",
489 |    "execution_count": 12,
490 |    "metadata": {
491 |     "scrolled": true
492 |    },
493 |    "outputs": [
494 |     {
495 |      "data": {
496 |       "application/vnd.jupyter.widget-view+json": {
497 |        "model_id": "2c2dd22bf588423bbceb9b32fecc7621",
498 |        "version_major": 2,
499 |        "version_minor": 0
500 |       },
501 |       "text/plain": [
502 |        "HBox(children=(IntProgress(value=0, max=11), HTML(value='')))"
503 |       ]
504 |      },
505 |      "metadata": {},
506 |      "output_type": "display_data"
507 |     },
508 |     {
509 |      "name": "stdout",
510 |      "output_type": "stream",
511 |      "text": [
512 |       "\n"
513 |      ]
514 |     }
515 |    ],
516 |    "source": [
517 |     "n_tta = 11\n",
518 |     "# One get_tta() result per index in range(n_tta), so the count follows n_tta.\n",
519 |     "ttas = [get_tta(train, meta_train, i) for i in tqdm_notebook(range(n_tta))]\n",
520 |     "\n",
521 |     "#for tta in ttas:\n",
522 |     "#    tta.fillna(train_mean, inplace=True)"
523 |    ]
524 |   },
525 |   {
526 |    "cell_type": "code",
527 |    "execution_count": 13,
528 |    "metadata": {},
529 |    "outputs": [],
530 |    "source": [
531 |     "with open('../data/ttas_%s.pkl' % fname, 'wb') as file:\n",
532 |     "    pkl.dump(ttas, file)\n"
533 |    ]
534 |   },
535 |   {
536 |    "cell_type": "code",
537 |    "execution_count": 14,
538 |    "metadata": {},
539 |    "outputs": [],
540 |    "source": [
541 |     "def work_test(param):\n",
542 |     "    \"\"\"Run add_features on one pickled test chunk and pickle the result; returns 'done'.\"\"\"\n",
543 |     "    chunk_id, fname = param\n",
544 |     "    print('starting worker', chunk_id)\n",
545 |     "    meta_test = pd.read_csv('../input/test_set_metadata.csv')\n",
546 |     "    with open('../input/test_chunk_%d.csv' % chunk_id, 'rb') as src:  # unpickled despite the .csv name\n",
547 |     "        full_test = add_features(pkl.load(src), meta_test)\n",
548 |     "    out_path = '../data/full_test_chunk_%s_%d.pkl' % (fname, chunk_id)\n",
549 |     "    with open(out_path, 'wb') as dst:\n",
550 |     "        pkl.dump(full_test, dst)\n",
551 |     "    print('ending worker', chunk_id)\n",
552 |     "    return 'done'"
553 |    ]
554 |   },
555 |   {
556 |    "cell_type": "code",
557 |    "execution_count": 15,
558 |    "metadata": {
559 |     "scrolled": true
560 |    },
561 |    "outputs": [
562 |     {
563 |      "name": "stdout",
564 |      "output_type": "stream",
565 |      "text": [
566 |       "starting worker 0\n",
567 |       "starting worker 1\n",
568 |       "starting worker 3\n",
569 |       "starting worker 2\n",
570 |       "starting worker 4\n",
571 |       "ending worker 0\n",
572 |       "starting worker 5\n",
573 |       "ending worker 1\n",
574 |       "starting worker 6\n",
575 |       "ending worker 2\n",
576 |       "starting worker 7\n",
577 |       "ending worker 3\n",
578 |       "starting worker 8\n",
579 |       "ending worker 4\n",
580 |       "starting worker 9\n",
581 |       "ending worker 5\n",
582 |       "starting worker 10\n",
583 |       "ending worker 6\n",
584 |       "starting worker 11\n",
585 |       "ending worker 7\n",
586 |       "starting worker 12\n",
587 |       "ending worker 9\n",
588 |       "starting worker 13\n",
589 |       "ending worker 8\n",
590 |       "starting worker 14\n",
591 |       "ending worker 10\n",
592 |       "starting worker 15\n",
593 |       "ending worker 11\n",
594 |       "starting worker 16\n",
595 |       "ending worker 12\n",
596 |       "starting worker 17\n",
597 |       "ending worker 13\n",
598 |       "starting worker 18\n",
599 |       "ending worker 14\n",
600 |       "starting worker 19\n",
601 |       "ending worker 15\n",
602 |       "starting worker 20\n",
603 |       "ending worker 16\n",
604 |       "starting worker 21\n",
605 |       "ending worker 17\n",
606 |       "starting worker 22\n",
607 |       "ending worker 18\n",
608 |       "starting worker 23\n",
609 |       "ending worker 19\n",
610 |       "starting worker 24\n",
611 |       "ending worker 20\n",
612 |       "starting worker 25\n",
613 |       "ending worker 21\n",
614 |       "starting worker 26\n",
615 |       "ending worker 22\n",
616 |       "starting worker 27\n",
617 |       "ending worker 23\n",
618 |       "starting worker 28\n",
619 |       "ending worker 24\n",
620 |       "starting worker 29\n",
621 |       "ending worker 25\n",
622 |       "starting worker 30\n",
623 |       "ending worker 26\n",
624 |       "starting worker 31\n",
625 |       "ending worker 27\n",
626 |       "starting worker 32\n",
627 |       "ending worker 28\n",
628 |       "starting worker 33\n",
629 |       "ending worker 29\n",
630 |       "starting worker 34\n",
631 |       "ending worker 30\n",
632 |       "starting worker 35\n",
633 |       "ending worker 31\n",
634 |       "starting worker 36\n",
635 |       "ending worker 32\n",
636 |       "starting worker 37\n",
637 |       "ending worker 33\n",
638 |       "starting worker 38\n",
639 |       "ending worker 34\n",
640 |       "starting worker 39\n",
641 |       "ending worker 35\n",
642 |       "starting worker 40\n",
643 |       "ending worker 36\n",
644 |       "starting worker 41\n",
645 |       "ending worker 37\n",
646 |       "starting worker 42\n",
647 |       "ending worker 38\n",
648 |       "starting worker 43\n",
649 |       "ending worker 39\n",
650 |       "starting worker 44\n",
651 |       "ending worker 40\n",
652 |       "starting worker 45\n",
653 |       "ending worker 41\n",
654 |       "starting worker 46\n",
655 |       "ending worker 42\n",
656 |       "starting worker 47\n",
657 |       "ending worker 43\n",
658 |       "starting worker 48\n",
659 |       "ending worker 44\n",
660 |       "starting worker 49\n",
661 |       "ending worker 45\n",
662 |       "starting worker 50\n",
663 |       "ending worker 46\n",
664 |       "starting worker 51\n",
665 |       "ending worker 47\n",
666 |       "starting worker 52\n",
667 |       "ending worker 48\n",
668 |       "starting worker 53\n",
669 |       "ending worker 49\n",
670 |       "starting worker 54\n",
671 |       "ending worker 50\n",
672 |       "starting worker 55\n",
673 |       "ending worker 51\n",
674 |       "starting worker 56\n",
675 |       "ending worker 52\n",
676 |       "starting worker 57\n",
677 |       "ending worker 53\n",
678 |       "starting worker 58\n",
679 |       "ending worker 54\n",
680 |       "starting worker 59\n",
681 |       "ending worker 55\n",
682 |       "starting worker 60\n",
683 |       "ending worker 56\n",
684 |       "starting worker 61\n",
685 |       "ending worker 57\n",
686 |       "starting worker 62\n",
687 |       "ending worker 58\n",
688 |       "starting worker 63\n",
689 |       "ending worker 59\n",
690 |       "starting worker 64\n",
691 |       "ending worker 60\n",
692 |       "starting worker 65\n",
693 |       "ending worker 61\n",
694 |       "starting worker 66\n",
695 |       "ending worker 62\n",
696 |       "starting worker 67\n",
697 |       "ending worker 63\n",
698 |       "starting worker 68\n",
699 |       "ending worker 64\n",
700 |       "starting worker 69\n",
701 |       "ending worker 65\n",
702 |       "starting worker 70\n",
703 |       "ending worker 66\n",
704 |       "starting worker 71\n",
705 |       "ending worker 67\n",
706 |       "starting worker 72\n",
707 |       "ending worker 68\n",
708 |       "starting worker 73\n",
709 |       "ending worker 69\n",
710 |       "starting worker 74\n",
711 |       "ending worker 71\n",
712 |       "starting worker 75\n",
713 |       "ending worker 70\n",
714 |       "starting worker 76\n",
715 |       "ending worker 73\n",
716 |       "starting worker 77\n",
717 |       "ending worker 72\n",
718 |       "starting worker 78\n",
719 |       "ending worker 74\n",
720 |       "starting worker 79\n",
721 |       "ending worker 75\n",
722 |       "starting worker 80\n",
723 |       "ending worker 76\n",
724 |       "starting worker 81\n",
725 |       "ending worker 77\n",
726 |       "starting worker 82\n",
727 |       "ending worker 78\n",
728 |       "starting worker 83\n",
729 |       "ending worker 79\n",
730 |       "starting worker 84\n",
731 |       "ending worker 80\n",
732 |       "starting worker 85\n",
733 |       "ending worker 81\n",
734 |       "starting worker 86\n",
735 |       "ending worker 82\n",
736 |       "starting worker 87\n",
737 |       "ending worker 83\n",
738 |       "starting worker 88\n",
739 |       "ending worker 84\n",
740 |       "starting worker 89\n",
741 |       "ending worker 85\n",
742 |       "starting worker 90\n",
743 |       "ending worker 86\n",
744 |       "starting worker 100\n",
745 |       "ending worker 100\n",
746 |       "ending worker 87\n",
747 |       "ending worker 88\n",
748 |       "ending worker 90\n",
749 |       "ending worker 89\n"
750 |      ]
751 |     }
752 |    ],
753 |    "source": [
754 |     "# Chunk ids 0-90 plus id 100, each paired with this notebook's fname.\n",
755 |     "params = [(i, fname) for i in range(91)] + [(100, fname)]\n",
756 |     "if 1:\n",
757 |     "    # The with-block tears the worker processes down even when a chunk raises.\n",
758 |     "    with Pool(processes=5, maxtasksperchild=1) as pool:\n",
759 |     "        ls = pool.map(work_test, params, chunksize=1)\n",
760 |     "else:\n",
761 |     "    # Serial fallback in the current process.\n",
762 |     "    ls = [work_test(param) for param in params]"
763 |    ]
764 |   },
765 |   {
766 |    "cell_type": "code",
767 |    "execution_count": null,
768 |    "metadata": {},
769 |    "outputs": [],
770 |    "source": []
771 |   }
772 |  ],
773 |  "metadata": {
774 |   "kernelspec": {
775 |    "display_name": "Python [conda env:xgb8]",
776 |    "language": "python",
777 |    "name": "conda-env-xgb8-py"
778 |   },
779 |   "language_info": {
780 |    "codemirror_mode": {
781 |     "name": "ipython",
782 |     "version": 3
783 |    },
784 |    "file_extension": ".py",
785 |    "mimetype": "text/x-python",
786 |    "name": "python",
787 |    "nbconvert_exporter": "python",
788 |    "pygments_lexer": "ipython3",
789 |    "version": "3.6.5"
790 |   }
791 |  },
792 |  "nbformat": 4,
793 |  "nbformat_minor": 2
794 | }
795 | 


--------------------------------------------------------------------------------
/code/celerite_003.ipynb:
--------------------------------------------------------------------------------
   1 | {
   2 |  "cells": [
   3 |   {
   4 |    "cell_type": "code",
   5 |    "execution_count": 1,
   6 |    "metadata": {},
   7 |    "outputs": [],
   8 |    "source": [
   9 |     "fname='celerite_003'\n",
  10 |     "\n",
  11 |     "n_tta = 6\n",
  12 |     "\n",
  13 |     "seed = 0"
  14 |    ]
  15 |   },
  16 |   {
  17 |    "cell_type": "code",
  18 |    "execution_count": 2,
  19 |    "metadata": {},
  20 |    "outputs": [],
  21 |    "source": [
  22 |     "import numpy as np\n",
  23 |     "import pandas as pd\n",
  24 |     "from sklearn.model_selection import StratifiedKFold\n",
  25 |     "from sklearn.metrics import confusion_matrix\n",
  26 |     "import gc\n",
  27 |     "import matplotlib.pyplot as plt\n",
  28 |     "import seaborn as sns\n",
  29 |     "import logging\n",
  30 |     "from tqdm import tqdm_notebook\n",
  31 |     "import itertools\n",
  32 |     "import pickle as pkl\n",
  33 |     "\n",
  34 |     "import autograd\n",
  35 |     "import celerite\n",
  36 |     "from celerite import terms\n",
  37 |     "import scipy.optimize as op\n",
  38 |     "from scipy.optimize import minimize\n",
  39 |     "\n",
  40 |     "\n",
  41 |     "from multiprocessing import Pool"
  42 |    ]
  43 |   },
  44 |   {
  45 |    "cell_type": "code",
  46 |    "execution_count": 3,
  47 |    "metadata": {},
  48 |    "outputs": [],
  49 |    "source": [
  50 |     "import random as rn\n",
  51 |     "\n",
  52 |     "\n",
  53 |     "def init_seeds(seed):\n",
  54 |     "    \"\"\"Seed the NumPy and core-Python random number generators.\n",
  55 |     "\n",
  56 |     "    Both generators are put into a well-defined initial state\n",
  57 |     "    determined by `seed`.\n",
  58 |     "    \"\"\"\n",
  59 |     "    for seed_fn in (np.random.seed, rn.seed):\n",
  60 |     "        seed_fn(seed)\n",
  61 |     "\n",
  62 |     "\n",
  63 |     "# Apply the notebook-level seed.\n",
  64 |     "init_seeds(seed)"
  65 |    ]
  66 |   },
  67 |   {
  68 |    "cell_type": "code",
  69 |    "execution_count": 4,
  70 |    "metadata": {},
  71 |    "outputs": [],
  72 |    "source": [
  73 |     "# eda_031_celerite\n",
  74 |     "def get_gp(train, object_id, expand=True):\n",
  75 |     "    passbands = [0, 1, 2, 3, 4, 5]\n",
  76 |     "    n_param = 2\n",
  77 |     "    res = pd.DataFrame()\n",
  78 |     "    res['object_id'] = [object_id]\n",
  79 |     "    for pb in passbands:\n",
  80 |     "        for i in range(n_param):\n",
  81 |     "            res['celerite_%d_%d' % (pb, i)] = np.NaN\n",
  82 |     "    df0 = train[train.object_id == object_id]\n",
  83 |     "    if df0.hostgal_photoz.mean() == 0:\n",
  84 |     "        return res\n",
  85 |     "    offset = 11\n",
  86 |     "    for pb in range(6):\n",
  87 |     "        if True:\n",
  88 |     "            df = df0[(df0.object_id == object_id) & (df0.passband == pb)]\n",
  89 |     "            flux_err_mean = df.flux_err.mean()\n",
  90 |     "            flux_err_std = df.flux_err.std()\n",
  91 |     "            df = df[df.flux_err <= flux_err_mean + 6*flux_err_std]\n",
  92 |     "            mjd_delta_prev = (df.mjd - df.mjd.shift(1)).fillna(100).values.ravel()\n",
  93 |     "            mjd_delta_next = (df.mjd.shift(-1) - df.mjd).fillna(100).values.ravel()\n",
  94 |     "            x_min = df.mjd.min()\n",
  95 |     "            x_max = df.mjd.max()\n",
  96 |     "            yerr_mean = df.flux_err.mean()\n",
  97 |     "            x = df.mjd.values\n",
  98 |     "            y = df.flux.values\n",
  99 |     "            yerr = df.flux_err\n",
 100 |     "            if expand:\n",
 101 |     "                mjd_delta_prev = np.concatenate((100 * np.ones((offset,)),\n",
 102 |     "                                    mjd_delta_prev,\n",
 103 |     "                                    100 * np.ones((offset,)),\n",
 104 |     "                                  ))\n",
 105 |     "                mjd_delta_next = np.concatenate((100 * np.ones((offset,)),\n",
 106 |     "                                    mjd_delta_next,\n",
 107 |     "                                    100 * np.ones((offset,)),\n",
 108 |     "                                  ))\n",
 109 |     "                x = np.concatenate((np.linspace(x_min-250, x_min -200, offset),\n",
 110 |     "                                    x,\n",
 111 |     "                                    np.linspace(x_max+200, x_max+250, offset),\n",
 112 |     "                                  ))\n",
 113 |     "                y = np.concatenate((np.random.randn(offset) * yerr_mean,\n",
 114 |     "                                    y,\n",
 115 |     "                                    np.random.randn(offset) * yerr_mean\n",
 116 |     "                                   ))\n",
 117 |     "                yerr = np.concatenate((yerr_mean * np.ones(offset),\n",
 118 |     "                                        yerr,\n",
 119 |     "                                        yerr_mean * np.ones(offset)\n",
 120 |     "                                      ))\n",
 121 |     "            #ystd = y.std()\n",
 122 |     "            #y /= ystd\n",
 123 |     "            #yerr = yerr / ystd\n",
 124 |     "\n",
 125 |     "            # A Matern32 component\n",
 126 |     "            log_sigma = 0\n",
 127 |     "            log_rho = 0\n",
 128 |     "            eps = 0.001\n",
 129 |     "            bounds = dict(log_sigma=(-15, 15), log_rho=(-15, 15))\n",
 130 |     "            kernel = terms.Matern32Term(log_sigma=log_sigma, log_rho=log_rho, eps=eps, bounds=bounds)\n",
 131 |     "            #kernel.freeze_parameter(\"eps\")  # We don't want to fit for \"Q\" in this term\n",
 132 |     "\n",
 133 |     "\n",
 134 |     "            gp = celerite.GP(kernel, mean=0)\n",
 135 |     "            gp.compute(x, yerr)  # You always need to call compute once.\n",
 136 |     "\n",
 137 |     "            def neg_log_like(params, y, gp):\n",
 138 |     "                gp.set_parameter_vector(params)\n",
 139 |     "                return -gp.log_likelihood(y)\n",
 140 |     "\n",
 141 |     "            def grad_neg_log_like(params, y, gp):\n",
 142 |     "                gp.set_parameter_vector(params)\n",
 143 |     "                return -gp.grad_log_likelihood(y)[1]\n",
 144 |     "\n",
 145 |     "            initial_params = gp.get_parameter_vector()\n",
 146 |     "            bounds = gp.get_parameter_bounds()\n",
 147 |     "\n",
 148 |     "            r = minimize(neg_log_like, initial_params, jac=grad_neg_log_like, \n",
 149 |     "                         method=\"L-BFGS-B\", bounds=bounds, args=(y, gp))\n",
 150 |     "            for i in range(n_param):\n",
 151 |     "                res['celerite_%d_%d' % (pb, i)] = r.x[i]\n",
 152 |     "        else:\n",
 153 |     "            continue\n",
 154 |     "    return res"
 155 |    ]
 156 |   },
 157 |   {
 158 |    "cell_type": "code",
 159 |    "execution_count": 5,
 160 |    "metadata": {},
 161 |    "outputs": [],
 162 |    "source": [
 163 |     "def apply_gp(df, meta):\n",
 164 |     "    \"\"\"Return one get_gp() row per object_id; each call gets only that object's rows (no full rescan).\"\"\"\n",
 165 |     "    df = df[['object_id', 'mjd', 'passband', 'flux', 'flux_err']].merge(meta[['object_id', 'hostgal_photoz']], how='left', on='object_id')\n",
 166 |     "    agg = [get_gp(rows, oid) for oid, rows in tqdm_notebook(df.groupby('object_id', sort=False))]\n",
 167 |     "    return pd.concat(agg, axis=0)"
 168 |    ]
 169 |   },
 170 |   {
 171 |    "cell_type": "code",
 172 |    "execution_count": 6,
 173 |    "metadata": {},
 174 |    "outputs": [
 175 |     {
 176 |      "data": {
 177 |       "text/html": [
 178 |        "<div>\n",
 179 |        "<style scoped>\n",
 180 |        "    .dataframe tbody tr th:only-of-type {\n",
 181 |        "        vertical-align: middle;\n",
 182 |        "    }\n",
 183 |        "\n",
 184 |        "    .dataframe tbody tr th {\n",
 185 |        "        vertical-align: top;\n",
 186 |        "    }\n",
 187 |        "\n",
 188 |        "    .dataframe thead th {\n",
 189 |        "        text-align: right;\n",
 190 |        "    }\n",
 191 |        "</style>\n",
 192 |        "<table border=\"1\" class=\"dataframe\">\n",
 193 |        "  <thead>\n",
 194 |        "    <tr style=\"text-align: right;\">\n",
 195 |        "      <th></th>\n",
 196 |        "      <th>object_id</th>\n",
 197 |        "      <th>mjd</th>\n",
 198 |        "      <th>passband</th>\n",
 199 |        "      <th>flux</th>\n",
 200 |        "      <th>flux_err</th>\n",
 201 |        "      <th>detected</th>\n",
 202 |        "    </tr>\n",
 203 |        "  </thead>\n",
 204 |        "  <tbody>\n",
 205 |        "    <tr>\n",
 206 |        "      <th>0</th>\n",
 207 |        "      <td>615</td>\n",
 208 |        "      <td>59750.4229</td>\n",
 209 |        "      <td>2</td>\n",
 210 |        "      <td>-544.810303</td>\n",
 211 |        "      <td>3.622952</td>\n",
 212 |        "      <td>1</td>\n",
 213 |        "    </tr>\n",
 214 |        "    <tr>\n",
 215 |        "      <th>1</th>\n",
 216 |        "      <td>615</td>\n",
 217 |        "      <td>59750.4306</td>\n",
 218 |        "      <td>1</td>\n",
 219 |        "      <td>-816.434326</td>\n",
 220 |        "      <td>5.553370</td>\n",
 221 |        "      <td>1</td>\n",
 222 |        "    </tr>\n",
 223 |        "    <tr>\n",
 224 |        "      <th>2</th>\n",
 225 |        "      <td>615</td>\n",
 226 |        "      <td>59750.4383</td>\n",
 227 |        "      <td>3</td>\n",
 228 |        "      <td>-471.385529</td>\n",
 229 |        "      <td>3.801213</td>\n",
 230 |        "      <td>1</td>\n",
 231 |        "    </tr>\n",
 232 |        "    <tr>\n",
 233 |        "      <th>3</th>\n",
 234 |        "      <td>615</td>\n",
 235 |        "      <td>59750.4450</td>\n",
 236 |        "      <td>4</td>\n",
 237 |        "      <td>-388.984985</td>\n",
 238 |        "      <td>11.395031</td>\n",
 239 |        "      <td>1</td>\n",
 240 |        "    </tr>\n",
 241 |        "    <tr>\n",
 242 |        "      <th>4</th>\n",
 243 |        "      <td>615</td>\n",
 244 |        "      <td>59752.4070</td>\n",
 245 |        "      <td>2</td>\n",
 246 |        "      <td>-681.858887</td>\n",
 247 |        "      <td>4.041204</td>\n",
 248 |        "      <td>1</td>\n",
 249 |        "    </tr>\n",
 250 |        "  </tbody>\n",
 251 |        "</table>\n",
 252 |        "</div>"
 253 |       ],
 254 |       "text/plain": [
 255 |        "   object_id         mjd  passband        flux   flux_err  detected\n",
 256 |        "0        615  59750.4229         2 -544.810303   3.622952         1\n",
 257 |        "1        615  59750.4306         1 -816.434326   5.553370         1\n",
 258 |        "2        615  59750.4383         3 -471.385529   3.801213         1\n",
 259 |        "3        615  59750.4450         4 -388.984985  11.395031         1\n",
 260 |        "4        615  59752.4070         2 -681.858887   4.041204         1"
 261 |       ]
 262 |      },
 263 |      "execution_count": 6,
 264 |      "metadata": {},
 265 |      "output_type": "execute_result"
 266 |     }
 267 |    ],
 268 |    "source": [
 269 |     "train = pd.read_csv('../input/training_set.csv')\n",
 270 |     "train.head()"
 271 |    ]
 272 |   },
 273 |   {
 274 |    "cell_type": "code",
 275 |    "execution_count": 7,
 276 |    "metadata": {},
 277 |    "outputs": [
 278 |     {
 279 |      "data": {
 280 |       "text/html": [
 281 |        "<div>\n",
 282 |        "<style scoped>\n",
 283 |        "    .dataframe tbody tr th:only-of-type {\n",
 284 |        "        vertical-align: middle;\n",
 285 |        "    }\n",
 286 |        "\n",
 287 |        "    .dataframe tbody tr th {\n",
 288 |        "        vertical-align: top;\n",
 289 |        "    }\n",
 290 |        "\n",
 291 |        "    .dataframe thead th {\n",
 292 |        "        text-align: right;\n",
 293 |        "    }\n",
 294 |        "</style>\n",
 295 |        "<table border=\"1\" class=\"dataframe\">\n",
 296 |        "  <thead>\n",
 297 |        "    <tr style=\"text-align: right;\">\n",
 298 |        "      <th></th>\n",
 299 |        "      <th>object_id</th>\n",
 300 |        "      <th>ddf</th>\n",
 301 |        "      <th>hostgal_photoz</th>\n",
 302 |        "      <th>target</th>\n",
 303 |        "    </tr>\n",
 304 |        "  </thead>\n",
 305 |        "  <tbody>\n",
 306 |        "    <tr>\n",
 307 |        "      <th>0</th>\n",
 308 |        "      <td>615</td>\n",
 309 |        "      <td>1</td>\n",
 310 |        "      <td>0.0000</td>\n",
 311 |        "      <td>92</td>\n",
 312 |        "    </tr>\n",
 313 |        "    <tr>\n",
 314 |        "      <th>1</th>\n",
 315 |        "      <td>713</td>\n",
 316 |        "      <td>1</td>\n",
 317 |        "      <td>1.6267</td>\n",
 318 |        "      <td>88</td>\n",
 319 |        "    </tr>\n",
 320 |        "    <tr>\n",
 321 |        "      <th>2</th>\n",
 322 |        "      <td>730</td>\n",
 323 |        "      <td>1</td>\n",
 324 |        "      <td>0.2262</td>\n",
 325 |        "      <td>42</td>\n",
 326 |        "    </tr>\n",
 327 |        "    <tr>\n",
 328 |        "      <th>3</th>\n",
 329 |        "      <td>745</td>\n",
 330 |        "      <td>1</td>\n",
 331 |        "      <td>0.2813</td>\n",
 332 |        "      <td>90</td>\n",
 333 |        "    </tr>\n",
 334 |        "    <tr>\n",
 335 |        "      <th>4</th>\n",
 336 |        "      <td>1124</td>\n",
 337 |        "      <td>1</td>\n",
 338 |        "      <td>0.2415</td>\n",
 339 |        "      <td>90</td>\n",
 340 |        "    </tr>\n",
 341 |        "  </tbody>\n",
 342 |        "</table>\n",
 343 |        "</div>"
 344 |       ],
 345 |       "text/plain": [
 346 |        "   object_id  ddf  hostgal_photoz  target\n",
 347 |        "0        615    1          0.0000      92\n",
 348 |        "1        713    1          1.6267      88\n",
 349 |        "2        730    1          0.2262      42\n",
 350 |        "3        745    1          0.2813      90\n",
 351 |        "4       1124    1          0.2415      90"
 352 |       ]
 353 |      },
 354 |      "execution_count": 7,
 355 |      "metadata": {},
 356 |      "output_type": "execute_result"
 357 |     }
 358 |    ],
 359 |    "source": [
 360 |     "meta_cols = ['object_id', 'ddf', 'hostgal_photoz', 'target']\n",
 361 |     "meta_train = pd.read_csv('../input/training_set_metadata.csv')[meta_cols]\n",
 362 |     "meta_train.head()"
 363 |    ]
 364 |   },
 365 |   {
 366 |    "cell_type": "code",
 367 |    "execution_count": 8,
 368 |    "metadata": {},
 369 |    "outputs": [
 370 |     {
 371 |      "data": {
 372 |       "text/html": [
 373 |        "<div>\n",
 374 |        "<style scoped>\n",
 375 |        "    .dataframe tbody tr th:only-of-type {\n",
 376 |        "        vertical-align: middle;\n",
 377 |        "    }\n",
 378 |        "\n",
 379 |        "    .dataframe tbody tr th {\n",
 380 |        "        vertical-align: top;\n",
 381 |        "    }\n",
 382 |        "\n",
 383 |        "    .dataframe thead th {\n",
 384 |        "        text-align: right;\n",
 385 |        "    }\n",
 386 |        "</style>\n",
 387 |        "<table border=\"1\" class=\"dataframe\">\n",
 388 |        "  <thead>\n",
 389 |        "    <tr style=\"text-align: right;\">\n",
 390 |        "      <th></th>\n",
 391 |        "      <th>object_id</th>\n",
 392 |        "      <th>celerite_0_0</th>\n",
 393 |        "      <th>celerite_0_1</th>\n",
 394 |        "      <th>celerite_1_0</th>\n",
 395 |        "      <th>celerite_1_1</th>\n",
 396 |        "      <th>celerite_2_0</th>\n",
 397 |        "      <th>celerite_2_1</th>\n",
 398 |        "      <th>celerite_3_0</th>\n",
 399 |        "      <th>celerite_3_1</th>\n",
 400 |        "      <th>celerite_4_0</th>\n",
 401 |        "      <th>celerite_4_1</th>\n",
 402 |        "      <th>celerite_5_0</th>\n",
 403 |        "      <th>celerite_5_1</th>\n",
 404 |        "    </tr>\n",
 405 |        "  </thead>\n",
 406 |        "  <tbody>\n",
 407 |        "    <tr>\n",
 408 |        "      <th>0</th>\n",
 409 |        "      <td>4173</td>\n",
 410 |        "      <td>4.372327</td>\n",
 411 |        "      <td>5.003063</td>\n",
 412 |        "      <td>4.66952</td>\n",
 413 |        "      <td>5.350874</td>\n",
 414 |        "      <td>5.380249</td>\n",
 415 |        "      <td>5.955909</td>\n",
 416 |        "      <td>4.256575</td>\n",
 417 |        "      <td>5.256</td>\n",
 418 |        "      <td>3.884235</td>\n",
 419 |        "      <td>5.017465</td>\n",
 420 |        "      <td>3.452796</td>\n",
 421 |        "      <td>4.815897</td>\n",
 422 |        "    </tr>\n",
 423 |        "  </tbody>\n",
 424 |        "</table>\n",
 425 |        "</div>"
 426 |       ],
 427 |       "text/plain": [
 428 |        "   object_id  celerite_0_0  celerite_0_1  celerite_1_0  celerite_1_1  \\\n",
 429 |        "0       4173      4.372327      5.003063       4.66952      5.350874   \n",
 430 |        "\n",
 431 |        "   celerite_2_0  celerite_2_1  celerite_3_0  celerite_3_1  celerite_4_0  \\\n",
 432 |        "0      5.380249      5.955909      4.256575         5.256      3.884235   \n",
 433 |        "\n",
 434 |        "   celerite_4_1  celerite_5_0  celerite_5_1  \n",
 435 |        "0      5.017465      3.452796      4.815897  "
 436 |       ]
 437 |      },
 438 |      "execution_count": 8,
 439 |      "metadata": {},
 440 |      "output_type": "execute_result"
 441 |     }
 442 |    ],
 443 |    "source": [
 444 |     "get_gp(train.merge(meta_train, how='left', on='object_id'), 4173)"
 445 |    ]
 446 |   },
 447 |   {
 448 |    "cell_type": "code",
 449 |    "execution_count": 8,
 450 |    "metadata": {},
 451 |    "outputs": [
 452 |     {
 453 |      "data": {
 454 |       "text/html": [
 455 |        "<div>\n",
 456 |        "<style scoped>\n",
 457 |        "    .dataframe tbody tr th:only-of-type {\n",
 458 |        "        vertical-align: middle;\n",
 459 |        "    }\n",
 460 |        "\n",
 461 |        "    .dataframe tbody tr th {\n",
 462 |        "        vertical-align: top;\n",
 463 |        "    }\n",
 464 |        "\n",
 465 |        "    .dataframe thead th {\n",
 466 |        "        text-align: right;\n",
 467 |        "    }\n",
 468 |        "</style>\n",
 469 |        "<table border=\"1\" class=\"dataframe\">\n",
 470 |        "  <thead>\n",
 471 |        "    <tr style=\"text-align: right;\">\n",
 472 |        "      <th></th>\n",
 473 |        "      <th>object_id</th>\n",
 474 |        "      <th>celerite_0_0</th>\n",
 475 |        "      <th>celerite_0_1</th>\n",
 476 |        "      <th>celerite_1_0</th>\n",
 477 |        "      <th>celerite_1_1</th>\n",
 478 |        "      <th>celerite_2_0</th>\n",
 479 |        "      <th>celerite_2_1</th>\n",
 480 |        "      <th>celerite_3_0</th>\n",
 481 |        "      <th>celerite_3_1</th>\n",
 482 |        "      <th>celerite_4_0</th>\n",
 483 |        "      <th>celerite_4_1</th>\n",
 484 |        "      <th>celerite_5_0</th>\n",
 485 |        "      <th>celerite_5_1</th>\n",
 486 |        "    </tr>\n",
 487 |        "  </thead>\n",
 488 |        "  <tbody>\n",
 489 |        "    <tr>\n",
 490 |        "      <th>0</th>\n",
 491 |        "      <td>4173</td>\n",
 492 |        "      <td>4.372327</td>\n",
 493 |        "      <td>5.003063</td>\n",
 494 |        "      <td>4.66952</td>\n",
 495 |        "      <td>5.350874</td>\n",
 496 |        "      <td>5.380249</td>\n",
 497 |        "      <td>5.955909</td>\n",
 498 |        "      <td>4.256575</td>\n",
 499 |        "      <td>5.256</td>\n",
 500 |        "      <td>3.884235</td>\n",
 501 |        "      <td>5.017465</td>\n",
 502 |        "      <td>3.440875</td>\n",
 503 |        "      <td>4.792469</td>\n",
 504 |        "    </tr>\n",
 505 |        "  </tbody>\n",
 506 |        "</table>\n",
 507 |        "</div>"
 508 |       ],
 509 |       "text/plain": [
 510 |        "   object_id  celerite_0_0  celerite_0_1  celerite_1_0  celerite_1_1  \\\n",
 511 |        "0       4173      4.372327      5.003063       4.66952      5.350874   \n",
 512 |        "\n",
 513 |        "   celerite_2_0  celerite_2_1  celerite_3_0  celerite_3_1  celerite_4_0  \\\n",
 514 |        "0      5.380249      5.955909      4.256575         5.256      3.884235   \n",
 515 |        "\n",
 516 |        "   celerite_4_1  celerite_5_0  celerite_5_1  \n",
 517 |        "0      5.017465      3.440875      4.792469  "
 518 |       ]
 519 |      },
 520 |      "execution_count": 8,
 521 |      "metadata": {},
 522 |      "output_type": "execute_result"
 523 |     }
 524 |    ],
 525 |    "source": []
 526 |   },
 527 |   {
 528 |    "cell_type": "code",
 529 |    "execution_count": 9,
 530 |    "metadata": {},
 531 |    "outputs": [],
 532 |    "source": [
 533 |     "def work_tta(param):\n",
 534 |     "    '''Run apply_gp on the training light curves (flux perturbed when the index is > 0) and pickle the result.'''\n",
 535 |     "    seed, tag = param\n",
 536 |     "    print('starting worker', seed)\n",
 537 |     "    curves = pd.read_csv('../input/training_set.csv').copy()\n",
 538 |     "    meta_train = pd.read_csv('../input/training_set_metadata.csv')[meta_cols]\n",
 539 |     "    if seed > 0:\n",
 540 |     "        init_seeds(seed)  # defined elsewhere in the notebook; presumably seeds the RNG for this run\n",
 541 |     "        curves['flux'] += curves['flux_err'] * np.random.randn(len(curves))\n",
 542 |     "    features = apply_gp(curves, meta_train)\n",
 543 |     "    with open('../data/tta_%d_%s.pkl' % (seed, tag), 'wb') as file:\n",
 544 |     "        pkl.dump(features, file)\n",
 545 |     "    print('ending worker', seed)\n",
 546 |     "    return 'done'"
 547 |    ]
 548 |   },
 549 |   {
 550 |    "cell_type": "code",
 551 |    "execution_count": 10,
 552 |    "metadata": {
 553 |     "scrolled": true
 554 |    },
 555 |    "outputs": [
 556 |     {
 557 |      "name": "stdout",
 558 |      "output_type": "stream",
 559 |      "text": [
 560 |       "starting worker 2\n",
 561 |       "starting worker 0\n",
 562 |       "starting worker 5\n",
 563 |       "starting worker 1\n",
 564 |       "starting worker 3\n",
 565 |       "starting worker 7\n",
 566 |       "starting worker 8\n",
 567 |       "starting worker 4\n",
 568 |       "starting worker 6\n",
 569 |       "starting worker 9\n",
 570 |       "starting worker 10\n"
 571 |      ]
 572 |     },
 573 |     {
 574 |      "data": {
 575 |       "application/vnd.jupyter.widget-view+json": {
 576 |        "model_id": "055ec5381e224bf4aa8b822d643bef44",
 577 |        "version_major": 2,
 578 |        "version_minor": 0
 579 |       },
 580 |       "text/plain": [
 581 |        "HBox(children=(IntProgress(value=0, max=7848), HTML(value='')))"
 582 |       ]
 583 |      },
 584 |      "metadata": {},
 585 |      "output_type": "display_data"
 586 |     },
 587 |     {
 588 |      "data": {
 589 |       "application/vnd.jupyter.widget-view+json": {
 590 |        "model_id": "eccc4f4abb6f4469949010054621ba13",
 591 |        "version_major": 2,
 592 |        "version_minor": 0
 593 |       },
 594 |       "text/plain": [
 595 |        "HBox(children=(IntProgress(value=0, max=7848), HTML(value='')))"
 596 |       ]
 597 |      },
 598 |      "metadata": {},
 599 |      "output_type": "display_data"
 600 |     },
 601 |     {
 602 |      "data": {
 603 |       "application/vnd.jupyter.widget-view+json": {
 604 |        "model_id": "7a5a7f381a2148ddb753e3993676928a",
 605 |        "version_major": 2,
 606 |        "version_minor": 0
 607 |       },
 608 |       "text/plain": [
 609 |        "HBox(children=(IntProgress(value=0, max=7848), HTML(value='')))"
 610 |       ]
 611 |      },
 612 |      "metadata": {},
 613 |      "output_type": "display_data"
 614 |     },
 615 |     {
 616 |      "data": {
 617 |       "application/vnd.jupyter.widget-view+json": {
 618 |        "model_id": "3b5c7b17c28247fbb1248c91feabee0b",
 619 |        "version_major": 2,
 620 |        "version_minor": 0
 621 |       },
 622 |       "text/plain": [
 623 |        "HBox(children=(IntProgress(value=0, max=7848), HTML(value='')))"
 624 |       ]
 625 |      },
 626 |      "metadata": {},
 627 |      "output_type": "display_data"
 628 |     },
 629 |     {
 630 |      "data": {
 631 |       "application/vnd.jupyter.widget-view+json": {
 632 |        "model_id": "1f0df15e730e42a0bc602dbc25b4d249",
 633 |        "version_major": 2,
 634 |        "version_minor": 0
 635 |       },
 636 |       "text/plain": [
 637 |        "HBox(children=(IntProgress(value=0, max=7848), HTML(value='')))"
 638 |       ]
 639 |      },
 640 |      "metadata": {},
 641 |      "output_type": "display_data"
 642 |     },
 643 |     {
 644 |      "data": {
 645 |       "application/vnd.jupyter.widget-view+json": {
 646 |        "model_id": "fff19743f3bd47f880d906aa2bd99cda",
 647 |        "version_major": 2,
 648 |        "version_minor": 0
 649 |       },
 650 |       "text/plain": [
 651 |        "HBox(children=(IntProgress(value=0, max=7848), HTML(value='')))"
 652 |       ]
 653 |      },
 654 |      "metadata": {},
 655 |      "output_type": "display_data"
 656 |     },
 657 |     {
 658 |      "data": {
 659 |       "application/vnd.jupyter.widget-view+json": {
 660 |        "model_id": "baefe6c321c345cb97e504a0eda324eb",
 661 |        "version_major": 2,
 662 |        "version_minor": 0
 663 |       },
 664 |       "text/plain": [
 665 |        "HBox(children=(IntProgress(value=0, max=7848), HTML(value='')))"
 666 |       ]
 667 |      },
 668 |      "metadata": {},
 669 |      "output_type": "display_data"
 670 |     },
 671 |     {
 672 |      "data": {
 673 |       "application/vnd.jupyter.widget-view+json": {
 674 |        "model_id": "b4605f0a84414c29a339d7517733972b",
 675 |        "version_major": 2,
 676 |        "version_minor": 0
 677 |       },
 678 |       "text/plain": [
 679 |        "HBox(children=(IntProgress(value=0, max=7848), HTML(value='')))"
 680 |       ]
 681 |      },
 682 |      "metadata": {},
 683 |      "output_type": "display_data"
 684 |     },
 685 |     {
 686 |      "data": {
 687 |       "application/vnd.jupyter.widget-view+json": {
 688 |        "model_id": "d469f7b4f9af44ff93910f5aec1e17e2",
 689 |        "version_major": 2,
 690 |        "version_minor": 0
 691 |       },
 692 |       "text/plain": [
 693 |        "HBox(children=(IntProgress(value=0, max=7848), HTML(value='')))"
 694 |       ]
 695 |      },
 696 |      "metadata": {},
 697 |      "output_type": "display_data"
 698 |     },
 699 |     {
 700 |      "data": {
 701 |       "application/vnd.jupyter.widget-view+json": {
 702 |        "model_id": "c8045a76ae1b4a7dbfdce2b89e8be1f9",
 703 |        "version_major": 2,
 704 |        "version_minor": 0
 705 |       },
 706 |       "text/plain": [
 707 |        "HBox(children=(IntProgress(value=0, max=7848), HTML(value='')))"
 708 |       ]
 709 |      },
 710 |      "metadata": {},
 711 |      "output_type": "display_data"
 712 |     },
 713 |     {
 714 |      "data": {
 715 |       "application/vnd.jupyter.widget-view+json": {
 716 |        "model_id": "f368619da67e491a9399790950f72025",
 717 |        "version_major": 2,
 718 |        "version_minor": 0
 719 |       },
 720 |       "text/plain": [
 721 |        "HBox(children=(IntProgress(value=0, max=7848), HTML(value='')))"
 722 |       ]
 723 |      },
 724 |      "metadata": {},
 725 |      "output_type": "display_data"
 726 |     },
 727 |     {
 728 |      "name": "stdout",
 729 |      "output_type": "stream",
 730 |      "text": [
 731 |       "\n",
 732 |       "ending worker 7\n",
 733 |       "\n",
 734 |       "ending worker 8\n",
 735 |       "\n",
 736 |       "\n",
 737 |       "\n",
 738 |       "ending worker 3\n",
 739 |       "\n",
 740 |       "\n",
 741 |       "ending worker 5\n",
 742 |       "\n",
 743 |       "ending worker 0\n",
 744 |       "ending worker 9\n",
 745 |       "ending worker 4\n",
 746 |       "ending worker 1\n",
 747 |       "\n",
 748 |       "ending worker 10\n",
 749 |       "\n",
 750 |       "ending worker 2\n",
 751 |       "\n",
 752 |       "ending worker 6\n"
 753 |      ]
 754 |     }
 755 |    ],
 756 |    "source": [
 757 |     "params = [(seed, fname) for seed in range(11)]  # one task per run; work_tta leaves run 0 unperturbed\n",
 758 |     "# Change the condition below to 0 to run the tasks one after another in this process.\n",
 759 |     "if 1:\n",
 760 |     "    pool = Pool(processes=len(params), maxtasksperchild=1)\n",
 761 |     "    ls = pool.map(work_tta, params, chunksize=1)\n",
 762 |     "    pool.close()\n",
 763 |     "else:\n",
 764 |     "    ls = list(map(work_tta, params))"
 765 |    ]
 766 |   },
 767 |   {
 768 |    "cell_type": "code",
 769 |    "execution_count": null,
 770 |    "metadata": {
 771 |     "scrolled": true
 772 |    },
 773 |    "outputs": [
 774 |     {
 775 |      "name": "stdout",
 776 |      "output_type": "stream",
 777 |      "text": [
 778 |       "starting worker 1\n",
 779 |       "starting worker 0\n",
 780 |       "starting worker 2\n",
 781 |       "starting worker 3\n",
 782 |       "starting worker 7\n",
 783 |       "starting worker 5\n",
 784 |       "starting worker 4\n",
 785 |       "starting worker 6\n",
 786 |       "starting worker 8\n",
 787 |       "starting worker 10\n",
 788 |       "starting worker 9\n"
 789 |      ]
 790 |     },
 791 |     {
 792 |      "data": {
 793 |       "application/vnd.jupyter.widget-view+json": {
 794 |        "model_id": "b73097db30c948ec95bdeae042d30f72",
 795 |        "version_major": 2,
 796 |        "version_minor": 0
 797 |       },
 798 |       "text/plain": [
 799 |        "HBox(children=(IntProgress(value=0, max=7848), HTML(value='')))"
 800 |       ]
 801 |      },
 802 |      "metadata": {},
 803 |      "output_type": "display_data"
 804 |     },
 805 |     {
 806 |      "data": {
 807 |       "application/vnd.jupyter.widget-view+json": {
 808 |        "model_id": "6ecc80db2b0e4898b52a8aa4630bc0bb",
 809 |        "version_major": 2,
 810 |        "version_minor": 0
 811 |       },
 812 |       "text/plain": [
 813 |        "HBox(children=(IntProgress(value=0, max=7848), HTML(value='')))"
 814 |       ]
 815 |      },
 816 |      "metadata": {},
 817 |      "output_type": "display_data"
 818 |     },
 819 |     {
 820 |      "data": {
 821 |       "application/vnd.jupyter.widget-view+json": {
 822 |        "model_id": "6e4dd2a6fd444d06bffed8c7bd7b5c6b",
 823 |        "version_major": 2,
 824 |        "version_minor": 0
 825 |       },
 826 |       "text/plain": [
 827 |        "HBox(children=(IntProgress(value=0, max=7848), HTML(value='')))"
 828 |       ]
 829 |      },
 830 |      "metadata": {},
 831 |      "output_type": "display_data"
 832 |     },
 833 |     {
 834 |      "data": {
 835 |       "application/vnd.jupyter.widget-view+json": {
 836 |        "model_id": "0eab5b22761843f28e7c2ff2c1d99305",
 837 |        "version_major": 2,
 838 |        "version_minor": 0
 839 |       },
 840 |       "text/plain": [
 841 |        "HBox(children=(IntProgress(value=0, max=7848), HTML(value='')))"
 842 |       ]
 843 |      },
 844 |      "metadata": {},
 845 |      "output_type": "display_data"
 846 |     },
 847 |     {
 848 |      "data": {
 849 |       "application/vnd.jupyter.widget-view+json": {
 850 |        "model_id": "f5dfdb471f44464eb4cfd36bef0e95cf",
 851 |        "version_major": 2,
 852 |        "version_minor": 0
 853 |       },
 854 |       "text/plain": [
 855 |        "HBox(children=(IntProgress(value=0, max=7848), HTML(value='')))"
 856 |       ]
 857 |      },
 858 |      "metadata": {},
 859 |      "output_type": "display_data"
 860 |     },
 861 |     {
 862 |      "data": {
 863 |       "application/vnd.jupyter.widget-view+json": {
 864 |        "model_id": "e96259aa66d14fa5bacea4f554de342e",
 865 |        "version_major": 2,
 866 |        "version_minor": 0
 867 |       },
 868 |       "text/plain": [
 869 |        "HBox(children=(IntProgress(value=0, max=7848), HTML(value='')))"
 870 |       ]
 871 |      },
 872 |      "metadata": {},
 873 |      "output_type": "display_data"
 874 |     },
 875 |     {
 876 |      "data": {
 877 |       "application/vnd.jupyter.widget-view+json": {
 878 |        "model_id": "0577077a150a424fa4ef7c50426ed125",
 879 |        "version_major": 2,
 880 |        "version_minor": 0
 881 |       },
 882 |       "text/plain": [
 883 |        "HBox(children=(IntProgress(value=0, max=7848), HTML(value='')))"
 884 |       ]
 885 |      },
 886 |      "metadata": {},
 887 |      "output_type": "display_data"
 888 |     },
 889 |     {
 890 |      "data": {
 891 |       "application/vnd.jupyter.widget-view+json": {
 892 |        "model_id": "ab0b35f698fd4fa5897a129b0db250d6",
 893 |        "version_major": 2,
 894 |        "version_minor": 0
 895 |       },
 896 |       "text/plain": [
 897 |        "HBox(children=(IntProgress(value=0, max=7848), HTML(value='')))"
 898 |       ]
 899 |      },
 900 |      "metadata": {},
 901 |      "output_type": "display_data"
 902 |     },
 903 |     {
 904 |      "data": {
 905 |       "application/vnd.jupyter.widget-view+json": {
 906 |        "model_id": "17d8fbecaabd450599501d3aab450ffc",
 907 |        "version_major": 2,
 908 |        "version_minor": 0
 909 |       },
 910 |       "text/plain": [
 911 |        "HBox(children=(IntProgress(value=0, max=7848), HTML(value='')))"
 912 |       ]
 913 |      },
 914 |      "metadata": {},
 915 |      "output_type": "display_data"
 916 |     },
 917 |     {
 918 |      "data": {
 919 |       "application/vnd.jupyter.widget-view+json": {
 920 |        "model_id": "e95cb3f771f6438bb39b3374ca7c04ad",
 921 |        "version_major": 2,
 922 |        "version_minor": 0
 923 |       },
 924 |       "text/plain": [
 925 |        "HBox(children=(IntProgress(value=0, max=7848), HTML(value='')))"
 926 |       ]
 927 |      },
 928 |      "metadata": {},
 929 |      "output_type": "display_data"
 930 |     },
 931 |     {
 932 |      "data": {
 933 |       "application/vnd.jupyter.widget-view+json": {
 934 |        "model_id": "b4ddfc8e5daa4c8d9641ce0fd6dccf9e",
 935 |        "version_major": 2,
 936 |        "version_minor": 0
 937 |       },
 938 |       "text/plain": [
 939 |        "HBox(children=(IntProgress(value=0, max=7848), HTML(value='')))"
 940 |       ]
 941 |      },
 942 |      "metadata": {},
 943 |      "output_type": "display_data"
 944 |     }
 945 |    ],
 946 |    "source": []
 947 |   },
 948 |   {
 949 |    "cell_type": "code",
 950 |    "execution_count": 11,
 951 |    "metadata": {},
 952 |    "outputs": [],
 953 |    "source": [
 954 |     "def work_test(param):  # param = (chunk index, output tag); runs apply_gp on one test chunk and pickles the result\n",
 955 |     "    (i, fname) = param\n",
 956 |     "    print('starting worker', i)\n",
 957 |     "    with open('../input/test_chunk_%d.csv' % i, 'rb') as file:  # loaded with pickle despite the .csv suffix\n",
 958 |     "        test = pkl.load(file)\n",
 959 |     "    meta_test = pd.read_csv('../input/test_set_metadata.csv')[meta_cols]  # test objects need the test metadata, not the training file\n",
 960 |     "    df = apply_gp(test, meta_test)\n",
 961 |     "    with open('../data/test_%d_%s.pkl' % (i, fname), 'wb') as file:\n",
 962 |     "        pkl.dump(df, file)\n",
 963 |     "    print('ending worker', i)\n",
 964 |     "    return 'done'"
 965 |    ]
 966 |   },
 967 |   {
 968 |    "cell_type": "code",
 969 |    "execution_count": 12,
 970 |    "metadata": {
 971 |     "scrolled": true
 972 |    },
 973 |    "outputs": [
 974 |     {
 975 |      "name": "stdout",
 976 |      "output_type": "stream",
 977 |      "text": [
 978 |       "starting worker 0\n",
 979 |       "starting worker 2\n",
 980 |       "starting worker 4\n",
 981 |       "starting worker 3\n",
 982 |       "starting worker 6\n",
 983 |       "starting worker 8\n",
 984 |       "starting worker 5\n",
 985 |       "starting worker 1\n",
 986 |       "starting worker 7\n",
 987 |       "starting worker 15\n",
 988 |       "starting worker 11\n",
 989 |       "starting worker 9\n",
 990 |       "starting worker 10\n",
 991 |       "starting worker 14\n",
 992 |       "starting worker 13\n",
 993 |       "starting worker 12\n",
 994 |       "starting worker 18\n",
 995 |       "starting worker 16\n",
 996 |       "starting worker 19\n",
 997 |       "starting worker 17\n"
 998 |      ]
 999 |     },
1000 |     {
1001 |      "data": {
1002 |       "application/vnd.jupyter.widget-view+json": {
1003 |        "model_id": "081e7a429d86495c80833ac91191028f",
1004 |        "version_major": 2,
1005 |        "version_minor": 0
1006 |       },
1007 |       "text/plain": [
1008 |        "HBox(children=(IntProgress(value=0, max=15137), HTML(value='')))"
1009 |       ]
1010 |      },
1011 |      "metadata": {},
1012 |      "output_type": "display_data"
1013 |     },
1014 |     {
1015 |      "data": {
1016 |       "application/vnd.jupyter.widget-view+json": {
1017 |        "model_id": "781ff9d5e6ed4d18956acd0f7b8cac28",
1018 |        "version_major": 2,
1019 |        "version_minor": 0
1020 |       },
1021 |       "text/plain": [
1022 |        "HBox(children=(IntProgress(value=0, max=39057), HTML(value='')))"
1023 |       ]
1024 |      },
1025 |      "metadata": {},
1026 |      "output_type": "display_data"
1027 |     },
1028 |     {
1029 |      "data": {
1030 |       "application/vnd.jupyter.widget-view+json": {
1031 |        "model_id": "f3bf7e21700b458e8877e744bfaa35e7",
1032 |        "version_major": 2,
1033 |        "version_minor": 0
1034 |       },
1035 |       "text/plain": [
1036 |        "HBox(children=(IntProgress(value=0, max=39048), HTML(value='')))"
1037 |       ]
1038 |      },
1039 |      "metadata": {},
1040 |      "output_type": "display_data"
1041 |     },
1042 |     {
1043 |      "data": {
1044 |       "application/vnd.jupyter.widget-view+json": {
1045 |        "model_id": "0ae14288da2c49b7b5da6c9448508e96",
1046 |        "version_major": 2,
1047 |        "version_minor": 0
1048 |       },
1049 |       "text/plain": [
1050 |        "HBox(children=(IntProgress(value=0, max=39078), HTML(value='')))"
1051 |       ]
1052 |      },
1053 |      "metadata": {},
1054 |      "output_type": "display_data"
1055 |     },
1056 |     {
1057 |      "data": {
1058 |       "application/vnd.jupyter.widget-view+json": {
1059 |        "model_id": "ad260cbca39244f4930c06cb8a97aa59",
1060 |        "version_major": 2,
1061 |        "version_minor": 0
1062 |       },
1063 |       "text/plain": [
1064 |        "HBox(children=(IntProgress(value=0, max=39055), HTML(value='')))"
1065 |       ]
1066 |      },
1067 |      "metadata": {},
1068 |      "output_type": "display_data"
1069 |     },
1070 |     {
1071 |      "data": {
1072 |       "application/vnd.jupyter.widget-view+json": {
1073 |        "model_id": "82a0531942e94d4aa2155f706beeac1c",
1074 |        "version_major": 2,
1075 |        "version_minor": 0
1076 |       },
1077 |       "text/plain": [
1078 |        "HBox(children=(IntProgress(value=0, max=34964), HTML(value='')))"
1079 |       ]
1080 |      },
1081 |      "metadata": {},
1082 |      "output_type": "display_data"
1083 |     },
1084 |     {
1085 |      "data": {
1086 |       "application/vnd.jupyter.widget-view+json": {
1087 |        "model_id": "5f04dea202824a74ad3b409327ba06da",
1088 |        "version_major": 2,
1089 |        "version_minor": 0
1090 |       },
1091 |       "text/plain": [
1092 |        "HBox(children=(IntProgress(value=0, max=15183), HTML(value='')))"
1093 |       ]
1094 |      },
1095 |      "metadata": {},
1096 |      "output_type": "display_data"
1097 |     },
1098 |     {
1099 |      "data": {
1100 |       "application/vnd.jupyter.widget-view+json": {
1101 |        "model_id": "8349dfe2209a49db8c163dccdb0c247c",
1102 |        "version_major": 2,
1103 |        "version_minor": 0
1104 |       },
1105 |       "text/plain": [
1106 |        "HBox(children=(IntProgress(value=0, max=39079), HTML(value='')))"
1107 |       ]
1108 |      },
1109 |      "metadata": {},
1110 |      "output_type": "display_data"
1111 |     },
1112 |     {
1113 |      "data": {
1114 |       "application/vnd.jupyter.widget-view+json": {
1115 |        "model_id": "b00a953f1145411e84308e8c519760cc",
1116 |        "version_major": 2,
1117 |        "version_minor": 0
1118 |       },
1119 |       "text/plain": [
1120 |        "HBox(children=(IntProgress(value=0, max=39096), HTML(value='')))"
1121 |       ]
1122 |      },
1123 |      "metadata": {},
1124 |      "output_type": "display_data"
1125 |     },
1126 |     {
1127 |      "data": {
1128 |       "application/vnd.jupyter.widget-view+json": {
1129 |        "model_id": "1828cabe3d534c50b60b7c3184898b89",
1130 |        "version_major": 2,
1131 |        "version_minor": 0
1132 |       },
1133 |       "text/plain": [
1134 |        "HBox(children=(IntProgress(value=0, max=39054), HTML(value='')))"
1135 |       ]
1136 |      },
1137 |      "metadata": {},
1138 |      "output_type": "display_data"
1139 |     },
1140 |     {
1141 |      "data": {
1142 |       "application/vnd.jupyter.widget-view+json": {
1143 |        "model_id": "9f5957409ada4a0f944c8f80c62baa9b",
1144 |        "version_major": 2,
1145 |        "version_minor": 0
1146 |       },
1147 |       "text/plain": [
1148 |        "HBox(children=(IntProgress(value=0, max=39058), HTML(value='')))"
1149 |       ]
1150 |      },
1151 |      "metadata": {},
1152 |      "output_type": "display_data"
1153 |     },
1154 |     {
1155 |      "data": {
1156 |       "application/vnd.jupyter.widget-view+json": {
1157 |        "model_id": "1703aae7b6ec4856b95f4bfdfbc51901",
1158 |        "version_major": 2,
1159 |        "version_minor": 0
1160 |       },
1161 |       "text/plain": [
1162 |        "HBox(children=(IntProgress(value=0, max=39033), HTML(value='')))"
1163 |       ]
1164 |      },
1165 |      "metadata": {},
1166 |      "output_type": "display_data"
1167 |     },
1168 |     {
1169 |      "data": {
1170 |       "application/vnd.jupyter.widget-view+json": {
1171 |        "model_id": "bb4caad2c94140b6b7721025f86130cb",
1172 |        "version_major": 2,
1173 |        "version_minor": 0
1174 |       },
1175 |       "text/plain": [
1176 |        "HBox(children=(IntProgress(value=0, max=39087), HTML(value='')))"
1177 |       ]
1178 |      },
1179 |      "metadata": {},
1180 |      "output_type": "display_data"
1181 |     },
1182 |     {
1183 |      "data": {
1184 |       "application/vnd.jupyter.widget-view+json": {
1185 |        "model_id": "7dcffb5b2cd24f73a6ee4d74a8165763",
1186 |        "version_major": 2,
1187 |        "version_minor": 0
1188 |       },
1189 |       "text/plain": [
1190 |        "HBox(children=(IntProgress(value=0, max=39098), HTML(value='')))"
1191 |       ]
1192 |      },
1193 |      "metadata": {},
1194 |      "output_type": "display_data"
1195 |     },
1196 |     {
1197 |      "data": {
1198 |       "application/vnd.jupyter.widget-view+json": {
1199 |        "model_id": "eb55ad2cb2e6498783d5d1f19a83afc2",
1200 |        "version_major": 2,
1201 |        "version_minor": 0
1202 |       },
1203 |       "text/plain": [
1204 |        "HBox(children=(IntProgress(value=0, max=39078), HTML(value='')))"
1205 |       ]
1206 |      },
1207 |      "metadata": {},
1208 |      "output_type": "display_data"
1209 |     },
1210 |     {
1211 |      "data": {
1212 |       "application/vnd.jupyter.widget-view+json": {
1213 |        "model_id": "336f44ab4e0c4faaa3cb1e12fbcc3c88",
1214 |        "version_major": 2,
1215 |        "version_minor": 0
1216 |       },
1217 |       "text/plain": [
1218 |        "HBox(children=(IntProgress(value=0, max=39110), HTML(value='')))"
1219 |       ]
1220 |      },
1221 |      "metadata": {},
1222 |      "output_type": "display_data"
1223 |     },
1224 |     {
1225 |      "data": {
1226 |       "application/vnd.jupyter.widget-view+json": {
1227 |        "model_id": "9dad29362b4646f5901f0f4d3f0096e8",
1228 |        "version_major": 2,
1229 |        "version_minor": 0
1230 |       },
1231 |       "text/plain": [
1232 |        "HBox(children=(IntProgress(value=0, max=39096), HTML(value='')))"
1233 |       ]
1234 |      },
1235 |      "metadata": {},
1236 |      "output_type": "display_data"
1237 |     },
1238 |     {
1239 |      "data": {
1240 |       "application/vnd.jupyter.widget-view+json": {
1241 |        "model_id": "a1ede30be16d480abc72fbc0ab02c239",
1242 |        "version_major": 2,
1243 |        "version_minor": 0
1244 |       },
1245 |       "text/plain": [
1246 |        "HBox(children=(IntProgress(value=0, max=39054), HTML(value='')))"
1247 |       ]
1248 |      },
1249 |      "metadata": {},
1250 |      "output_type": "display_data"
1251 |     },
1252 |     {
1253 |      "data": {
1254 |       "application/vnd.jupyter.widget-view+json": {
1255 |        "model_id": "8b0514cee05d468a8e7ff73f51285e18",
1256 |        "version_major": 2,
1257 |        "version_minor": 0
1258 |       },
1259 |       "text/plain": [
1260 |        "HBox(children=(IntProgress(value=0, max=39095), HTML(value='')))"
1261 |       ]
1262 |      },
1263 |      "metadata": {},
1264 |      "output_type": "display_data"
1265 |     },
1266 |     {
1267 |      "data": {
1268 |       "application/vnd.jupyter.widget-view+json": {
1269 |        "model_id": "a3af7b89986e462280dd9377a6b645d8",
1270 |        "version_major": 2,
1271 |        "version_minor": 0
1272 |       },
1273 |       "text/plain": [
1274 |        "HBox(children=(IntProgress(value=0, max=39092), HTML(value='')))"
1275 |       ]
1276 |      },
1277 |      "metadata": {},
1278 |      "output_type": "display_data"
1279 |     },
1280 |     {
1281 |      "name": "stderr",
1282 |      "output_type": "stream",
1283 |      "text": [
1284 |       "/home/jfpuget/anaconda3/envs/xgb8/lib/python3.6/site-packages/celerite/celerite.py:119: RuntimeWarning: invalid value encountered in less\n",
1285 |       "  if check_sorted and np.any(np.diff(t) < 0.0):\n",
1286 |       "/home/jfpuget/anaconda3/envs/xgb8/lib/python3.6/site-packages/celerite/celerite.py:119: RuntimeWarning: invalid value encountered in less\n",
1287 |       "  if check_sorted and np.any(np.diff(t) < 0.0):\n",
1288 |       "/home/jfpuget/anaconda3/envs/xgb8/lib/python3.6/site-packages/celerite/celerite.py:119: RuntimeWarning: invalid value encountered in less\n",
1289 |       "  if check_sorted and np.any(np.diff(t) < 0.0):\n",
1290 |       "/home/jfpuget/anaconda3/envs/xgb8/lib/python3.6/site-packages/celerite/celerite.py:119: RuntimeWarning: invalid value encountered in less\n",
1291 |       "  if check_sorted and np.any(np.diff(t) < 0.0):\n",
1292 |       "/home/jfpuget/anaconda3/envs/xgb8/lib/python3.6/site-packages/celerite/celerite.py:119: RuntimeWarning: invalid value encountered in less\n",
1293 |       "  if check_sorted and np.any(np.diff(t) < 0.0):\n"
1294 |      ]
1295 |     },
1296 |     {
1297 |      "name": "stdout",
1298 |      "output_type": "stream",
1299 |      "text": [
1300 |       "\n",
1301 |       "ending worker 1\n",
1302 |       "starting worker 20\n"
1303 |      ]
1304 |     },
1305 |     {
1306 |      "data": {
1307 |       "application/vnd.jupyter.widget-view+json": {
1308 |        "model_id": "feb5b649cc574ff79ee1792feb5767e4",
1309 |        "version_major": 2,
1310 |        "version_minor": 0
1311 |       },
1312 |       "text/plain": [
1313 |        "HBox(children=(IntProgress(value=0, max=39036), HTML(value='')))"
1314 |       ]
1315 |      },
1316 |      "metadata": {},
1317 |      "output_type": "display_data"
1318 |     },
1319 |     {
1320 |      "name": "stderr",
1321 |      "output_type": "stream",
1322 |      "text": [
1323 |       "/home/jfpuget/anaconda3/envs/xgb8/lib/python3.6/site-packages/celerite/celerite.py:119: RuntimeWarning: invalid value encountered in less\n",
1324 |       "  if check_sorted and np.any(np.diff(t) < 0.0):\n",
1325 |       "/home/jfpuget/anaconda3/envs/xgb8/lib/python3.6/site-packages/celerite/celerite.py:119: RuntimeWarning: invalid value encountered in less\n",
1326 |       "  if check_sorted and np.any(np.diff(t) < 0.0):\n"
1327 |      ]
1328 |     },
1329 |     {
1330 |      "name": "stdout",
1331 |      "output_type": "stream",
1332 |      "text": [
1333 |       "\n",
1334 |       "ending worker 0\n",
1335 |       "starting worker 21\n"
1336 |      ]
1337 |     },
1338 |     {
1339 |      "data": {
1340 |       "application/vnd.jupyter.widget-view+json": {
1341 |        "model_id": "7917d9f219ef437caf59ca95e8709a14",
1342 |        "version_major": 2,
1343 |        "version_minor": 0
1344 |       },
1345 |       "text/plain": [
1346 |        "HBox(children=(IntProgress(value=0, max=39020), HTML(value='')))"
1347 |       ]
1348 |      },
1349 |      "metadata": {},
1350 |      "output_type": "display_data"
1351 |     },
1352 |     {
1353 |      "name": "stderr",
1354 |      "output_type": "stream",
1355 |      "text": [
1356 |       "/home/jfpuget/anaconda3/envs/xgb8/lib/python3.6/site-packages/celerite/celerite.py:119: RuntimeWarning: invalid value encountered in less\n",
1357 |       "  if check_sorted and np.any(np.diff(t) < 0.0):\n",
1358 |       "/home/jfpuget/anaconda3/envs/xgb8/lib/python3.6/site-packages/celerite/celerite.py:119: RuntimeWarning: invalid value encountered in less\n",
1359 |       "  if check_sorted and np.any(np.diff(t) < 0.0):\n"
1360 |      ]
1361 |     },
1362 |     {
1363 |      "name": "stdout",
1364 |      "output_type": "stream",
1365 |      "text": [
1366 |       "\n",
1367 |       "ending worker 2\n",
1368 |       "starting worker 22\n"
1369 |      ]
1370 |     },
1371 |     {
1372 |      "data": {
1373 |       "application/vnd.jupyter.widget-view+json": {
1374 |        "model_id": "12908b72b6874ace82cb9da2f91147c6",
1375 |        "version_major": 2,
1376 |        "version_minor": 0
1377 |       },
1378 |       "text/plain": [
1379 |        "HBox(children=(IntProgress(value=0, max=39103), HTML(value='')))"
1380 |       ]
1381 |      },
1382 |      "metadata": {},
1383 |      "output_type": "display_data"
1384 |     },
1385 |     {
1386 |      "name": "stdout",
1387 |      "output_type": "stream",
1388 |      "text": [
1389 |       "\n",
1390 |       "ending worker 17\n",
1391 |       "starting worker 23\n"
1392 |      ]
1393 |     },
1394 |     {
1395 |      "data": {
1396 |       "application/vnd.jupyter.widget-view+json": {
1397 |        "model_id": "8c92c11e3dee4c4ab740bc02e5a69d1c",
1398 |        "version_major": 2,
1399 |        "version_minor": 0
1400 |       },
1401 |       "text/plain": [
1402 |        "HBox(children=(IntProgress(value=0, max=39054), HTML(value='')))"
1403 |       ]
1404 |      },
1405 |      "metadata": {},
1406 |      "output_type": "display_data"
1407 |     },
1408 |     {
1409 |      "name": "stdout",
1410 |      "output_type": "stream",
1411 |      "text": [
1412 |       "\n",
1413 |       "ending worker 15\n",
1414 |       "starting worker 24\n"
1415 |      ]
1416 |     },
1417 |     {
1418 |      "data": {
1419 |       "application/vnd.jupyter.widget-view+json": {
1420 |        "model_id": "e004d296d8214f6a958232289f273759",
1421 |        "version_major": 2,
1422 |        "version_minor": 0
1423 |       },
1424 |       "text/plain": [
1425 |        "HBox(children=(IntProgress(value=0, max=39084), HTML(value='')))"
1426 |       ]
1427 |      },
1428 |      "metadata": {},
1429 |      "output_type": "display_data"
1430 |     },
1431 |     {
1432 |      "name": "stdout",
1433 |      "output_type": "stream",
1434 |      "text": [
1435 |       "\n",
1436 |       "ending worker 14\n",
1437 |       "starting worker 25\n"
1438 |      ]
1439 |     },
1440 |     {
1441 |      "data": {
1442 |       "application/vnd.jupyter.widget-view+json": {
1443 |        "model_id": "e10fe83368964f69b5be5c72a0d104ca",
1444 |        "version_major": 2,
1445 |        "version_minor": 0
1446 |       },
1447 |       "text/plain": [
1448 |        "HBox(children=(IntProgress(value=0, max=39094), HTML(value='')))"
1449 |       ]
1450 |      },
1451 |      "metadata": {},
1452 |      "output_type": "display_data"
1453 |     },
1454 |     {
1455 |      "name": "stdout",
1456 |      "output_type": "stream",
1457 |      "text": [
1458 |       "\n",
1459 |       "\n",
1460 |       "ending worker 18\n",
1461 |       "starting worker 26\n"
1462 |      ]
1463 |     },
1464 |     {
1465 |      "data": {
1466 |       "application/vnd.jupyter.widget-view+json": {
1467 |        "model_id": "c40f9d61202e42a48c8c8304e035bcd3",
1468 |        "version_major": 2,
1469 |        "version_minor": 0
1470 |       },
1471 |       "text/plain": [
1472 |        "HBox(children=(IntProgress(value=0, max=39048), HTML(value='')))"
1473 |       ]
1474 |      },
1475 |      "metadata": {},
1476 |      "output_type": "display_data"
1477 |     },
1478 |     {
1479 |      "name": "stdout",
1480 |      "output_type": "stream",
1481 |      "text": [
1482 |       "ending worker 11\n",
1483 |       "starting worker 27\n"
1484 |      ]
1485 |     },
1486 |     {
1487 |      "data": {
1488 |       "application/vnd.jupyter.widget-view+json": {
1489 |        "model_id": "8a571bb8c63d41eb9a1d1acd1cef3984",
1490 |        "version_major": 2,
1491 |        "version_minor": 0
1492 |       },
1493 |       "text/plain": [
1494 |        "HBox(children=(IntProgress(value=0, max=39086), HTML(value='')))"
1495 |       ]
1496 |      },
1497 |      "metadata": {},
1498 |      "output_type": "display_data"
1499 |     },
1500 |     {
1501 |      "name": "stdout",
1502 |      "output_type": "stream",
1503 |      "text": [
1504 |       "\n",
1505 |       "\n",
1506 |       "ending worker 10\n",
1507 |       "starting worker 28\n"
1508 |      ]
1509 |     },
1510 |     {
1511 |      "data": {
1512 |       "application/vnd.jupyter.widget-view+json": {
1513 |        "model_id": "09bffa98949f49128250f1474b097767",
1514 |        "version_major": 2,
1515 |        "version_minor": 0
1516 |       },
1517 |       "text/plain": [
1518 |        "HBox(children=(IntProgress(value=0, max=39095), HTML(value='')))"
1519 |       ]
1520 |      },
1521 |      "metadata": {},
1522 |      "output_type": "display_data"
1523 |     },
1524 |     {
1525 |      "name": "stdout",
1526 |      "output_type": "stream",
1527 |      "text": [
1528 |       "ending worker 8\n",
1529 |       "starting worker 29\n"
1530 |      ]
1531 |     },
1532 |     {
1533 |      "data": {
1534 |       "application/vnd.jupyter.widget-view+json": {
1535 |        "model_id": "86baa0da721f4a80815b315be9723348",
1536 |        "version_major": 2,
1537 |        "version_minor": 0
1538 |       },
1539 |       "text/plain": [
1540 |        "HBox(children=(IntProgress(value=0, max=39083), HTML(value='')))"
1541 |       ]
1542 |      },
1543 |      "metadata": {},
1544 |      "output_type": "display_data"
1545 |     },
1546 |     {
1547 |      "name": "stdout",
1548 |      "output_type": "stream",
1549 |      "text": [
1550 |       "\n",
1551 |       "\n",
1552 |       "ending worker 7\n",
1553 |       "starting worker 30\n"
1554 |      ]
1555 |     },
1556 |     {
1557 |      "data": {
1558 |       "application/vnd.jupyter.widget-view+json": {
1559 |        "model_id": "476eeff05e5b438daf26731c05a88885",
1560 |        "version_major": 2,
1561 |        "version_minor": 0
1562 |       },
1563 |       "text/plain": [
1564 |        "HBox(children=(IntProgress(value=0, max=39084), HTML(value='')))"
1565 |       ]
1566 |      },
1567 |      "metadata": {},
1568 |      "output_type": "display_data"
1569 |     },
1570 |     {
1571 |      "name": "stdout",
1572 |      "output_type": "stream",
1573 |      "text": [
1574 |       "ending worker 19\n",
1575 |       "starting worker 31\n"
1576 |      ]
1577 |     },
1578 |     {
1579 |      "data": {
1580 |       "application/vnd.jupyter.widget-view+json": {
1581 |        "model_id": "440fe94d34324ed880de4b8f97e8ca68",
1582 |        "version_major": 2,
1583 |        "version_minor": 0
1584 |       },
1585 |       "text/plain": [
1586 |        "HBox(children=(IntProgress(value=0, max=39058), HTML(value='')))"
1587 |       ]
1588 |      },
1589 |      "metadata": {},
1590 |      "output_type": "display_data"
1591 |     },
1592 |     {
1593 |      "name": "stdout",
1594 |      "output_type": "stream",
1595 |      "text": [
1596 |       "\n",
1597 |       "ending worker 3\n",
1598 |       "starting worker 32\n"
1599 |      ]
1600 |     },
1601 |     {
1602 |      "data": {
1603 |       "application/vnd.jupyter.widget-view+json": {
1604 |        "model_id": "54a3fb1ddd6d4bcc8162d66224b2cc5a",
1605 |        "version_major": 2,
1606 |        "version_minor": 0
1607 |       },
1608 |       "text/plain": [
1609 |        "HBox(children=(IntProgress(value=0, max=39080), HTML(value='')))"
1610 |       ]
1611 |      },
1612 |      "metadata": {},
1613 |      "output_type": "display_data"
1614 |     },
1615 |     {
1616 |      "name": "stdout",
1617 |      "output_type": "stream",
1618 |      "text": [
1619 |       "\n",
1620 |       "ending worker 13\n",
1621 |       "starting worker 33\n",
1622 |       "\n"
1623 |      ]
1624 |     },
1625 |     {
1626 |      "data": {
1627 |       "application/vnd.jupyter.widget-view+json": {
1628 |        "model_id": "14f42c707acf41658665725876a6f61b",
1629 |        "version_major": 2,
1630 |        "version_minor": 0
1631 |       },
1632 |       "text/plain": [
1633 |        "HBox(children=(IntProgress(value=0, max=39072), HTML(value='')))"
1634 |       ]
1635 |      },
1636 |      "metadata": {},
1637 |      "output_type": "display_data"
1638 |     },
1639 |     {
1640 |      "name": "stdout",
1641 |      "output_type": "stream",
1642 |      "text": [
1643 |       "\n",
1644 |       "ending worker 12\n",
1645 |       "starting worker 34\n"
1646 |      ]
1647 |     },
1648 |     {
1649 |      "data": {
1650 |       "application/vnd.jupyter.widget-view+json": {
1651 |        "model_id": "61bd8aa5226a40e9b2fd39219ee80c39",
1652 |        "version_major": 2,
1653 |        "version_minor": 0
1654 |       },
1655 |       "text/plain": [
1656 |        "HBox(children=(IntProgress(value=0, max=39073), HTML(value='')))"
1657 |       ]
1658 |      },
1659 |      "metadata": {},
1660 |      "output_type": "display_data"
1661 |     },
1662 |     {
1663 |      "name": "stdout",
1664 |      "output_type": "stream",
1665 |      "text": [
1666 |       "ending worker 16\n",
1667 |       "starting worker 35\n"
1668 |      ]
1669 |     },
1670 |     {
1671 |      "data": {
1672 |       "application/vnd.jupyter.widget-view+json": {
1673 |        "model_id": "f5ba91c875fa40d3a1fc51ffc1d69fa0",
1674 |        "version_major": 2,
1675 |        "version_minor": 0
1676 |       },
1677 |       "text/plain": [
1678 |        "HBox(children=(IntProgress(value=0, max=39094), HTML(value='')))"
1679 |       ]
1680 |      },
1681 |      "metadata": {},
1682 |      "output_type": "display_data"
1683 |     },
1684 |     {
1685 |      "name": "stderr",
1686 |      "output_type": "stream",
1687 |      "text": [
1688 |       "/home/jfpuget/anaconda3/envs/xgb8/lib/python3.6/site-packages/celerite/celerite.py:119: RuntimeWarning: invalid value encountered in less\n",
1689 |       "  if check_sorted and np.any(np.diff(t) < 0.0):\n"
1690 |      ]
1691 |     },
1692 |     {
1693 |      "name": "stdout",
1694 |      "output_type": "stream",
1695 |      "text": [
1696 |       "\n",
1697 |       "ending worker 9\n",
1698 |       "starting worker 36\n"
1699 |      ]
1700 |     },
1701 |     {
1702 |      "data": {
1703 |       "application/vnd.jupyter.widget-view+json": {
1704 |        "model_id": "4e9a1907b9044b74b09082a44c93665f",
1705 |        "version_major": 2,
1706 |        "version_minor": 0
1707 |       },
1708 |       "text/plain": [
1709 |        "HBox(children=(IntProgress(value=0, max=39106), HTML(value='')))"
1710 |       ]
1711 |      },
1712 |      "metadata": {},
1713 |      "output_type": "display_data"
1714 |     },
1715 |     {
1716 |      "name": "stdout",
1717 |      "output_type": "stream",
1718 |      "text": [
1719 |       "\n",
1720 |       "ending worker 6\n",
1721 |       "starting worker 37\n"
1722 |      ]
1723 |     },
1724 |     {
1725 |      "data": {
1726 |       "application/vnd.jupyter.widget-view+json": {
1727 |        "model_id": "3f7770c9a0f94968bf0dd05c7123361f",
1728 |        "version_major": 2,
1729 |        "version_minor": 0
1730 |       },
1731 |       "text/plain": [
1732 |        "HBox(children=(IntProgress(value=0, max=39054), HTML(value='')))"
1733 |       ]
1734 |      },
1735 |      "metadata": {},
1736 |      "output_type": "display_data"
1737 |     },
1738 |     {
1739 |      "name": "stdout",
1740 |      "output_type": "stream",
1741 |      "text": [
1742 |       "\n",
1743 |       "ending worker 5\n",
1744 |       "starting worker 38\n"
1745 |      ]
1746 |     },
1747 |     {
1748 |      "data": {
1749 |       "application/vnd.jupyter.widget-view+json": {
1750 |        "model_id": "7c30ce8e8850483ca10cb1735d7ee491",
1751 |        "version_major": 2,
1752 |        "version_minor": 0
1753 |       },
1754 |       "text/plain": [
1755 |        "HBox(children=(IntProgress(value=0, max=39070), HTML(value='')))"
1756 |       ]
1757 |      },
1758 |      "metadata": {},
1759 |      "output_type": "display_data"
1760 |     },
1761 |     {
1762 |      "name": "stdout",
1763 |      "output_type": "stream",
1764 |      "text": [
1765 |       "\n",
1766 |       "ending worker 4\n",
1767 |       "starting worker 39\n"
1768 |      ]
1769 |     },
1770 |     {
1771 |      "data": {
1772 |       "application/vnd.jupyter.widget-view+json": {
1773 |        "model_id": "c05ceea5df6a491591ac7370462edd29",
1774 |        "version_major": 2,
1775 |        "version_minor": 0
1776 |       },
1777 |       "text/plain": [
1778 |        "HBox(children=(IntProgress(value=0, max=39041), HTML(value='')))"
1779 |       ]
1780 |      },
1781 |      "metadata": {},
1782 |      "output_type": "display_data"
1783 |     },
1784 |     {
1785 |      "name": "stderr",
1786 |      "output_type": "stream",
1787 |      "text": [
1788 |       "/home/jfpuget/anaconda3/envs/xgb8/lib/python3.6/site-packages/celerite/celerite.py:119: RuntimeWarning: invalid value encountered in less\n",
1789 |       "  if check_sorted and np.any(np.diff(t) < 0.0):\n",
1790 |       "/home/jfpuget/anaconda3/envs/xgb8/lib/python3.6/site-packages/celerite/celerite.py:119: RuntimeWarning: invalid value encountered in less\n",
1791 |       "  if check_sorted and np.any(np.diff(t) < 0.0):\n",
1792 |       "/home/jfpuget/anaconda3/envs/xgb8/lib/python3.6/site-packages/celerite/celerite.py:119: RuntimeWarning: invalid value encountered in less\n",
1793 |       "  if check_sorted and np.any(np.diff(t) < 0.0):\n",
1794 |       "/home/jfpuget/anaconda3/envs/xgb8/lib/python3.6/site-packages/celerite/celerite.py:119: RuntimeWarning: invalid value encountered in less\n",
1795 |       "  if check_sorted and np.any(np.diff(t) < 0.0):\n"
1796 |      ]
1797 |     },
1798 |     {
1799 |      "name": "stdout",
1800 |      "output_type": "stream",
1801 |      "text": [
1802 |       "\n",
1803 |       "ending worker 20\n",
1804 |       "starting worker 40\n"
1805 |      ]
1806 |     },
1807 |     {
1808 |      "data": {
1809 |       "application/vnd.jupyter.widget-view+json": {
1810 |        "model_id": "c758ba50473345f3913a8773e564d5ef",
1811 |        "version_major": 2,
1812 |        "version_minor": 0
1813 |       },
1814 |       "text/plain": [
1815 |        "HBox(children=(IntProgress(value=0, max=39098), HTML(value='')))"
1816 |       ]
1817 |      },
1818 |      "metadata": {},
1819 |      "output_type": "display_data"
1820 |     },
1821 |     {
1822 |      "name": "stderr",
1823 |      "output_type": "stream",
1824 |      "text": [
1825 |       "/home/jfpuget/anaconda3/envs/xgb8/lib/python3.6/site-packages/celerite/celerite.py:119: RuntimeWarning: invalid value encountered in less\n",
1826 |       "  if check_sorted and np.any(np.diff(t) < 0.0):\n"
1827 |      ]
1828 |     },
1829 |     {
1830 |      "name": "stdout",
1831 |      "output_type": "stream",
1832 |      "text": [
1833 |       "\n",
1834 |       "ending worker 21\n",
1835 |       "starting worker 41\n"
1836 |      ]
1837 |     },
1838 |     {
1839 |      "data": {
1840 |       "application/vnd.jupyter.widget-view+json": {
1841 |        "model_id": "e033b743ebce4cdeaab35ffebc70f755",
1842 |        "version_major": 2,
1843 |        "version_minor": 0
1844 |       },
1845 |       "text/plain": [
1846 |        "HBox(children=(IntProgress(value=0, max=39046), HTML(value='')))"
1847 |       ]
1848 |      },
1849 |      "metadata": {},
1850 |      "output_type": "display_data"
1851 |     },
1852 |     {
1853 |      "name": "stderr",
1854 |      "output_type": "stream",
1855 |      "text": [
1856 |       "/home/jfpuget/anaconda3/envs/xgb8/lib/python3.6/site-packages/celerite/celerite.py:119: RuntimeWarning: invalid value encountered in less\n",
1857 |       "  if check_sorted and np.any(np.diff(t) < 0.0):\n",
1858 |       "/home/jfpuget/anaconda3/envs/xgb8/lib/python3.6/site-packages/celerite/celerite.py:119: RuntimeWarning: invalid value encountered in less\n",
1859 |       "  if check_sorted and np.any(np.diff(t) < 0.0):\n",
1860 |       "/home/jfpuget/anaconda3/envs/xgb8/lib/python3.6/site-packages/celerite/celerite.py:119: RuntimeWarning: invalid value encountered in less\n",
1861 |       "  if check_sorted and np.any(np.diff(t) < 0.0):\n"
1862 |      ]
1863 |     },
1864 |     {
1865 |      "name": "stdout",
1866 |      "output_type": "stream",
1867 |      "text": [
1868 |       "\n",
1869 |       "ending worker 22\n",
1870 |       "starting worker 42\n"
1871 |      ]
1872 |     },
1873 |     {
1874 |      "data": {
1875 |       "application/vnd.jupyter.widget-view+json": {
1876 |        "model_id": "d3ad7909814c4659b561957fc2ac8add",
1877 |        "version_major": 2,
1878 |        "version_minor": 0
1879 |       },
1880 |       "text/plain": [
1881 |        "HBox(children=(IntProgress(value=0, max=39022), HTML(value='')))"
1882 |       ]
1883 |      },
1884 |      "metadata": {},
1885 |      "output_type": "display_data"
1886 |     },
1887 |     {
1888 |      "name": "stdout",
1889 |      "output_type": "stream",
1890 |      "text": [
1891 |       "\n",
1892 |       "ending worker 23\n",
1893 |       "starting worker 43\n"
1894 |      ]
1895 |     },
1896 |     {
1897 |      "data": {
1898 |       "application/vnd.jupyter.widget-view+json": {
1899 |        "model_id": "5050873972124566b3efc14d7d017e23",
1900 |        "version_major": 2,
1901 |        "version_minor": 0
1902 |       },
1903 |       "text/plain": [
1904 |        "HBox(children=(IntProgress(value=0, max=39002), HTML(value='')))"
1905 |       ]
1906 |      },
1907 |      "metadata": {},
1908 |      "output_type": "display_data"
1909 |     },
1910 |     {
1911 |      "name": "stdout",
1912 |      "output_type": "stream",
1913 |      "text": [
1914 |       "\n",
1915 |       "\n",
1916 |       "\n",
1917 |       "ending worker 29\n",
1918 |       "starting worker 44\n"
1919 |      ]
1920 |     },
1921 |     {
1922 |      "data": {
1923 |       "application/vnd.jupyter.widget-view+json": {
1924 |        "model_id": "fe133f2eed7e483e96b4c958a49a741d",
1925 |        "version_major": 2,
1926 |        "version_minor": 0
1927 |       },
1928 |       "text/plain": [
1929 |        "HBox(children=(IntProgress(value=0, max=39027), HTML(value='')))"
1930 |       ]
1931 |      },
1932 |      "metadata": {},
1933 |      "output_type": "display_data"
1934 |     },
1935 |     {
1936 |      "name": "stdout",
1937 |      "output_type": "stream",
1938 |      "text": [
1939 |       "ending worker 24\n",
1940 |       "starting worker 45\n",
1941 |       "ending worker 25\n",
1942 |       "starting worker 46\n"
1943 |      ]
1944 |     },
1945 |     {
1946 |      "data": {
1947 |       "application/vnd.jupyter.widget-view+json": {
1948 |        "model_id": "86fd5e12ba184476b7364b35445cbdb5",
1949 |        "version_major": 2,
1950 |        "version_minor": 0
1951 |       },
1952 |       "text/plain": [
1953 |        "HBox(children=(IntProgress(value=0, max=39035), HTML(value='')))"
1954 |       ]
1955 |      },
1956 |      "metadata": {},
1957 |      "output_type": "display_data"
1958 |     },
1959 |     {
1960 |      "data": {
1961 |       "application/vnd.jupyter.widget-view+json": {
1962 |        "model_id": "94dfca9b34a34af1abf3d2699ceb5a6b",
1963 |        "version_major": 2,
1964 |        "version_minor": 0
1965 |       },
1966 |       "text/plain": [
1967 |        "HBox(children=(IntProgress(value=0, max=39069), HTML(value='')))"
1968 |       ]
1969 |      },
1970 |      "metadata": {},
1971 |      "output_type": "display_data"
1972 |     },
1973 |     {
1974 |      "name": "stdout",
1975 |      "output_type": "stream",
1976 |      "text": [
1977 |       "\n",
1978 |       "ending worker 32\n",
1979 |       "starting worker 47\n"
1980 |      ]
1981 |     },
1982 |     {
1983 |      "data": {
1984 |       "application/vnd.jupyter.widget-view+json": {
1985 |        "model_id": "cbda2cf3f4d94cb19a883697303289c8",
1986 |        "version_major": 2,
1987 |        "version_minor": 0
1988 |       },
1989 |       "text/plain": [
1990 |        "HBox(children=(IntProgress(value=0, max=39063), HTML(value='')))"
1991 |       ]
1992 |      },
1993 |      "metadata": {},
1994 |      "output_type": "display_data"
1995 |     },
1996 |     {
1997 |      "name": "stdout",
1998 |      "output_type": "stream",
1999 |      "text": [
2000 |       "\n"
2001 |      ]
2002 |     },
2003 |     {
2004 |      "name": "stderr",
2005 |      "output_type": "stream",
2006 |      "text": [
2007 |       "/home/jfpuget/anaconda3/envs/xgb8/lib/python3.6/site-packages/celerite/celerite.py:119: RuntimeWarning: invalid value encountered in less\n",
2008 |       "  if check_sorted and np.any(np.diff(t) < 0.0):\n"
2009 |      ]
2010 |     },
2011 |     {
2012 |      "name": "stdout",
2013 |      "output_type": "stream",
2014 |      "text": [
2015 |       "ending worker 28\n",
2016 |       "starting worker 48\n"
2017 |      ]
2018 |     },
2019 |     {
2020 |      "data": {
2021 |       "application/vnd.jupyter.widget-view+json": {
2022 |        "model_id": "267952289e5843118529ceef8a663448",
2023 |        "version_major": 2,
2024 |        "version_minor": 0
2025 |       },
2026 |       "text/plain": [
2027 |        "HBox(children=(IntProgress(value=0, max=39090), HTML(value='')))"
2028 |       ]
2029 |      },
2030 |      "metadata": {},
2031 |      "output_type": "display_data"
2032 |     },
2033 |     {
2034 |      "name": "stdout",
2035 |      "output_type": "stream",
2036 |      "text": [
2037 |       "\n",
2038 |       "ending worker 30\n",
2039 |       "starting worker 49\n"
2040 |      ]
2041 |     },
2042 |     {
2043 |      "data": {
2044 |       "application/vnd.jupyter.widget-view+json": {
2045 |        "model_id": "37e90eb08503473c9b293d01432c1a4f",
2046 |        "version_major": 2,
2047 |        "version_minor": 0
2048 |       },
2049 |       "text/plain": [
2050 |        "HBox(children=(IntProgress(value=0, max=39079), HTML(value='')))"
2051 |       ]
2052 |      },
2053 |      "metadata": {},
2054 |      "output_type": "display_data"
2055 |     },
2056 |     {
2057 |      "name": "stdout",
2058 |      "output_type": "stream",
2059 |      "text": [
2060 |       "\n",
2061 |       "ending worker 26\n",
2062 |       "starting worker 50\n"
2063 |      ]
2064 |     },
2065 |     {
2066 |      "data": {
2067 |       "application/vnd.jupyter.widget-view+json": {
2068 |        "model_id": "36465ac60b1c47c9bb51169aae25452a",
2069 |        "version_major": 2,
2070 |        "version_minor": 0
2071 |       },
2072 |       "text/plain": [
2073 |        "HBox(children=(IntProgress(value=0, max=39079), HTML(value='')))"
2074 |       ]
2075 |      },
2076 |      "metadata": {},
2077 |      "output_type": "display_data"
2078 |     },
2079 |     {
2080 |      "name": "stdout",
2081 |      "output_type": "stream",
2082 |      "text": [
2083 |       "\n",
2084 |       "\n",
2085 |       "ending worker 33\n",
2086 |       "starting worker 51\n"
2087 |      ]
2088 |     },
2089 |     {
2090 |      "data": {
2091 |       "application/vnd.jupyter.widget-view+json": {
2092 |        "model_id": "58f1b7e8a23a4800aa57f8ca3956f3c3",
2093 |        "version_major": 2,
2094 |        "version_minor": 0
2095 |       },
2096 |       "text/plain": [
2097 |        "HBox(children=(IntProgress(value=0, max=39076), HTML(value='')))"
2098 |       ]
2099 |      },
2100 |      "metadata": {},
2101 |      "output_type": "display_data"
2102 |     },
2103 |     {
2104 |      "name": "stdout",
2105 |      "output_type": "stream",
2106 |      "text": [
2107 |       "ending worker 34\n",
2108 |       "starting worker 52\n"
2109 |      ]
2110 |     },
2111 |     {
2112 |      "data": {
2113 |       "application/vnd.jupyter.widget-view+json": {
2114 |        "model_id": "33ec1219dacc451faf9075f8a79e1cb6",
2115 |        "version_major": 2,
2116 |        "version_minor": 0
2117 |       },
2118 |       "text/plain": [
2119 |        "HBox(children=(IntProgress(value=0, max=39084), HTML(value='')))"
2120 |       ]
2121 |      },
2122 |      "metadata": {},
2123 |      "output_type": "display_data"
2124 |     },
2125 |     {
2126 |      "name": "stdout",
2127 |      "output_type": "stream",
2128 |      "text": [
2129 |       "\n",
2130 |       "ending worker 35\n",
2131 |       "starting worker 53\n"
2132 |      ]
2133 |     },
2134 |     {
2135 |      "data": {
2136 |       "application/vnd.jupyter.widget-view+json": {
2137 |        "model_id": "429f6420f5ef49e2b7606fab71e9730e",
2138 |        "version_major": 2,
2139 |        "version_minor": 0
2140 |       },
2141 |       "text/plain": [
2142 |        "HBox(children=(IntProgress(value=0, max=39037), HTML(value='')))"
2143 |       ]
2144 |      },
2145 |      "metadata": {},
2146 |      "output_type": "display_data"
2147 |     },
2148 |     {
2149 |      "name": "stdout",
2150 |      "output_type": "stream",
2151 |      "text": [
2152 |       "\n",
2153 |       "ending worker 27\n",
2154 |       "starting worker 54\n"
2155 |      ]
2156 |     },
2157 |     {
2158 |      "data": {
2159 |       "application/vnd.jupyter.widget-view+json": {
2160 |        "model_id": "d05d9a21a08a43329f185cb65b175959",
2161 |        "version_major": 2,
2162 |        "version_minor": 0
2163 |       },
2164 |       "text/plain": [
2165 |        "HBox(children=(IntProgress(value=0, max=39054), HTML(value='')))"
2166 |       ]
2167 |      },
2168 |      "metadata": {},
2169 |      "output_type": "display_data"
2170 |     },
2171 |     {
2172 |      "name": "stdout",
2173 |      "output_type": "stream",
2174 |      "text": [
2175 |       "\n",
2176 |       "\n",
2177 |       "ending worker 37\n",
2178 |       "starting worker 55\n"
2179 |      ]
2180 |     },
2181 |     {
2182 |      "data": {
2183 |       "application/vnd.jupyter.widget-view+json": {
2184 |        "model_id": "c12b60ec0a8949128441fd6abc1eb20d",
2185 |        "version_major": 2,
2186 |        "version_minor": 0
2187 |       },
2188 |       "text/plain": [
2189 |        "HBox(children=(IntProgress(value=0, max=39078), HTML(value='')))"
2190 |       ]
2191 |      },
2192 |      "metadata": {},
2193 |      "output_type": "display_data"
2194 |     },
2195 |     {
2196 |      "name": "stdout",
2197 |      "output_type": "stream",
2198 |      "text": [
2199 |       "ending worker 39\n",
2200 |       "starting worker 56\n"
2201 |      ]
2202 |     },
2203 |     {
2204 |      "data": {
2205 |       "application/vnd.jupyter.widget-view+json": {
2206 |        "model_id": "2ec85bc324f64c5bb9b338f9c9665bea",
2207 |        "version_major": 2,
2208 |        "version_minor": 0
2209 |       },
2210 |       "text/plain": [
2211 |        "HBox(children=(IntProgress(value=0, max=39085), HTML(value='')))"
2212 |       ]
2213 |      },
2214 |      "metadata": {},
2215 |      "output_type": "display_data"
2216 |     },
2217 |     {
2218 |      "name": "stdout",
2219 |      "output_type": "stream",
2220 |      "text": [
2221 |       "\n",
2222 |       "ending worker 31\n",
2223 |       "starting worker 57\n"
2224 |      ]
2225 |     },
2226 |     {
2227 |      "data": {
2228 |       "application/vnd.jupyter.widget-view+json": {
2229 |        "model_id": "f20bd7adf3ea42a59ea3c55fe3e934b6",
2230 |        "version_major": 2,
2231 |        "version_minor": 0
2232 |       },
2233 |       "text/plain": [
2234 |        "HBox(children=(IntProgress(value=0, max=39076), HTML(value='')))"
2235 |       ]
2236 |      },
2237 |      "metadata": {},
2238 |      "output_type": "display_data"
2239 |     },
2240 |     {
2241 |      "name": "stdout",
2242 |      "output_type": "stream",
2243 |      "text": [
2244 |       "\n",
2245 |       "ending worker 36\n",
2246 |       "starting worker 58\n"
2247 |      ]
2248 |     },
2249 |     {
2250 |      "data": {
2251 |       "application/vnd.jupyter.widget-view+json": {
2252 |        "model_id": "35c4540da7154d3db45e55ec20343e7a",
2253 |        "version_major": 2,
2254 |        "version_minor": 0
2255 |       },
2256 |       "text/plain": [
2257 |        "HBox(children=(IntProgress(value=0, max=39085), HTML(value='')))"
2258 |       ]
2259 |      },
2260 |      "metadata": {},
2261 |      "output_type": "display_data"
2262 |     },
2263 |     {
2264 |      "name": "stdout",
2265 |      "output_type": "stream",
2266 |      "text": [
2267 |       "\n",
2268 |       "ending worker 38\n",
2269 |       "starting worker 59\n"
2270 |      ]
2271 |     },
2272 |     {
2273 |      "data": {
2274 |       "application/vnd.jupyter.widget-view+json": {
2275 |        "model_id": "3ab69318aa804f45a24122579fd3cb2c",
2276 |        "version_major": 2,
2277 |        "version_minor": 0
2278 |       },
2279 |       "text/plain": [
2280 |        "HBox(children=(IntProgress(value=0, max=39051), HTML(value='')))"
2281 |       ]
2282 |      },
2283 |      "metadata": {},
2284 |      "output_type": "display_data"
2285 |     },
2286 |     {
2287 |      "name": "stderr",
2288 |      "output_type": "stream",
2289 |      "text": [
2290 |       "/home/jfpuget/anaconda3/envs/xgb8/lib/python3.6/site-packages/celerite/celerite.py:119: RuntimeWarning: invalid value encountered in less\n",
2291 |       "  if check_sorted and np.any(np.diff(t) < 0.0):\n",
2292 |       "/home/jfpuget/anaconda3/envs/xgb8/lib/python3.6/site-packages/celerite/celerite.py:119: RuntimeWarning: invalid value encountered in less\n",
2293 |       "  if check_sorted and np.any(np.diff(t) < 0.0):\n",
2294 |       "/home/jfpuget/anaconda3/envs/xgb8/lib/python3.6/site-packages/celerite/celerite.py:119: RuntimeWarning: invalid value encountered in less\n",
2295 |       "  if check_sorted and np.any(np.diff(t) < 0.0):\n"
2296 |      ]
2297 |     },
2298 |     {
2299 |      "name": "stdout",
2300 |      "output_type": "stream",
2301 |      "text": [
2302 |       "\n",
2303 |       "ending worker 40\n",
2304 |       "starting worker 60\n"
2305 |      ]
2306 |     },
2307 |     {
2308 |      "data": {
2309 |       "application/vnd.jupyter.widget-view+json": {
2310 |        "model_id": "4f1c8a480cb54b1f95fcf5ed19eae1e8",
2311 |        "version_major": 2,
2312 |        "version_minor": 0
2313 |       },
2314 |       "text/plain": [
2315 |        "HBox(children=(IntProgress(value=0, max=39077), HTML(value='')))"
2316 |       ]
2317 |      },
2318 |      "metadata": {},
2319 |      "output_type": "display_data"
2320 |     },
2321 |     {
2322 |      "name": "stderr",
2323 |      "output_type": "stream",
2324 |      "text": [
2325 |       "/home/jfpuget/anaconda3/envs/xgb8/lib/python3.6/site-packages/celerite/celerite.py:119: RuntimeWarning: invalid value encountered in less\n",
2326 |       "  if check_sorted and np.any(np.diff(t) < 0.0):\n"
2327 |      ]
2328 |     },
2329 |     {
2330 |      "name": "stdout",
2331 |      "output_type": "stream",
2332 |      "text": [
2333 |       "\n",
2334 |       "ending worker 41\n",
2335 |       "starting worker 61\n"
2336 |      ]
2337 |     },
2338 |     {
2339 |      "data": {
2340 |       "application/vnd.jupyter.widget-view+json": {
2341 |        "model_id": "d7989c84f5574f97ac8d1d18b5a0bf1a",
2342 |        "version_major": 2,
2343 |        "version_minor": 0
2344 |       },
2345 |       "text/plain": [
2346 |        "HBox(children=(IntProgress(value=0, max=39103), HTML(value='')))"
2347 |       ]
2348 |      },
2349 |      "metadata": {},
2350 |      "output_type": "display_data"
2351 |     },
2352 |     {
2353 |      "name": "stderr",
2354 |      "output_type": "stream",
2355 |      "text": [
2356 |       "/home/jfpuget/anaconda3/envs/xgb8/lib/python3.6/site-packages/celerite/celerite.py:119: RuntimeWarning: invalid value encountered in less\n",
2357 |       "  if check_sorted and np.any(np.diff(t) < 0.0):\n",
2358 |       "/home/jfpuget/anaconda3/envs/xgb8/lib/python3.6/site-packages/celerite/celerite.py:119: RuntimeWarning: invalid value encountered in less\n",
2359 |       "  if check_sorted and np.any(np.diff(t) < 0.0):\n",
2360 |       "/home/jfpuget/anaconda3/envs/xgb8/lib/python3.6/site-packages/celerite/celerite.py:119: RuntimeWarning: invalid value encountered in less\n",
2361 |       "  if check_sorted and np.any(np.diff(t) < 0.0):\n",
2362 |       "/home/jfpuget/anaconda3/envs/xgb8/lib/python3.6/site-packages/celerite/celerite.py:119: RuntimeWarning: invalid value encountered in less\n",
2363 |       "  if check_sorted and np.any(np.diff(t) < 0.0):\n",
2364 |       "/home/jfpuget/anaconda3/envs/xgb8/lib/python3.6/site-packages/celerite/celerite.py:119: RuntimeWarning: invalid value encountered in less\n",
2365 |       "  if check_sorted and np.any(np.diff(t) < 0.0):\n"
2366 |      ]
2367 |     },
2368 |     {
2369 |      "name": "stdout",
2370 |      "output_type": "stream",
2371 |      "text": [
2372 |       "\n",
2373 |       "ending worker 42\n",
2374 |       "starting worker 62\n"
2375 |      ]
2376 |     },
2377 |     {
2378 |      "data": {
2379 |       "application/vnd.jupyter.widget-view+json": {
2380 |        "model_id": "769c2d9e4eee451586cdcc99f70522a0",
2381 |        "version_major": 2,
2382 |        "version_minor": 0
2383 |       },
2384 |       "text/plain": [
2385 |        "HBox(children=(IntProgress(value=0, max=39051), HTML(value='')))"
2386 |       ]
2387 |      },
2388 |      "metadata": {},
2389 |      "output_type": "display_data"
2390 |     },
2391 |     {
2392 |      "name": "stdout",
2393 |      "output_type": "stream",
2394 |      "text": [
2395 |       "\n",
2396 |       "\n",
2397 |       "\n",
2398 |       "\n",
2399 |       "ending worker 58\n",
2400 |       "starting worker 63\n"
2401 |      ]
2402 |     },
2403 |     {
2404 |      "data": {
2405 |       "application/vnd.jupyter.widget-view+json": {
2406 |        "model_id": "838c7a79292d4402b999399e4dc389a3",
2407 |        "version_major": 2,
2408 |        "version_minor": 0
2409 |       },
2410 |       "text/plain": [
2411 |        "HBox(children=(IntProgress(value=0, max=39050), HTML(value='')))"
2412 |       ]
2413 |      },
2414 |      "metadata": {},
2415 |      "output_type": "display_data"
2416 |     },
2417 |     {
2418 |      "name": "stdout",
2419 |      "output_type": "stream",
2420 |      "text": [
2421 |       "ending worker 43\n",
2422 |       "starting worker 64\n",
2423 |       "\n"
2424 |      ]
2425 |     },
2426 |     {
2427 |      "data": {
2428 |       "application/vnd.jupyter.widget-view+json": {
2429 |        "model_id": "ac1b0d7e39054a0fb29823de2be74fdb",
2430 |        "version_major": 2,
2431 |        "version_minor": 0
2432 |       },
2433 |       "text/plain": [
2434 |        "HBox(children=(IntProgress(value=0, max=39109), HTML(value='')))"
2435 |       ]
2436 |      },
2437 |      "metadata": {},
2438 |      "output_type": "display_data"
2439 |     },
2440 |     {
2441 |      "name": "stdout",
2442 |      "output_type": "stream",
2443 |      "text": [
2444 |       "ending worker 54\n",
2445 |       "starting worker 65\n"
2446 |      ]
2447 |     },
2448 |     {
2449 |      "data": {
2450 |       "application/vnd.jupyter.widget-view+json": {
2451 |        "model_id": "5fc9dbfd318e416baa48aa1142c6ad2e",
2452 |        "version_major": 2,
2453 |        "version_minor": 0
2454 |       },
2455 |       "text/plain": [
2456 |        "HBox(children=(IntProgress(value=0, max=39093), HTML(value='')))"
2457 |       ]
2458 |      },
2459 |      "metadata": {},
2460 |      "output_type": "display_data"
2461 |     },
2462 |     {
2463 |      "name": "stdout",
2464 |      "output_type": "stream",
2465 |      "text": [
2466 |       "ending worker 56\n",
2467 |       "starting worker 66\n"
2468 |      ]
2469 |     },
2470 |     {
2471 |      "data": {
2472 |       "application/vnd.jupyter.widget-view+json": {
2473 |        "model_id": "6bd6f6ffde694b2cbb8d302561da3462",
2474 |        "version_major": 2,
2475 |        "version_minor": 0
2476 |       },
2477 |       "text/plain": [
2478 |        "HBox(children=(IntProgress(value=0, max=39086), HTML(value='')))"
2479 |       ]
2480 |      },
2481 |      "metadata": {},
2482 |      "output_type": "display_data"
2483 |     },
2484 |     {
2485 |      "name": "stdout",
2486 |      "output_type": "stream",
2487 |      "text": [
2488 |       "ending worker 47\n",
2489 |       "starting worker 67\n"
2490 |      ]
2491 |     },
2492 |     {
2493 |      "data": {
2494 |       "application/vnd.jupyter.widget-view+json": {
2495 |        "model_id": "76ebec5cbb4147359012a1a2643eb21e",
2496 |        "version_major": 2,
2497 |        "version_minor": 0
2498 |       },
2499 |       "text/plain": [
2500 |        "HBox(children=(IntProgress(value=0, max=39134), HTML(value='')))"
2501 |       ]
2502 |      },
2503 |      "metadata": {},
2504 |      "output_type": "display_data"
2505 |     },
2506 |     {
2507 |      "name": "stdout",
2508 |      "output_type": "stream",
2509 |      "text": [
2510 |       "\n",
2511 |       "\n",
2512 |       "\n",
2513 |       "ending worker 51\n",
2514 |       "starting worker 68\n"
2515 |      ]
2516 |     },
2517 |     {
2518 |      "data": {
2519 |       "application/vnd.jupyter.widget-view+json": {
2520 |        "model_id": "df29f98ab86b43bbb24bcadcfdf4f953",
2521 |        "version_major": 2,
2522 |        "version_minor": 0
2523 |       },
2524 |       "text/plain": [
2525 |        "HBox(children=(IntProgress(value=0, max=39056), HTML(value='')))"
2526 |       ]
2527 |      },
2528 |      "metadata": {},
2529 |      "output_type": "display_data"
2530 |     },
2531 |     {
2532 |      "name": "stdout",
2533 |      "output_type": "stream",
2534 |      "text": [
2535 |       "\n",
2536 |       "ending worker 44\n",
2537 |       "starting worker 69\n"
2538 |      ]
2539 |     },
2540 |     {
2541 |      "data": {
2542 |       "application/vnd.jupyter.widget-view+json": {
2543 |        "model_id": "ee02a86fd50e48bc86a5388c2de2053e",
2544 |        "version_major": 2,
2545 |        "version_minor": 0
2546 |       },
2547 |       "text/plain": [
2548 |        "HBox(children=(IntProgress(value=0, max=39060), HTML(value='')))"
2549 |       ]
2550 |      },
2551 |      "metadata": {},
2552 |      "output_type": "display_data"
2553 |     },
2554 |     {
2555 |      "name": "stdout",
2556 |      "output_type": "stream",
2557 |      "text": [
2558 |       "ending worker 59\n",
2559 |       "starting worker 70\n"
2560 |      ]
2561 |     },
2562 |     {
2563 |      "data": {
2564 |       "application/vnd.jupyter.widget-view+json": {
2565 |        "model_id": "90f31849bf7b4f7cb529aa71f837590a",
2566 |        "version_major": 2,
2567 |        "version_minor": 0
2568 |       },
2569 |       "text/plain": [
2570 |        "HBox(children=(IntProgress(value=0, max=39083), HTML(value='')))"
2571 |       ]
2572 |      },
2573 |      "metadata": {},
2574 |      "output_type": "display_data"
2575 |     },
2576 |     {
2577 |      "name": "stdout",
2578 |      "output_type": "stream",
2579 |      "text": [
2580 |       "\n",
2581 |       "ending worker 48\n",
2582 |       "starting worker 71\n",
2583 |       "\n"
2584 |      ]
2585 |     },
2586 |     {
2587 |      "data": {
2588 |       "application/vnd.jupyter.widget-view+json": {
2589 |        "model_id": "6fc94bb950184496838bd2c55c65ead5",
2590 |        "version_major": 2,
2591 |        "version_minor": 0
2592 |       },
2593 |       "text/plain": [
2594 |        "HBox(children=(IntProgress(value=0, max=39027), HTML(value='')))"
2595 |       ]
2596 |      },
2597 |      "metadata": {},
2598 |      "output_type": "display_data"
2599 |     },
2600 |     {
2601 |      "name": "stdout",
2602 |      "output_type": "stream",
2603 |      "text": [
2604 |       "\n",
2605 |       "ending worker 52\n",
2606 |       "starting worker 72\n"
2607 |      ]
2608 |     },
2609 |     {
2610 |      "data": {
2611 |       "application/vnd.jupyter.widget-view+json": {
2612 |        "model_id": "217c635a9c674c0181a4422eea7281b9",
2613 |        "version_major": 2,
2614 |        "version_minor": 0
2615 |       },
2616 |       "text/plain": [
2617 |        "HBox(children=(IntProgress(value=0, max=39077), HTML(value='')))"
2618 |       ]
2619 |      },
2620 |      "metadata": {},
2621 |      "output_type": "display_data"
2622 |     },
2623 |     {
2624 |      "name": "stdout",
2625 |      "output_type": "stream",
2626 |      "text": [
2627 |       "\n",
2628 |       "ending worker 57\n",
2629 |       "starting worker 73\n",
2630 |       "ending worker 45\n",
2631 |       "starting worker 74\n"
2632 |      ]
2633 |     },
2634 |     {
2635 |      "data": {
2636 |       "application/vnd.jupyter.widget-view+json": {
2637 |        "model_id": "de823677005245d19eec0fafdf195500",
2638 |        "version_major": 2,
2639 |        "version_minor": 0
2640 |       },
2641 |       "text/plain": [
2642 |        "HBox(children=(IntProgress(value=0, max=39070), HTML(value='')))"
2643 |       ]
2644 |      },
2645 |      "metadata": {},
2646 |      "output_type": "display_data"
2647 |     },
2648 |     {
2649 |      "name": "stdout",
2650 |      "output_type": "stream",
2651 |      "text": [
2652 |       "\n"
2653 |      ]
2654 |     },
2655 |     {
2656 |      "data": {
2657 |       "application/vnd.jupyter.widget-view+json": {
2658 |        "model_id": "07fbefd5d7494cbfbc74738ed7a8c74b",
2659 |        "version_major": 2,
2660 |        "version_minor": 0
2661 |       },
2662 |       "text/plain": [
2663 |        "HBox(children=(IntProgress(value=0, max=39061), HTML(value='')))"
2664 |       ]
2665 |      },
2666 |      "metadata": {},
2667 |      "output_type": "display_data"
2668 |     },
2669 |     {
2670 |      "name": "stdout",
2671 |      "output_type": "stream",
2672 |      "text": [
2673 |       "ending worker 50\n",
2674 |       "starting worker 75\n"
2675 |      ]
2676 |     },
2677 |     {
2678 |      "data": {
2679 |       "application/vnd.jupyter.widget-view+json": {
2680 |        "model_id": "bf21810e00cc4107bde7d915c8561d42",
2681 |        "version_major": 2,
2682 |        "version_minor": 0
2683 |       },
2684 |       "text/plain": [
2685 |        "HBox(children=(IntProgress(value=0, max=39030), HTML(value='')))"
2686 |       ]
2687 |      },
2688 |      "metadata": {},
2689 |      "output_type": "display_data"
2690 |     },
2691 |     {
2692 |      "name": "stdout",
2693 |      "output_type": "stream",
2694 |      "text": [
2695 |       "ending worker 53\n",
2696 |       "starting worker 76\n"
2697 |      ]
2698 |     },
2699 |     {
2700 |      "data": {
2701 |       "application/vnd.jupyter.widget-view+json": {
2702 |        "model_id": "6228190b82664e1ea3dd9a134bb7ec6c",
2703 |        "version_major": 2,
2704 |        "version_minor": 0
2705 |       },
2706 |       "text/plain": [
2707 |        "HBox(children=(IntProgress(value=0, max=39092), HTML(value='')))"
2708 |       ]
2709 |      },
2710 |      "metadata": {},
2711 |      "output_type": "display_data"
2712 |     },
2713 |     {
2714 |      "name": "stdout",
2715 |      "output_type": "stream",
2716 |      "text": [
2717 |       "\n",
2718 |       "ending worker 55\n",
2719 |       "starting worker 77\n"
2720 |      ]
2721 |     },
2722 |     {
2723 |      "data": {
2724 |       "application/vnd.jupyter.widget-view+json": {
2725 |        "model_id": "8735b6bdf960436ba8bcfdd42e7195c6",
2726 |        "version_major": 2,
2727 |        "version_minor": 0
2728 |       },
2729 |       "text/plain": [
2730 |        "HBox(children=(IntProgress(value=0, max=39061), HTML(value='')))"
2731 |       ]
2732 |      },
2733 |      "metadata": {},
2734 |      "output_type": "display_data"
2735 |     },
2736 |     {
2737 |      "name": "stdout",
2738 |      "output_type": "stream",
2739 |      "text": [
2740 |       "\n",
2741 |       "ending worker 46\n",
2742 |       "starting worker 78\n"
2743 |      ]
2744 |     },
2745 |     {
2746 |      "data": {
2747 |       "application/vnd.jupyter.widget-view+json": {
2748 |        "model_id": "2f1f2e288f4744c6a9a3fc251e09281a",
2749 |        "version_major": 2,
2750 |        "version_minor": 0
2751 |       },
2752 |       "text/plain": [
2753 |        "HBox(children=(IntProgress(value=0, max=39064), HTML(value='')))"
2754 |       ]
2755 |      },
2756 |      "metadata": {},
2757 |      "output_type": "display_data"
2758 |     },
2759 |     {
2760 |      "name": "stdout",
2761 |      "output_type": "stream",
2762 |      "text": [
2763 |       "\n",
2764 |       "ending worker 49\n",
2765 |       "starting worker 79\n"
2766 |      ]
2767 |     },
2768 |     {
2769 |      "data": {
2770 |       "application/vnd.jupyter.widget-view+json": {
2771 |        "model_id": "452f02475ed7405b8596ec5e16cbf8fb",
2772 |        "version_major": 2,
2773 |        "version_minor": 0
2774 |       },
2775 |       "text/plain": [
2776 |        "HBox(children=(IntProgress(value=0, max=39054), HTML(value='')))"
2777 |       ]
2778 |      },
2779 |      "metadata": {},
2780 |      "output_type": "display_data"
2781 |     },
2782 |     {
2783 |      "name": "stderr",
2784 |      "output_type": "stream",
2785 |      "text": [
2786 |       "/home/jfpuget/anaconda3/envs/xgb8/lib/python3.6/site-packages/celerite/celerite.py:119: RuntimeWarning: invalid value encountered in less\n",
2787 |       "  if check_sorted and np.any(np.diff(t) < 0.0):\n",
2788 |       "/home/jfpuget/anaconda3/envs/xgb8/lib/python3.6/site-packages/celerite/celerite.py:119: RuntimeWarning: invalid value encountered in less\n",
2789 |       "  if check_sorted and np.any(np.diff(t) < 0.0):\n",
2790 |       "/home/jfpuget/anaconda3/envs/xgb8/lib/python3.6/site-packages/celerite/celerite.py:119: RuntimeWarning: invalid value encountered in less\n",
2791 |       "  if check_sorted and np.any(np.diff(t) < 0.0):\n",
2792 |       "/home/jfpuget/anaconda3/envs/xgb8/lib/python3.6/site-packages/celerite/celerite.py:119: RuntimeWarning: invalid value encountered in less\n",
2793 |       "  if check_sorted and np.any(np.diff(t) < 0.0):\n",
2794 |       "/home/jfpuget/anaconda3/envs/xgb8/lib/python3.6/site-packages/celerite/celerite.py:119: RuntimeWarning: invalid value encountered in less\n",
2795 |       "  if check_sorted and np.any(np.diff(t) < 0.0):\n"
2796 |      ]
2797 |     },
2798 |     {
2799 |      "name": "stdout",
2800 |      "output_type": "stream",
2801 |      "text": [
2802 |       "\n",
2803 |       "ending worker 60\n",
2804 |       "starting worker 80\n"
2805 |      ]
2806 |     },
2807 |     {
2808 |      "data": {
2809 |       "application/vnd.jupyter.widget-view+json": {
2810 |        "model_id": "cad046af0bef46dc8fcd4acc7739b47d",
2811 |        "version_major": 2,
2812 |        "version_minor": 0
2813 |       },
2814 |       "text/plain": [
2815 |        "HBox(children=(IntProgress(value=0, max=39073), HTML(value='')))"
2816 |       ]
2817 |      },
2818 |      "metadata": {},
2819 |      "output_type": "display_data"
2820 |     },
2821 |     {
2822 |      "name": "stdout",
2823 |      "output_type": "stream",
2824 |      "text": [
2825 |       "\n",
2826 |       "ending worker 61\n",
2827 |       "starting worker 81\n"
2828 |      ]
2829 |     },
2830 |     {
2831 |      "data": {
2832 |       "application/vnd.jupyter.widget-view+json": {
2833 |        "model_id": "ec8f7366b0fb4b4486ebfc4532a2c603",
2834 |        "version_major": 2,
2835 |        "version_minor": 0
2836 |       },
2837 |       "text/plain": [
2838 |        "HBox(children=(IntProgress(value=0, max=39080), HTML(value='')))"
2839 |       ]
2840 |      },
2841 |      "metadata": {},
2842 |      "output_type": "display_data"
2843 |     },
2844 |     {
2845 |      "name": "stderr",
2846 |      "output_type": "stream",
2847 |      "text": [
2848 |       "/home/jfpuget/anaconda3/envs/xgb8/lib/python3.6/site-packages/celerite/celerite.py:119: RuntimeWarning: invalid value encountered in less\n",
2849 |       "  if check_sorted and np.any(np.diff(t) < 0.0):\n",
2850 |       "/home/jfpuget/anaconda3/envs/xgb8/lib/python3.6/site-packages/celerite/celerite.py:119: RuntimeWarning: invalid value encountered in less\n",
2851 |       "  if check_sorted and np.any(np.diff(t) < 0.0):\n",
2852 |       "/home/jfpuget/anaconda3/envs/xgb8/lib/python3.6/site-packages/celerite/celerite.py:119: RuntimeWarning: invalid value encountered in less\n",
2853 |       "  if check_sorted and np.any(np.diff(t) < 0.0):\n",
2854 |       "/home/jfpuget/anaconda3/envs/xgb8/lib/python3.6/site-packages/celerite/celerite.py:119: RuntimeWarning: invalid value encountered in less\n",
2855 |       "  if check_sorted and np.any(np.diff(t) < 0.0):\n",
2856 |       "/home/jfpuget/anaconda3/envs/xgb8/lib/python3.6/site-packages/celerite/celerite.py:119: RuntimeWarning: invalid value encountered in less\n",
2857 |       "  if check_sorted and np.any(np.diff(t) < 0.0):\n"
2858 |      ]
2859 |     },
2860 |     {
2861 |      "name": "stdout",
2862 |      "output_type": "stream",
2863 |      "text": [
2864 |       "\n",
2865 |       "ending worker 62\n",
2866 |       "starting worker 82\n"
2867 |      ]
2868 |     },
2869 |     {
2870 |      "data": {
2871 |       "application/vnd.jupyter.widget-view+json": {
2872 |        "model_id": "dd9968071b35490ca3a0f7cb6660abf2",
2873 |        "version_major": 2,
2874 |        "version_minor": 0
2875 |       },
2876 |       "text/plain": [
2877 |        "HBox(children=(IntProgress(value=0, max=39070), HTML(value='')))"
2878 |       ]
2879 |      },
2880 |      "metadata": {},
2881 |      "output_type": "display_data"
2882 |     },
2883 |     {
2884 |      "name": "stdout",
2885 |      "output_type": "stream",
2886 |      "text": [
2887 |       "\n",
2888 |       "ending worker 78\n",
2889 |       "starting worker 83\n"
2890 |      ]
2891 |     },
2892 |     {
2893 |      "data": {
2894 |       "application/vnd.jupyter.widget-view+json": {
2895 |        "model_id": "39cf94f0db3941b0a9236a6496ea24c2",
2896 |        "version_major": 2,
2897 |        "version_minor": 0
2898 |       },
2899 |       "text/plain": [
2900 |        "HBox(children=(IntProgress(value=0, max=39100), HTML(value='')))"
2901 |       ]
2902 |      },
2903 |      "metadata": {},
2904 |      "output_type": "display_data"
2905 |     },
2906 |     {
2907 |      "name": "stdout",
2908 |      "output_type": "stream",
2909 |      "text": [
2910 |       "\n",
2911 |       "ending worker 75\n",
2912 |       "starting worker 84\n",
2913 |       "\n"
2914 |      ]
2915 |     },
2916 |     {
2917 |      "data": {
2918 |       "application/vnd.jupyter.widget-view+json": {
2919 |        "model_id": "f4947b4fb04d4f1f8743e78c864220ed",
2920 |        "version_major": 2,
2921 |        "version_minor": 0
2922 |       },
2923 |       "text/plain": [
2924 |        "HBox(children=(IntProgress(value=0, max=39063), HTML(value='')))"
2925 |       ]
2926 |      },
2927 |      "metadata": {},
2928 |      "output_type": "display_data"
2929 |     },
2930 |     {
2931 |      "name": "stdout",
2932 |      "output_type": "stream",
2933 |      "text": [
2934 |       "\n",
2935 |       "ending worker 76\n",
2936 |       "starting worker 85\n"
2937 |      ]
2938 |     },
2939 |     {
2940 |      "data": {
2941 |       "application/vnd.jupyter.widget-view+json": {
2942 |        "model_id": "0c798c35b32844c1adfe862fd6f457c6",
2943 |        "version_major": 2,
2944 |        "version_minor": 0
2945 |       },
2946 |       "text/plain": [
2947 |        "HBox(children=(IntProgress(value=0, max=39064), HTML(value='')))"
2948 |       ]
2949 |      },
2950 |      "metadata": {},
2951 |      "output_type": "display_data"
2952 |     },
2953 |     {
2954 |      "name": "stdout",
2955 |      "output_type": "stream",
2956 |      "text": [
2957 |       "ending worker 74\n",
2958 |       "starting worker 86\n",
2959 |       "\n"
2960 |      ]
2961 |     },
2962 |     {
2963 |      "data": {
2964 |       "application/vnd.jupyter.widget-view+json": {
2965 |        "model_id": "26915fb5e9ab44088de9a7009a3e0ddd",
2966 |        "version_major": 2,
2967 |        "version_minor": 0
2968 |       },
2969 |       "text/plain": [
2970 |        "HBox(children=(IntProgress(value=0, max=39086), HTML(value='')))"
2971 |       ]
2972 |      },
2973 |      "metadata": {},
2974 |      "output_type": "display_data"
2975 |     },
2976 |     {
2977 |      "name": "stdout",
2978 |      "output_type": "stream",
2979 |      "text": [
2980 |       "ending worker 77\n",
2981 |       "starting worker 87\n"
2982 |      ]
2983 |     },
2984 |     {
2985 |      "data": {
2986 |       "application/vnd.jupyter.widget-view+json": {
2987 |        "model_id": "c3f6b2c3ad1a4f6491276cdc4ed9c13c",
2988 |        "version_major": 2,
2989 |        "version_minor": 0
2990 |       },
2991 |       "text/plain": [
2992 |        "HBox(children=(IntProgress(value=0, max=39044), HTML(value='')))"
2993 |       ]
2994 |      },
2995 |      "metadata": {},
2996 |      "output_type": "display_data"
2997 |     },
2998 |     {
2999 |      "name": "stdout",
3000 |      "output_type": "stream",
3001 |      "text": [
3002 |       "\n",
3003 |       "ending worker 79\n",
3004 |       "starting worker 88\n"
3005 |      ]
3006 |     },
3007 |     {
3008 |      "data": {
3009 |       "application/vnd.jupyter.widget-view+json": {
3010 |        "model_id": "17db6d070c724a42ac909302d2cf1177",
3011 |        "version_major": 2,
3012 |        "version_minor": 0
3013 |       },
3014 |       "text/plain": [
3015 |        "HBox(children=(IntProgress(value=0, max=39036), HTML(value='')))"
3016 |       ]
3017 |      },
3018 |      "metadata": {},
3019 |      "output_type": "display_data"
3020 |     },
3021 |     {
3022 |      "name": "stdout",
3023 |      "output_type": "stream",
3024 |      "text": [
3025 |       "\n",
3026 |       "\n",
3027 |       "ending worker 66\n",
3028 |       "starting worker 89\n"
3029 |      ]
3030 |     },
3031 |     {
3032 |      "data": {
3033 |       "application/vnd.jupyter.widget-view+json": {
3034 |        "model_id": "16735bc9eb5d4311a040bb3db7ace6ae",
3035 |        "version_major": 2,
3036 |        "version_minor": 0
3037 |       },
3038 |       "text/plain": [
3039 |        "HBox(children=(IntProgress(value=0, max=39076), HTML(value='')))"
3040 |       ]
3041 |      },
3042 |      "metadata": {},
3043 |      "output_type": "display_data"
3044 |     },
3045 |     {
3046 |      "name": "stdout",
3047 |      "output_type": "stream",
3048 |      "text": [
3049 |       "ending worker 69\n",
3050 |       "starting worker 90\n"
3051 |      ]
3052 |     },
3053 |     {
3054 |      "data": {
3055 |       "application/vnd.jupyter.widget-view+json": {
3056 |        "model_id": "69307cf8a4ef42e4983787782f56ad6e",
3057 |        "version_major": 2,
3058 |        "version_minor": 0
3059 |       },
3060 |       "text/plain": [
3061 |        "HBox(children=(IntProgress(value=0, max=28537), HTML(value='')))"
3062 |       ]
3063 |      },
3064 |      "metadata": {},
3065 |      "output_type": "display_data"
3066 |     },
3067 |     {
3068 |      "name": "stdout",
3069 |      "output_type": "stream",
3070 |      "text": [
3071 |       "\n",
3072 |       "\n",
3073 |       "\n",
3074 |       "ending worker 73\n",
3075 |       "starting worker 100\n"
3076 |      ]
3077 |     },
3078 |     {
3079 |      "data": {
3080 |       "application/vnd.jupyter.widget-view+json": {
3081 |        "model_id": "be3951fa96644d1a8417b8d4c7647df0",
3082 |        "version_major": 2,
3083 |        "version_minor": 0
3084 |       },
3085 |       "text/plain": [
3086 |        "HBox(children=(IntProgress(value=0, max=1), HTML(value='')))"
3087 |       ]
3088 |      },
3089 |      "metadata": {},
3090 |      "output_type": "display_data"
3091 |     },
3092 |     {
3093 |      "name": "stdout",
3094 |      "output_type": "stream",
3095 |      "text": [
3096 |       "\n",
3097 |       "ending worker 100\n",
3098 |       "ending worker 67\n",
3099 |       "ending worker 64\n",
3100 |       "\n"
3101 |      ]
3102 |     },
3103 |     {
3104 |      "name": "stderr",
3105 |      "output_type": "stream",
3106 |      "text": [
3107 |       "/home/jfpuget/anaconda3/envs/xgb8/lib/python3.6/site-packages/celerite/celerite.py:119: RuntimeWarning: invalid value encountered in less\n",
3108 |       "  if check_sorted and np.any(np.diff(t) < 0.0):\n"
3109 |      ]
3110 |     },
3111 |     {
3112 |      "name": "stdout",
3113 |      "output_type": "stream",
3114 |      "text": [
3115 |       "\n",
3116 |       "ending worker 70\n",
3117 |       "ending worker 71\n",
3118 |       "\n",
3119 |       "\n",
3120 |       "ending worker 65\n",
3121 |       "ending worker 63\n",
3122 |       "\n",
3123 |       "ending worker 68\n",
3124 |       "\n",
3125 |       "ending worker 72\n"
3126 |      ]
3127 |     },
3128 |     {
3129 |      "name": "stderr",
3130 |      "output_type": "stream",
3131 |      "text": [
3132 |       "/home/jfpuget/anaconda3/envs/xgb8/lib/python3.6/site-packages/celerite/celerite.py:119: RuntimeWarning: invalid value encountered in less\n",
3133 |       "  if check_sorted and np.any(np.diff(t) < 0.0):\n"
3134 |      ]
3135 |     },
3136 |     {
3137 |      "name": "stdout",
3138 |      "output_type": "stream",
3139 |      "text": [
3140 |       "\n",
3141 |       "ending worker 80\n",
3142 |       "\n",
3143 |       "ending worker 81\n"
3144 |      ]
3145 |     },
3146 |     {
3147 |      "name": "stderr",
3148 |      "output_type": "stream",
3149 |      "text": [
3150 |       "/home/jfpuget/anaconda3/envs/xgb8/lib/python3.6/site-packages/celerite/celerite.py:119: RuntimeWarning: invalid value encountered in less\n",
3151 |       "  if check_sorted and np.any(np.diff(t) < 0.0):\n",
3152 |       "/home/jfpuget/anaconda3/envs/xgb8/lib/python3.6/site-packages/celerite/celerite.py:119: RuntimeWarning: invalid value encountered in less\n",
3153 |       "  if check_sorted and np.any(np.diff(t) < 0.0):\n"
3154 |      ]
3155 |     },
3156 |     {
3157 |      "name": "stdout",
3158 |      "output_type": "stream",
3159 |      "text": [
3160 |       "\n",
3161 |       "ending worker 90\n"
3162 |      ]
3163 |     },
3164 |     {
3165 |      "name": "stderr",
3166 |      "output_type": "stream",
3167 |      "text": [
3168 |       "/home/jfpuget/anaconda3/envs/xgb8/lib/python3.6/site-packages/celerite/celerite.py:119: RuntimeWarning: invalid value encountered in less\n",
3169 |       "  if check_sorted and np.any(np.diff(t) < 0.0):\n"
3170 |      ]
3171 |     },
3172 |     {
3173 |      "name": "stdout",
3174 |      "output_type": "stream",
3175 |      "text": [
3176 |       "\n",
3177 |       "ending worker 82\n",
3178 |       "\n",
3179 |       "ending worker 88\n",
3180 |       "\n",
3181 |       "ending worker 89\n",
3182 |       "\n",
3183 |       "\n",
3184 |       "\n",
3185 |       "\n",
3186 |       "ending worker 84\n",
3187 |       "ending worker 85\n",
3188 |       "ending worker 86\n",
3189 |       "ending worker 83\n",
3190 |       "\n",
3191 |       "ending worker 87\n"
3192 |      ]
3193 |     }
3194 |    ],
3195 |    "source": [
3196 |     "params = [(i, fname) for i in range(91)]\n",
3197 |     "params.append((100, fname))\n",
3198 |     "\n",
3199 |     "if 1: \n",
3200 |     "    pool = Pool(processes=20, maxtasksperchild=1)\n",
3201 |     "    ls   = pool.map( work_test, params, chunksize=1 )\n",
3202 |     "    pool.close()\n",
3203 |     "else:\n",
3204 |     "    ls = [work_tta(param) for param in params]"
3205 |    ]
3206 |   },
3207 |   {
3208 |    "cell_type": "code",
3209 |    "execution_count": null,
3210 |    "metadata": {},
3211 |    "outputs": [],
3212 |    "source": []
3213 |   }
3214 |  ],
3215 |  "metadata": {
3216 |   "kernelspec": {
3217 |    "display_name": "Python [conda env:xgb8]",
3218 |    "language": "python",
3219 |    "name": "conda-env-xgb8-py"
3220 |   },
3221 |   "language_info": {
3222 |    "codemirror_mode": {
3223 |     "name": "ipython",
3224 |     "version": 3
3225 |    },
3226 |    "file_extension": ".py",
3227 |    "mimetype": "text/x-python",
3228 |    "name": "python",
3229 |    "nbconvert_exporter": "python",
3230 |    "pygments_lexer": "ipython3",
3231 |    "version": "3.6.5"
3232 |   }
3233 |  },
3234 |  "nbformat": 4,
3235 |  "nbformat_minor": 2
3236 | }
3237 | 


--------------------------------------------------------------------------------
/code/lgb_best.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": null,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "fname = 'lgb_1181'\n",
 10 |     "fname_base = 'base_006'\n",
 11 |     "fname_bazin = 'bazin_003'\n",
 12 |     "fname_newling = 'newling_003'"
 13 |    ]
 14 |   },
 15 |   {
 16 |    "cell_type": "code",
 17 |    "execution_count": null,
 18 |    "metadata": {},
 19 |    "outputs": [],
 20 |    "source": [
 21 |     "# number of train time augmentations.\n",
 22 |     "n_tta = 6\n",
 23 |     "\n",
 24 |     "seed = 0"
 25 |    ]
 26 |   },
 27 |   {
 28 |    "cell_type": "code",
 29 |    "execution_count": null,
 30 |    "metadata": {},
 31 |    "outputs": [],
 32 |    "source": [
 33 |     "import numpy as np\n",
 34 |     "import pandas as pd\n",
 35 |     "from sklearn.model_selection import StratifiedKFold\n",
 36 |     "from sklearn.metrics import confusion_matrix\n",
 37 |     "from scipy.optimize import curve_fit\n",
 38 |     "import gc\n",
 39 |     "import matplotlib.pyplot as plt\n",
 40 |     "import seaborn as sns\n",
 41 |     "import lightgbm as lgb\n",
 42 |     "import xgboost as xgb\n",
 43 |     "import logging\n",
 44 |     "from tqdm import tqdm_notebook\n",
 45 |     "import itertools\n",
 46 |     "import pickle as pkl\n",
 47 |     "\n",
 48 |     "pd.options.display.max_columns = 400"
 49 |    ]
 50 |   },
 51 |   {
 52 |    "cell_type": "code",
 53 |    "execution_count": null,
 54 |    "metadata": {},
 55 |    "outputs": [],
 56 |    "source": [
 57 |     "import random as rn\n",
 58 |     "def init_seeds(seed):\n",
 59 |     "\n",
 60 |     "    # The below is necessary for starting Numpy generated random numbers\n",
 61 |     "    # in a well-defined initial state.\n",
 62 |     "\n",
 63 |     "    np.random.seed(seed)\n",
 64 |     "\n",
 65 |     "    # The below is necessary for starting core Python generated random numbers\n",
 66 |     "    # in a well-defined state.\n",
 67 |     "\n",
 68 |     "    rn.seed(seed)\n",
 69 |     "\n",
 70 |     "\n",
 71 |     "init_seeds(seed)"
 72 |    ]
 73 |   },
 74 |   {
 75 |    "cell_type": "code",
 76 |    "execution_count": null,
 77 |    "metadata": {},
 78 |    "outputs": [],
 79 |    "source": [
 80 |     "def create_logger():\n",
 81 |     "    logger_ = logging.getLogger('main')\n",
 82 |     "    logger_.setLevel(logging.DEBUG)\n",
 83 |     "    fh = logging.FileHandler('simple_lightgbm.log')\n",
 84 |     "    fh.setLevel(logging.DEBUG)\n",
 85 |     "    ch = logging.StreamHandler()\n",
 86 |     "    ch.setLevel(logging.DEBUG)\n",
 87 |     "    formatter = logging.Formatter('[%(levelname)s]%(asctime)s:%(name)s:%(message)s')\n",
 88 |     "    fh.setFormatter(formatter)\n",
 89 |     "    ch.setFormatter(formatter)\n",
 90 |     "    # add the handlers to the logger\n",
 91 |     "    logger_.addHandler(fh)\n",
 92 |     "    logger_.addHandler(ch)\n",
 93 |     "\n",
 94 |     "\n",
 95 |     "def get_logger():\n",
 96 |     "    return logging.getLogger('main')"
 97 |    ]
 98 |   },
 99 |   {
100 |    "cell_type": "code",
101 |    "execution_count": null,
102 |    "metadata": {},
103 |    "outputs": [],
104 |    "source": [
105 |     "def xgb_multi_weighted_logloss(preds, dtrain):\n",
106 |     "    labels = dtrain.get_label()\n",
107 |     "    return 'xgb_multi_weighted_loss', lgb_multi_weighted_logloss(labels, preds)\n",
108 |     "\n",
109 |     "def eval_lgb_multi_weighted_logloss(preds, train_data, n_tta=n_tta):\n",
110 |     "    \n",
111 |     "    label = train_data.get_label()\n",
112 |     "    classes = list(range(14))\n",
113 |     "    class_weight = {0: 1, 1: 2, 2: 1, 3: 1, 4: 1, 5: 1, 6: 1, 7: 2, 8: 1, 9: 1, 10: 1, 11: 1, 12: 1, 13: 1}\n",
114 |     "    res = lgb_multi_weighted_logloss(label, preds, classes, class_weight, n_tta)\n",
115 |     "    \n",
116 |     "    return res\n",
117 |     "    \n",
118 |     "def lgb_multi_weighted_logloss(y_true, y_preds,\n",
119 |     "                               classes=[6, 15, 16, 42, 52, 53, 62, 64, 65, 67, 88, 90, 92, 95],\n",
120 |     "                               class_weight={6: 1, 15: 2, 16: 1, 42: 1, 52: 1, 53: 1, 62: 1, 64: 2, \n",
121 |     "                                             65: 1, 67: 1, 88: 1, 90: 1, 92: 1, 95: 1},\n",
122 |     "                               n_tta = n_tta,\n",
123 |     "):\n",
124 |     "    \n",
125 |     "    if len(np.unique(y_true)) > 14:\n",
126 |     "        classes.append(99)\n",
127 |     "        class_weight[99] = 2\n",
128 |     "    y_preds = y_preds.reshape(y_true.shape[0], len(classes), order='F')\n",
129 |     "    \n",
130 |     "    size = y_true.shape[0] // n_tta\n",
131 |     "    y_true = y_true[:size]\n",
132 |     "    y_p = np.zeros((size, len(classes)))\n",
133 |     "    \n",
134 |     "    for i in range(n_tta):\n",
135 |     "         y_p += y_preds[i * size : (i+1) * size]\n",
136 |     "    y_p /= n_tta\n",
137 |     "    # Transform y_true into dummies\n",
138 |     "    y_ohe = pd.get_dummies(y_true)\n",
139 |     "    # Clip y_preds to [1e-15, 1 - 1e-15] (rows are not renormalized here)\n",
140 |     "    y_p = np.clip(a=y_p, a_min=1e-15, a_max=1 - 1e-15)\n",
141 |     "    # Transform to log\n",
142 |     "    y_p_log = np.log(y_p)\n",
143 |     "    # Get the log for ones, .values is used to drop the index of DataFrames\n",
144 |     "    # Exclude class 99 for now, since there is no class99 in the training set\n",
145 |     "    # we gave a special process for that class\n",
146 |     "    y_log_ones = np.sum(y_ohe.values * y_p_log, axis=0)\n",
147 |     "    # Get the number of positives for each class\n",
148 |     "    nb_pos = y_ohe.sum(axis=0).values.astype(float)\n",
149 |     "    # Weight average and divide by the number of positives\n",
150 |     "    class_arr = np.array([class_weight[k] for k in sorted(class_weight.keys())])\n",
151 |     "    y_w = y_log_ones * class_arr / nb_pos\n",
152 |     "\n",
153 |     "    loss = - np.sum(y_w) / np.sum(class_arr)\n",
154 |     "    return 'wloss', loss, False\n",
155 |     "\n",
156 |     "\n",
157 |     "def multi_weighted_logloss(y_true, y_preds,\n",
158 |     "                              classes = [6, 15, 16, 42, 52, 53, 62, 64, 65, 67, 88, 90, 92, 95],\n",
159 |     "    class_weight = {6: 1, 15: 2, 16: 1, 42: 1, 52: 1, 53: 1, 62: 1, \n",
160 |     "                    64: 2, 65: 1, 67: 1, 88: 1, 90: 1, 92: 1, 95: 1}\n",
161 |     "    ):\n",
162 |     "    \"\"\"\n",
163 |     "    @author olivier https://www.kaggle.com/ogrellier\n",
164 |     "    multi logloss for PLAsTiCC challenge\n",
165 |     "    \"\"\"\n",
166 |     "    # class_weights taken from Giba's topic : https://www.kaggle.com/titericz\n",
167 |     "    # https://www.kaggle.com/c/PLAsTiCC-2018/discussion/67194\n",
168 |     "    # with Kyle Boone's post https://www.kaggle.com/kyleboone\n",
169 |     "    if len(np.unique(y_true)) > 14:\n",
170 |     "        classes.append(99)\n",
171 |     "        class_weight[99] = 2\n",
172 |     "    y_p = y_preds\n",
173 |     "    # Transform y_true into dummies\n",
174 |     "    y_ohe = pd.get_dummies(y_true)\n",
175 |     "    # Clip y_preds to [1e-15, 1 - 1e-15] (rows are not renormalized here)\n",
176 |     "    y_p = np.clip(a=y_p, a_min=1e-15, a_max=1 - 1e-15)\n",
177 |     "    # Transform to log\n",
178 |     "    y_p_log = np.log(y_p)\n",
179 |     "    # Get the log for ones, .values is used to drop the index of DataFrames\n",
180 |     "    # Exclude class 99 for now, since there is no class99 in the training set\n",
181 |     "    # we gave a special process for that class\n",
182 |     "    y_log_ones = np.sum(y_ohe.values * y_p_log, axis=0)\n",
183 |     "    # Get the number of positives for each class\n",
184 |     "    nb_pos = y_ohe.sum(axis=0).values.astype(float)\n",
185 |     "    # Weight average and divide by the number of positives\n",
186 |     "    class_arr = np.array([class_weight[k] for k in sorted(class_weight.keys())])\n",
187 |     "    y_w = y_log_ones * class_arr / nb_pos\n",
188 |     "\n",
189 |     "    loss = - np.sum(y_w) / np.sum(class_arr)\n",
190 |     "    return loss\n",
191 |     "\n",
192 |     "def save_importances(importances_):\n",
193 |     "    mean_gain = importances_[['gain', 'feature']].groupby('feature').mean()\n",
194 |     "    importances_['mean_gain'] = importances_['feature'].map(mean_gain['gain'])\n",
195 |     "    plt.figure(figsize=(8, 12))\n",
196 |     "    sns.barplot(x='gain', y='feature', data=importances_.sort_values('mean_gain', ascending=False))\n",
197 |     "    plt.tight_layout()\n",
198 |     "    plt.savefig('importances.png')"
199 |    ]
200 |   },
201 |   {
202 |    "cell_type": "code",
203 |    "execution_count": null,
204 |    "metadata": {},
205 |    "outputs": [],
206 |    "source": [
207 |     "def get_importances(clfs):\n",
208 |     "    importances = [clf.feature_importance('gain') for clf in clfs]\n",
209 |     "    importances = np.vstack(importances)\n",
210 |     "    mean_gain = np.mean(importances, axis=0)\n",
211 |     "    features = clfs[0].feature_name()\n",
212 |     "    data = pd.DataFrame({'gain':mean_gain, 'feature':features})\n",
213 |     "    plt.figure(figsize=(8, 30))\n",
214 |     "    sns.barplot(x='gain', y='feature', data=data.sort_values('gain', ascending=False))\n",
215 |     "    plt.tight_layout()\n",
216 |     "    plt.savefig('importances.png')\n",
217 |     "    return data\n",
218 |     "\n",
219 |     "def train_classifiers(lgb_params, full_train=None, y=None, w=None, verbose=2000, \n",
220 |     "                      folds=5, ttas=None):\n",
221 |     "    print(full_train.shape[1], 'features')\n",
222 |     "    kf = StratifiedKFold(n_splits=folds, shuffle=True, random_state=1)\n",
223 |     "    clfs = []\n",
224 |     "    importances = pd.DataFrame()\n",
225 |     "    oof_preds = np.zeros((len(full_train), np.unique(y).shape[0]))\n",
226 |     "    tta_preds = np.zeros((len(full_train), np.unique(y).shape[0]))\n",
227 |     "    for fold_, (trn_, val_) in tqdm_notebook(enumerate(kf.split(y, y)), total=folds):\n",
228 |     "        print()\n",
229 |     "        print('fold %2d' % fold_)\n",
230 |     "        trn_x, trn_y, trn_w = full_train.iloc[trn_], y.iloc[trn_], w.iloc[trn_]\n",
231 |     "        val_x, val_y, val_w = full_train.iloc[val_], y.iloc[val_], w.iloc[val_]\n",
232 |     "        size = val_y.shape[0]\n",
233 |     "        for tta in ttas:\n",
234 |     "            tta_x = tta.iloc[trn_]\n",
235 |     "            trn_x = pd.concat((trn_x, tta_x), axis=0)\n",
236 |     "            tta_y = y.iloc[trn_]\n",
237 |     "            trn_y = pd.concat((trn_y, tta_y), axis=0)\n",
238 |     "            tta_w = w.iloc[trn_]\n",
239 |     "            trn_w = pd.concat((trn_w, tta_w), axis=0)\n",
240 |     "            val_x = pd.concat((val_x, tta.iloc[val_]), axis=0)\n",
241 |     "            val_y = pd.concat((val_y, y.iloc[val_]), axis=0)\n",
242 |     "            val_w = pd.concat((val_w, w.iloc[val_]), axis=0)\n",
243 |     "        trn_x = lgb.Dataset(trn_x, label=trn_y, weight=trn_w)\n",
244 |     "        val_x = lgb.Dataset(val_x, label=val_y, weight=val_w)\n",
245 |     "        clf = lgb.train(\n",
246 |     "            lgb_params,\n",
247 |     "            trn_x, \n",
248 |     "            num_boost_round = 4000,\n",
249 |     "            valid_sets=[trn_x, val_x],\n",
250 |     "            valid_names = ['train', 'val'],\n",
251 |     "            feval=eval_lgb_multi_weighted_logloss,\n",
252 |     "            verbose_eval=verbose,\n",
253 |     "            early_stopping_rounds=100\n",
254 |     "        )\n",
255 |     "        val_x = full_train.iloc[val_]\n",
256 |     "        \n",
257 |     "        oof_pred = clf.predict(val_x)\n",
258 |     "        oof_preds[val_, :] = oof_pred\n",
259 |     "        tta_pred = np.zeros(oof_pred.shape)\n",
260 |     "        for tta in ttas:\n",
261 |     "            val_x = tta.iloc[val_]\n",
262 |     "            tta_pred += clf.predict(val_x)\n",
263 |     "        tta_pred /= len(ttas)\n",
264 |     "        tta_preds[val_, :] = tta_pred\n",
265 |     "        print('val mwloss: %0.3f' % multi_weighted_logloss( y.iloc[val_], oof_pred),\n",
266 |     "              'tta mwloss: %0.3f' % multi_weighted_logloss( y.iloc[val_], tta_pred),\n",
267 |     "             )\n",
268 |     "        \n",
269 |     "        clfs.append(clf)\n",
270 |     "\n",
271 |     "    get_logger().info('MULTI WEIGHTED LOG LOSS : %.5f ' % multi_weighted_logloss(y_true=y, y_preds=oof_preds))\n",
272 |     "\n",
273 |     "    importances = get_importances(clfs)\n",
274 |     "    return clfs, importances, oof_preds, tta_preds"
275 |    ]
276 |   },
277 |   {
278 |    "cell_type": "code",
279 |    "execution_count": null,
280 |    "metadata": {},
281 |    "outputs": [],
282 |    "source": [
283 |     "def get_data(full_train, train_bazin, newling):\n",
284 |     "    # get the right object_ids for the result.\n",
285 |     "    full_train = full_train.merge(train_bazin, how='left', on='object_id')\n",
286 |     "    full_train = full_train.merge(newling, how='left', on='object_id')\n",
287 |     "    for pb in range(6):\n",
288 |     "        full_train['bazin_A_%d' % pb] *= full_train.hostgal_photoz **2 * full_train.scale_mean\n",
289 |     "        full_train['newling_A_%d' % pb] *= full_train.hostgal_photoz **2 * full_train.scale_mean\n",
290 |     "    full_train['bazin_magnitude'] = full_train[['bazin_A_%d' % pb  for pb in range(6)]].max(axis=1)\n",
291 |     "    full_train['newling_magnitude'] = full_train[['newling_A_%d' % pb  for pb in range(6)]].max(axis=1)\n",
292 |     "    for pb in range(6):\n",
293 |     "        full_train['bazin_A_%d' % pb] /= full_train['bazin_magnitude']\n",
294 |     "        full_train['newling_A_%d' % pb] /= full_train['newling_magnitude']\n",
295 |     "    full_train.hostgal_photoz = 1*(full_train.hostgal_photoz > 0)\n",
296 |     "    return full_train"
297 |    ]
298 |   },
299 |   {
300 |    "cell_type": "code",
301 |    "execution_count": null,
302 |    "metadata": {},
303 |    "outputs": [],
304 |    "source": [
305 |     "with open('../data/ttas_%s.pkl' % fname_base, 'rb') as file:\n",
306 |     "    ttas = pkl.load(file)\n",
307 |     "full_train = ttas[0]\n",
308 |     "ttas = ttas[1 : n_tta]\n",
309 |     "full_train.head()"
310 |    ]
311 |   },
312 |   {
313 |    "cell_type": "code",
314 |    "execution_count": null,
315 |    "metadata": {},
316 |    "outputs": [],
317 |    "source": [
318 |     "meta_cols = ['object_id', 'hostgal_photoz', 'mwebv', 'target']\n",
319 |     "meta_train = pd.read_csv('../data/train_meta.csv')[meta_cols]\n",
320 |     "meta_cols = ['object_id', 'hostgal_photoz', 'mwebv']\n",
321 |     "meta_test = pd.read_csv('../input/test_set_metadata.csv')[meta_cols]\n",
322 |     "\n",
323 |     "meta_train.head()"
324 |    ]
325 |   },
326 |   {
327 |    "cell_type": "code",
328 |    "execution_count": null,
329 |    "metadata": {},
330 |    "outputs": [],
331 |    "source": [
332 |     "with open('../data/tta_0_%s.pkl' % fname_bazin, 'rb') as file:\n",
333 |     "    train_bazin = pkl.load(file)\n",
334 |     "train_bazin.head()\n",
335 |     "    "
336 |    ]
337 |   },
338 |   {
339 |    "cell_type": "code",
340 |    "execution_count": null,
341 |    "metadata": {},
342 |    "outputs": [],
343 |    "source": [
344 |     "ttas_bazin = []\n",
345 |     "for i in range(1, n_tta):\n",
346 |     "    with open('../data/tta_%d_%s.pkl' % (i, fname_bazin), 'rb') as file:\n",
347 |     "        ttas_bazin.append(pkl.load(file))"
348 |    ]
349 |   },
350 |   {
351 |    "cell_type": "code",
352 |    "execution_count": null,
353 |    "metadata": {},
354 |    "outputs": [],
355 |    "source": [
356 |     "ttas_bazin[0].head()"
357 |    ]
358 |   },
359 |   {
360 |    "cell_type": "code",
361 |    "execution_count": null,
362 |    "metadata": {},
363 |    "outputs": [],
364 |    "source": [
365 |     "with open('../data/tta_0_%s.pkl' % fname_newling, 'rb') as file:\n",
366 |     "    train_newling = pkl.load(file)\n",
367 |     "train_newling.head()"
368 |    ]
369 |   },
370 |   {
371 |    "cell_type": "code",
372 |    "execution_count": null,
373 |    "metadata": {},
374 |    "outputs": [],
375 |    "source": [
376 |     "ttas_newling = []\n",
377 |     "for i in range(1, n_tta):\n",
378 |     "    with open('../data/tta_%d_%s.pkl' % (i, fname_newling ), 'rb') as file:\n",
379 |     "        ttas_newling.append(pkl.load(file))"
380 |    ]
381 |   },
382 |   {
383 |    "cell_type": "code",
384 |    "execution_count": null,
385 |    "metadata": {},
386 |    "outputs": [],
387 |    "source": [
388 |     "full_train = get_data(full_train, train_bazin, train_newling)\n",
389 |     "full_train.head()"
390 |    ]
391 |   },
392 |   {
393 |    "cell_type": "code",
394 |    "execution_count": null,
395 |    "metadata": {},
396 |    "outputs": [],
397 |    "source": [
398 |     "n_tta = 6\n",
399 |     "init_seeds(seed)\n",
400 |     "\n",
401 |     "ttas = [get_data(full_train, train_bazin, train_newling) \\\n",
402 |     "        for full_train, train_bazin, train_newling \\\n",
403 |     "        in tqdm_notebook(zip(ttas, ttas_bazin, ttas_newling))]\n",
404 |     "#for tta in ttas:\n",
405 |     "#    tta.fillna(train_mean, inplace=True)"
406 |    ]
407 |   },
408 |   {
409 |    "cell_type": "code",
410 |    "execution_count": null,
411 |    "metadata": {},
412 |    "outputs": [],
413 |    "source": [
414 |     "classes = sorted(np.unique(meta_train.target))\n",
415 |     "classes\n",
416 |     "\n",
417 |     "class_names = ['class_%d' % c for c in classes]\n",
418 |     "\n",
419 |     "weights = [1/18  if i not in [15, 64, 99] else 1/9 for i in classes]\n",
420 |     "weights\n",
421 |     "\n",
422 |     "df = meta_train.groupby('target').object_id.count().to_frame('freq')\n",
423 |     "df.freq /= df.freq.sum()\n",
424 |     "df['weight'] = weights\n",
425 |     "df['adjust'] = df.weight / df.freq\n",
426 |     "df"
427 |    ]
428 |   },
429 |   {
430 |    "cell_type": "code",
431 |    "execution_count": null,
432 |    "metadata": {
433 |     "scrolled": true
434 |    },
435 |    "outputs": [],
436 |    "source": [
437 |     "y = meta_train['target']\n",
438 |     "\n",
439 |     "ws = y.copy()\n",
440 |     "for c,w in zip(classes, df.adjust.values):\n",
441 |     "    print(c, w)\n",
442 |     "    ws[y == c] = w"
443 |    ]
444 |   },
445 |   {
446 |    "cell_type": "code",
447 |    "execution_count": null,
448 |    "metadata": {},
449 |    "outputs": [],
450 |    "source": [
451 |     "y_lgb = y.copy()\n",
452 |     "for i,c in enumerate(classes):\n",
453 |     "    y_lgb[y_lgb == c] = i"
454 |    ]
455 |   },
456 |   {
457 |    "cell_type": "code",
458 |    "execution_count": null,
459 |    "metadata": {
460 |     "scrolled": true
461 |    },
462 |    "outputs": [],
463 |    "source": [
464 |     "list(full_train.columns)"
465 |    ]
466 |   },
467 |   {
468 |    "cell_type": "code",
469 |    "execution_count": null,
470 |    "metadata": {},
471 |    "outputs": [],
472 |    "source": [
473 |     "removed = [\n",
474 |     "    'object_id',\n",
475 |     "    'target',\n",
476 |     "    \n",
477 |     "    'newling_A_1',\n",
478 |     "    'newling_A_2',\n",
479 |     "    'newling_A_3',\n",
480 |     "    'newling_A_4',\n",
481 |     "    'newling_A_5',\n",
482 |     "    'newling_k_1',\n",
483 |     "    'newling_k_2',\n",
484 |     "    'newling_k_3',\n",
485 |     "    'newling_k_4',\n",
486 |     "    'newling_k_5',\n",
487 |     "    \n",
488 |     "    'bazin_pcov_0',\n",
489 |     "    'bazin_pcov_1',\n",
490 |     "    'bazin_pcov_2',\n",
491 |     "    'bazin_pcov_3',\n",
492 |     "    'bazin_pcov_4',\n",
493 |     "    'bazin_pcov_5',\n",
494 |     "    'bazin_max_0',\n",
495 |     "    'bazin_max_1',\n",
496 |     "    'bazin_max_2',\n",
497 |     "    'bazin_max_3',\n",
498 |     "    'bazin_max_4',\n",
499 |     "    'bazin_max_5',\n",
500 |     "    #'bazin_A_0',\n",
501 |     "    #'bazin_A_1',\n",
502 |     "    #'bazin_A_2',\n",
503 |     "    #'bazin_A_3',\n",
504 |     "    #'bazin_A_4',\n",
505 |     "    #'bazin_A_5',\n",
506 |     "    'bazin_before_0',\n",
507 |     "    'bazin_before_1',\n",
508 |     "    'bazin_before_2',\n",
509 |     "    'bazin_before_3',\n",
510 |     "    'bazin_before_4',\n",
511 |     "    'bazin_before_5',\n",
512 |     "    'bazin_after_0',\n",
513 |     "    'bazin_after_1',\n",
514 |     "    'bazin_after_2',\n",
515 |     "    'bazin_after_3',\n",
516 |     "    'bazin_after_4',\n",
517 |     "    'bazin_after_5',\n",
518 |     "    \n",
519 |     "    #'bazin_trise',\n",
520 |     "    \n",
521 |     "    'mwebv',\n",
522 |     "    'num_obs',\n",
523 |     "]\n",
524 |     "\n",
525 |     "features = [c for c in full_train.columns if c not in removed]\n",
526 |     "\n",
527 |     "features\n",
528 |     "\n",
529 |     "full_train1 = full_train[features].copy()\n",
530 |     "ttas1 = [tta[features].copy() for tta in ttas]"
531 |    ]
532 |   },
533 |   {
534 |    "cell_type": "code",
535 |    "execution_count": null,
536 |    "metadata": {},
537 |    "outputs": [],
538 |    "source": [
539 |     "features"
540 |    ]
541 |   },
542 |   {
543 |    "cell_type": "code",
544 |    "execution_count": null,
545 |    "metadata": {
546 |     "scrolled": true
547 |    },
548 |    "outputs": [],
549 |    "source": [
550 |     "lgb_params = {\n",
551 |     "    'boosting_type': 'goss',\n",
552 |     "    'objective': 'multiclass',\n",
553 |     "    'num_class': 14,\n",
554 |     "    'metric': 'None',\n",
555 |     "    'learning_rate': 0.015,\n",
556 |     "    'colsample_bytree': .5,\n",
557 |     "    'feature_fraction_seed':seed+2,\n",
558 |     "    'reg_alpha': .01,\n",
559 |     "    'reg_lambda': .1,\n",
560 |     "    'min_split_gain': 0.1,\n",
561 |     "    'min_child_weight': 20 * (1 + len(ttas)),\n",
562 |     "    #'n_estimators': 4000,\n",
563 |     "    #'silent': -1,\n",
564 |     "    'verbose': -1,\n",
565 |     "    #'max_depth': 4,\n",
566 |     "    'num_leaves' : 7,\n",
567 |     "    #'num_threads': 10,\n",
568 |     "}\n",
569 |     "clfs, importances, oof_preds, tta_preds = train_classifiers(lgb_params, full_train1, y_lgb, ws, \n",
570 |     "                                                            folds=10, ttas=ttas1)\n",
571 |     "\n",
572 |     "#save_importances(importances_=importances)\n",
573 |     "print('%0.5f' % multi_weighted_logloss(y, oof_preds), \n",
574 |     "      '%0.5f' % multi_weighted_logloss(y, tta_preds))"
575 |    ]
576 |   },
577 |   {
578 |    "cell_type": "code",
579 |    "execution_count": null,
580 |    "metadata": {},
581 |    "outputs": [],
582 |    "source": [
583 |     "with open('../data/oof_preds_%s.pkl' % fname, 'wb') as file:\n",
584 |     "    pkl.dump(oof_preds, file)\n",
585 |     "    \n",
586 |     "with open('../data/tta_preds_%s.pkl' % fname, 'wb') as file:\n",
587 |     "    pkl.dump(tta_preds, file)"
588 |    ]
589 |   },
590 |   {
591 |    "cell_type": "code",
592 |    "execution_count": null,
593 |    "metadata": {},
594 |    "outputs": [],
595 |    "source": [
596 |     "def predict_chunk(clfs_, features, class_names, chunk_id, fname_base, \n",
597 |     "                  fname_bazin=fname_bazin, fname_newling=fname_newling, \n",
598 |     "                  ):\n",
599 |     "\n",
600 |     "    with open('../data/full_test_chunk_%s_%d.pkl' % (fname_base, chunk_id), 'rb') as file:\n",
601 |     "        full_test = pkl.load(file)\n",
602 |     "        \n",
603 |     "    with open('../data/bazin_test_%d_%s.pkl' % (chunk_id, fname_bazin), 'rb') as file:\n",
604 |     "        test_bazin = pkl.load(file)\n",
605 |     "    \n",
606 |     "    with open('../data/test_%d_%s.pkl' % (chunk_id, fname_newling), 'rb') as file:\n",
607 |     "            test_newling = pkl.load(file)  \n",
608 |     "            \n",
609 |     "    if ('newling_sigma_1') not in test_newling.columns:\n",
610 |     "        test_newling['newling_sigma_1'] = np.NaN\n",
611 |     "            \n",
612 |     "    full_test = get_data(full_test, test_bazin, test_newling)\n",
613 |     "    #full_test = full_test.fillna(train_mean)\n",
614 |     "    # Make predictions\n",
615 |     "    preds_ = None\n",
616 |     "    for clf in clfs_:\n",
617 |     "        if preds_ is None:\n",
618 |     "            preds_ = clf.predict(full_test[features]) / len(clfs_)\n",
619 |     "        else:\n",
620 |     "            preds_ += clf.predict(full_test[features]) / len(clfs_)\n",
621 |     "\n",
622 |     "    # Compute preds_99 as the proba of class not being any of the others\n",
623 |     "    # preds_99 = 0.1 gives 1.769\n",
624 |     "    preds_99 = np.ones(preds_.shape[0])\n",
625 |     "    for i in range(preds_.shape[1]):\n",
626 |     "        preds_99 *= (1 - preds_[:, i])\n",
627 |     "    \n",
628 |     "    # Create DataFrame from predictions\n",
629 |     "    preds_df_ = pd.DataFrame(preds_, columns=class_names)\n",
630 |     "    preds_df_['object_id'] = full_test['object_id']\n",
631 |     "    preds_df_['class_99'] = preds_99\n",
632 |     "\n",
633 |     "    print(preds_df_['class_99'].mean())\n",
634 |     "\n",
635 |     "    del full_test, preds_\n",
636 |     "    gc.collect()\n",
637 |     "\n",
638 |     "    return preds_df_\n"
639 |    ]
640 |   },
641 |   {
642 |    "cell_type": "code",
643 |    "execution_count": null,
644 |    "metadata": {
645 |     "scrolled": true
646 |    },
647 |    "outputs": [],
648 |    "source": [
649 |     "import time\n",
650 |     "\n",
651 |     "start = time.time()\n",
652 |     "chunks = 5000000\n",
653 |     "remain_df = None\n",
654 |     "\n",
655 |     "for i_c in tqdm_notebook(range(91)):\n",
656 |     "\n",
657 |     "    preds_df = predict_chunk(clfs_=clfs,\n",
658 |     "                             features=features,\n",
659 |     "                             class_names=class_names,\n",
660 |     "                             chunk_id=i_c,\n",
661 |     "                             fname_base=fname_base,\n",
662 |     "                            )\n",
663 |     "\n",
664 |     "    if i_c == 0:\n",
665 |     "        print(preds_df.mean(axis=0))\n",
666 |     "        preds_df.to_csv('../submissions/%s.csv' %fname, header=True, index=False, float_format='%.6f')\n",
667 |     "    else:\n",
668 |     "        preds_df.to_csv('../submissions/%s.csv' %fname, header=False, mode='a', index=False, float_format='%.6f')\n",
669 |     "\n",
670 |     "    del preds_df\n",
671 |     "    gc.collect()\n",
672 |     "\n",
673 |     "    if (i_c + 1) % 10 == 0:\n",
674 |     "        get_logger().info('%15d done in %5.1f' % (chunks * (i_c + 1), (time.time() - start) / 60))\n",
675 |     "        print('%15d done in %5.1f' % (chunks * (i_c + 1), (time.time() - start) / 60))\n",
676 |     "\n",
677 |     "# Compute last object in remain_df\n",
678 |     "\n",
679 |     "preds_df = predict_chunk(clfs_=clfs,\n",
680 |     "                         rnn_test=rnn_test,\n",
681 |     "                         features=features,\n",
682 |     "                         class_names=class_names,\n",
683 |     "                         chunk_id=100,\n",
684 |     "                         fname_base=fname_base,\n",
685 |     "                        )\n",
686 |     "\n",
687 |     "preds_df.to_csv('../submissions/%s.csv' %fname, \n",
688 |     "                header=False, mode='a', index=False, float_format='%.6f')"
689 |    ]
690 |   },
691 |   {
692 |    "cell_type": "code",
693 |    "execution_count": null,
694 |    "metadata": {},
695 |    "outputs": [],
696 |    "source": [
697 |     "z = pd.read_csv('../submissions/%s.csv' %fname)\n",
698 |     "\n",
699 |     "z = z.groupby('object_id').mean()\n",
700 |     "\n",
701 |     "z.shape"
702 |    ]
703 |   },
704 |   {
705 |    "cell_type": "code",
706 |    "execution_count": null,
707 |    "metadata": {},
708 |    "outputs": [],
709 |    "source": [
710 |     "meta_cols = ['hostgal_photoz', 'target']\n",
711 |     "meta_train2 = pd.read_csv('../input/training_set_metadata.csv')[meta_cols]\n",
712 |     "meta_train2.head()\n",
713 |     "\n",
714 |     "df = meta_train2.groupby('target').hostgal_photoz.mean()\n",
715 |     "\n",
716 |     "galactic = ['class_%d' % c for c in df[df == 0].index]\n",
717 |     "extragal = ['class_%d' % c for c in df[df > 0].index]\n",
718 |     "galactic, extragal"
719 |    ]
720 |   },
721 |   {
722 |    "cell_type": "code",
723 |    "execution_count": null,
724 |    "metadata": {},
725 |    "outputs": [],
726 |    "source": [
727 |     "z = z.reset_index()\n",
728 |     "\n",
729 |     "z['class_99'] *= (0.18 / z['class_99'].mean())\n",
730 |     "\n",
731 |     "z.loc[meta_test.hostgal_photoz == 0, extragal] = 0\n",
732 |     "\n",
733 |     "z.loc[meta_test.hostgal_photoz > 0, galactic] = 0\n",
734 |     "\n",
735 |     "z.mean(axis=0)"
736 |    ]
737 |   },
738 |   {
739 |    "cell_type": "code",
740 |    "execution_count": null,
741 |    "metadata": {},
742 |    "outputs": [],
743 |    "source": [
744 |     "#z.to_csv('../submissions/gal_%s.csv' %fname, index=False, float_format='%.6f')\n",
745 |     "\n",
746 |     "z['class_99'] = (1. - z[z.columns[1:-1]]).prod(axis=1)\n",
747 |     "\n",
748 |     "z.mean(axis=0)"
749 |    ]
750 |   },
751 |   {
752 |    "cell_type": "code",
753 |    "execution_count": null,
754 |    "metadata": {},
755 |    "outputs": [],
756 |    "source": [
757 |     "z['class_99'] *= (0.18 / z['class_99'].mean())\n",
758 |     "\n",
759 |     "z.to_csv('../submissions/gal_2_%s.csv' %fname, index=False, float_format='%.6f')"
760 |    ]
761 |   }
762 |  ],
763 |  "metadata": {
764 |   "kernelspec": {
765 |    "display_name": "Python [conda env:xgb8]",
766 |    "language": "python",
767 |    "name": "conda-env-xgb8-py"
768 |   },
769 |   "language_info": {
770 |    "codemirror_mode": {
771 |     "name": "ipython",
772 |     "version": 3
773 |    },
774 |    "file_extension": ".py",
775 |    "mimetype": "text/x-python",
776 |    "name": "python",
777 |    "nbconvert_exporter": "python",
778 |    "pygments_lexer": "ipython3",
779 |    "version": "3.6.5"
780 |   }
781 |  },
782 |  "nbformat": 4,
783 |  "nbformat_minor": 2
784 | }
785 | 


--------------------------------------------------------------------------------
/code/test_chunks.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 5,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "fname='test'\n",
 10 |     "seed=0"
 11 |    ]
 12 |   },
 13 |   {
 14 |    "cell_type": "code",
 15 |    "execution_count": 9,
 16 |    "metadata": {
 17 |     "scrolled": true
 18 |    },
 19 |    "outputs": [
 20 |     {
 21 |      "data": {
 22 |       "application/vnd.jupyter.widget-view+json": {
 23 |        "model_id": "371992443bc34c6aaf2a29ea0ee5ba46",
 24 |        "version_major": 2,
 25 |        "version_minor": 0
 26 |       },
 27 |       "text/plain": [
 28 |        "HBox(children=(IntProgress(value=0, max=91), HTML(value='')))"
 29 |       ]
 30 |      },
 31 |      "metadata": {},
 32 |      "output_type": "display_data"
 33 |     },
 34 |     {
 35 |      "name": "stdout",
 36 |      "output_type": "stream",
 37 |      "text": [
 38 |       "\n"
 39 |      ]
 40 |     }
 41 |    ],
 42 |    "source": [
 43 |     "import numpy as np\n",
 44 |     "import pandas as pd\n",
 45 |     "from sklearn.model_selection import StratifiedKFold\n",
 46 |     "from sklearn.metrics import confusion_matrix\n",
 47 |     "import gc\n",
 48 |     "import matplotlib.pyplot as plt\n",
 49 |     "import seaborn as sns\n",
 50 |     "import lightgbm as lgb\n",
 51 |     "import xgboost as xgb\n",
 52 |     "import logging\n",
 53 |     "from tqdm import tqdm_notebook\n",
 54 |     "import itertools\n",
 55 |     "\n",
 56 |     "import pickle as pkl\n",
 57 |     "\n",
 58 |     "import time\n",
 59 |     "\n",
 60 |     "start = time.time()\n",
 61 |     "chunks = 5000000\n",
 62 |     "remain_df = None\n",
 63 |     "\n",
 64 |     "for i_c, df in tqdm_notebook(enumerate(pd.read_csv('../input/test_set.csv', \n",
 65 |     "                                     chunksize=chunks, \n",
 66 |     "                                     iterator=True)),\n",
 67 |     "                             total=91):\n",
 68 |     "    # Check object_ids\n",
 69 |     "    # np.unique returns the ids sorted ascending, so this assumes object_ids appear in ascending order in the file\n",
 70 |     "    unique_ids = np.unique(df['object_id'])\n",
 71 |     "    new_remain_df = df.loc[df['object_id'] == unique_ids[-1]].copy()\n",
 72 |     "\n",
 73 |     "    if remain_df is None:\n",
 74 |     "        df = df.loc[df['object_id'].isin(unique_ids[:-1])].copy()\n",
 75 |     "    else:\n",
 76 |     "        df = pd.concat([remain_df, df.loc[df['object_id'].isin(unique_ids[:-1])]], axis=0)\n",
 77 |     "\n",
 78 |     "    # Create remaining samples df\n",
 79 |     "    remain_df = new_remain_df\n",
 80 |     "\n",
 81 |     "    with open('../input/test_chunk_%d.csv' %i_c, 'wb') as file:\n",
 82 |     "        pkl.dump(df, file)\n",
 83 |     "        \n",
 84 |     "with open('../input/test_chunk_%d.csv' %100, 'wb') as file:\n",
 85 |     "    pkl.dump(remain_df, file)\n"
 86 |    ]
 87 |   }
 88 |  ],
 89 |  "metadata": {
 90 |   "kernelspec": {
 91 |    "display_name": "Python [conda env:xgb8]",
 92 |    "language": "python",
 93 |    "name": "conda-env-xgb8-py"
 94 |   },
 95 |   "language_info": {
 96 |    "codemirror_mode": {
 97 |     "name": "ipython",
 98 |     "version": 3
 99 |    },
100 |    "file_extension": ".py",
101 |    "mimetype": "text/x-python",
102 |    "name": "python",
103 |    "nbconvert_exporter": "python",
104 |    "pygments_lexer": "ipython3",
105 |    "version": "3.6.5"
106 |   }
107 |  },
108 |  "nbformat": 4,
109 |  "nbformat_minor": 2
110 | }
111 | 


--------------------------------------------------------------------------------
/data/README.md:
--------------------------------------------------------------------------------
1 | Directory for storing temporary data, mostly precomputed features.
2 | 


--------------------------------------------------------------------------------
/input/README.md:
--------------------------------------------------------------------------------
1 | Competition data should be put here
2 | 


--------------------------------------------------------------------------------
/submissions/README.md:
--------------------------------------------------------------------------------
1 | Directory for submission files.
2 | 


--------------------------------------------------------------------------------