├── LICENSE ├── README.md ├── code ├── README.md ├── base_features_006_filter_error_6_std.ipynb ├── bazin_003.ipynb ├── celerite_003.ipynb ├── eda_024_bazin.ipynb ├── eda_026_newling.ipynb ├── eda_031_celerite.ipynb ├── lgb_best.ipynb ├── newling_003.ipynb └── test_chunks.ipynb ├── data └── README.md ├── input └── README.md └── submissions └── README.md /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 
34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Kaggle_PLAsTiCC 2 | Part of our solution to the PLAsTiCC Kaggle challenge 3 | 4 | I was part of a team that finished 5th in this very challenging Kaggle competition: https://www.kaggle.com/c/PLAsTiCC-2018 . It was challenging because we had to classify unevenly spaced time series. All time series problems I worked on before were regularly sampled time series. Sure, some values could be missing, but nothing like what we have here. Moreover, it was an open classification problem, with more classes in the test data than in the train data. 5 | 6 | I describe my part of the solution here: https://www.kaggle.com/c/PLAsTiCC-2018/discussion/75050 mostly feature engineering and lightgbm models. 7 | 8 | A team mate, Kun Hao Yeh, describes his here: https://www.kaggle.com/c/PLAsTiCC-2018/discussion/75040 mostly RNN. 9 | 10 | What is missing is a description of how we stacked models, due to our third team member, SomethingIsWrong. 
11 | 12 | The code in the code directory assumes that the competition data is in the input directory. The data directory is used to store additional data. 13 | 14 | The submissions directory contains files ready for submission. 15 | -------------------------------------------------------------------------------- /code/README.md: -------------------------------------------------------------------------------- 1 | Notebooks should be executed in this order: 2 | 3 | * `test_chunks.ipynb` Splits test data into chunks and stores them. 4 | * `base_features_006_filter_error_6_std.ipynb` Base features used for lightgbm models. 5 | * `bazin_003.ipynb` Fits Bazin curves for all extragalactic sources. 6 | * `newling_003.ipynb` Fits Newling curves for all extragalactic sources. 7 | * `lgb_best.ipynb` Computes a submission file using the above features. It is derived from the best model we had that scored 0.752 on the public leaderboard. The difference with this one is that our best model used out-of-fold predictions from other models, mostly RNN and MLP models produced by my team mates. I removed that part to make the code self-contained. 8 | 9 | These notebooks aren't used for the competition solutions, but they contain pretty graphics: 10 | * `eda_031_celerite.ipynb` Fits a Gaussian process, generates curves from it, and displays them. 11 | * `eda_024_bazin.ipynb` Fits and displays Bazin curves. 12 | * `eda_026_newling.ipynb` Fits and displays Newling curves. 
13 | -------------------------------------------------------------------------------- /code/base_features_006_filter_error_6_std.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "fname='base_006'\n", 10 | "\n", 11 | "n_tta = 6\n", 12 | "\n", 13 | "seed = 0" 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": 2, 19 | "metadata": {}, 20 | "outputs": [], 21 | "source": [ 22 | "import numpy as np\n", 23 | "import pandas as pd\n", 24 | "from sklearn.model_selection import StratifiedKFold\n", 25 | "from sklearn.metrics import confusion_matrix\n", 26 | "from scipy.optimize import curve_fit\n", 27 | "import gc\n", 28 | "import matplotlib.pyplot as plt\n", 29 | "import seaborn as sns\n", 30 | "import lightgbm as lgb\n", 31 | "import xgboost as xgb\n", 32 | "import logging\n", 33 | "from tqdm import tqdm_notebook\n", 34 | "import itertools\n", 35 | "import pickle as pkl\n", 36 | "\n", 37 | "from multiprocessing import Pool" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": 3, 43 | "metadata": {}, 44 | "outputs": [], 45 | "source": [ 46 | "import random as rn\n", 47 | "def init_seeds(seed):\n", 48 | "\n", 49 | " # The below is necessary for starting Numpy generated random numbers\n", 50 | " # in a well-defined initial state.\n", 51 | "\n", 52 | " np.random.seed(seed)\n", 53 | "\n", 54 | " # The below is necessary for starting core Python generated random numbers\n", 55 | " # in a well-defined state.\n", 56 | "\n", 57 | " rn.seed(seed)\n", 58 | "\n", 59 | "\n", 60 | "init_seeds(seed)" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": 5, 66 | "metadata": {}, 67 | "outputs": [], 68 | "source": [ 69 | "def get_aggregations():\n", 70 | " aggs = {\n", 71 | " 'flux' : ['min', 'max', 'mean', 'std', 'skew'],\n", 72 | " 'flux_delta' : ['mean', 
'median', 'std'],\n", 73 | " 'flux_err' : ['min', 'max', 'mean', 'median', 'std'],\n", 74 | " 'detected' : ['mean'], # ''min', 'max', 'mean', 'median', 'std'],\n", 75 | " 'flux_ratio_sq' : ['sum'],\n", 76 | " 'flux_by_flux_ratio_sq' : ['sum'],\n", 77 | " 'mjd_detected' : ['min', 'max'],\n", 78 | " 'mjd_detected_std' : ['min', 'max'],\n", 79 | " 'flux_detected' : ['mean'], # ''min', 'max', 'mean', 'median', 'std'],\n", 80 | " 'flux_slope_change' : ['mean'],\n", 81 | " 'scale':['mean'],\n", 82 | " 'magnitude':['mean'],\n", 83 | " }\n", 84 | " \n", 85 | " for pb in range(6):\n", 86 | " flux_pb = 'flux_%d' % pb\n", 87 | " aggs[flux_pb] = ['min', 'max', 'mean', 'median', 'std', 'skew']\n", 88 | " flux_delta_pb = 'flux_delta_%d' % pb\n", 89 | " aggs[flux_delta_pb] = ['std']\n", 90 | " detected_pb = 'detected_%d' % pb\n", 91 | " aggs[detected_pb] = ['mean']\n", 92 | " flux_pb_detected = 'flux_%d_detected' % pb\n", 93 | " aggs[flux_pb_detected] = ['mean']\n", 94 | " flux_ratio_sq_pb = 'flux_ratio_sq_%d' % pb\n", 95 | " aggs[flux_ratio_sq_pb] = ['sum']\n", 96 | " flux_by_flux_ratio_sq_pb = 'flux_by_flux_ratio_sq_%d' % pb\n", 97 | " aggs[flux_by_flux_ratio_sq_pb] = ['sum']\n", 98 | " return aggs\n", 99 | "\n", 100 | "\n", 101 | "def get_new_columns(aggs):\n", 102 | " return [k + '_' + agg for k in aggs.keys() for agg in aggs[k]]\n", 103 | "\n", 104 | "def apply_kurt(df):\n", 105 | " cols = ['flux'] + ['flux_%d' % pb for pb in range(6)]\n", 106 | " agg = df.groupby('object_id')[cols].apply(pd.DataFrame.kurt)\n", 107 | " agg.columns = [c+'_kurt' for c in agg.columns]\n", 108 | " return agg\n", 109 | "\n", 110 | "def apply_kurt_delta(df):\n", 111 | " cols = ['flux_delta'] \n", 112 | " agg = df.groupby('object_id')[cols].apply(pd.DataFrame.kurt)\n", 113 | " agg.columns = [c+'_kurt' for c in agg.columns]\n", 114 | " return agg\n", 115 | "\n", 116 | "def add_features_to_agg(df):\n", 117 | " df['mjd_detected_diff'] = df['mjd_detected_max'] - df['mjd_detected_min']\n", 118 | " del 
df['mjd_detected_max'], df['mjd_detected_min']\n", 119 | " df['mjd_detected_std_diff'] = df['mjd_detected_std_max'] - df['mjd_detected_std_min']\n", 120 | " del df['mjd_detected_std_max'], df['mjd_detected_std_min']\n", 121 | " df['flux_diff'] = df['flux_max'] - df['flux_min']\n", 122 | " df['flux_dif2'] = (df['flux_max'] - df['flux_min']) / df['flux_mean']\n", 123 | " df['flux_w_mean'] = df['flux_by_flux_ratio_sq_sum'] / df['flux_ratio_sq_sum']\n", 124 | " df['flux_dif3'] = (df['flux_max'] - df['flux_min']) / df['flux_w_mean']\n", 125 | " df['flux_detected_ratio'] = df['flux_detected_mean'] / df['flux_mean']\n", 126 | " df['flux_delta_mean_ratio'] = df['flux_delta_mean'] / df['flux_mean']\n", 127 | " df['flux_delta_std_ratio'] = df['flux_delta_std'] / df['flux_std']\n", 128 | " #df['flux_delta_skew_ratio'] = df['flux_delta_skew'] / df['flux_skew']\n", 129 | " #del df['flux_delta_skew']\n", 130 | " #df['flux_delta_kurt_ratio'] = df['flux_delta_kurt'] / df['flux_kurt']\n", 131 | " for pb in range(6):\n", 132 | " #df['flux_%d_diff' % pb] = df['flux_%d_max' % pb] - df['flux_%d_min' % pb]\n", 133 | " #df['flux_%d_diff_2' % pb] = (df['flux_%d_max' % pb] - df['flux_%d_min' % pb]) / df['flux_%d_mean' % pb]\n", 134 | "\n", 135 | " df['flux_%d_detected_ratio' % pb] = df['flux_%d_detected_mean' % pb] / df['flux_%d_mean' % pb]\n", 136 | " df['flux_%d_mean' % pb] /= df.flux_mean\n", 137 | " df['flux_%d_detected_mean' % pb] /= df.flux_detected_mean\n", 138 | " df['flux_%d_max_ratio' % pb] = df['flux_%d_max' % pb] / df['flux_max']\n", 139 | " #df['flux_%d_min_ratio' % pb] = df['flux_%d_min' % pb] / df['flux_min']\n", 140 | " #df['flux_delta_%d_std_ratio' % pb] = df['flux_delta_%d_std' % pb] / df['flux_delta_std']\n", 141 | " df['flux_delta_%d_std_ratio_2' % pb] = df['flux_delta_%d_std' % pb] / df['flux_std']\n", 142 | " #df['flux_%d_std_ratio' % pb] = df['flux_%d_std' % pb] / df['flux_std']\n", 143 | " df['flux_%d_w_mean' % pb] = df['flux_by_flux_ratio_sq_%d_sum' % pb] / 
df['flux_ratio_sq_%d_sum' % pb]\n", 144 | " df['flux_%d_dif3'] = (df['flux_%d_max' % pb] - df['flux_%d_min' % pb]) / df['flux_%d_w_mean' % pb]\n", 145 | " df['flux_%d_w_mean' % pb] /= df['flux_w_mean']\n", 146 | " #df['flux_ratio_sq_%d_sum' % pb] /= df['flux_ratio_sq_sum']\n", 147 | " del df['flux_delta_%d_std' % pb], df['flux_by_flux_ratio_sq_%d_sum' % pb]\n", 148 | " #del df['flux_ratio_sq_%d_sum' % pb]\n", 149 | " del df['flux_by_flux_ratio_sq_sum']\n", 150 | " return df" 151 | ] 152 | }, 153 | { 154 | "cell_type": "code", 155 | "execution_count": 6, 156 | "metadata": {}, 157 | "outputs": [], 158 | "source": [ 159 | "def add_features_before_agg(df):\n", 160 | " \n", 161 | " df['flux_ratio_sq'] = np.power(df['flux'] / df['flux_err'], 2.0)\n", 162 | " df['flux_by_flux_ratio_sq'] = df['flux'] * df['flux_ratio_sq']\n", 163 | " \n", 164 | " df['mjd_detected'] = np.NaN\n", 165 | " df.loc[df.detected == 1, 'mjd_detected'] = df.loc[df.detected == 1, 'mjd']\n", 166 | " \n", 167 | " df['flux_detected'] = np.NaN\n", 168 | " df.loc[df.detected == 1, 'flux_detected'] = df.loc[df.detected == 1, 'flux']\n", 169 | "\n", 170 | " df['mjd_detected_std'] = np.NaN\n", 171 | " df.loc[df.detected_std == 1, 'mjd_detected_std'] = df.loc[df.detected_std == 1, 'mjd']\n", 172 | " \n", 173 | " gr = df.groupby(['object_id', 'passband'])\n", 174 | " df['flux_prev'] = gr.flux.shift(1)\n", 175 | " df['mjd_prev'] = gr.mjd.shift(1)\n", 176 | " \n", 177 | " df['flux_delta'] = (df.flux - df.flux_prev) \n", 178 | " df['flux_delta_abs'] = np.abs(df.flux_delta)\n", 179 | "\n", 180 | " df.loc[df.flux_delta_abs * df.scale < 10, 'flux_delta'] = np.NaN\n", 181 | " df.loc[(df.mjd - df.mjd_prev) > 100, 'flux_delta'] = np.NaN\n", 182 | " df['flux_slope'] = np.sign(df.flux_delta) \n", 183 | " \n", 184 | " df['flux_slope_prev'] = gr.flux_slope.shift(1).fillna('prev')\n", 185 | " df['flux_slope_change'] = 1*(df['flux_slope'] != df['flux_slope_prev']) \n", 186 | " del df['flux_prev'], df['flux_slope_prev'], 
df['flux_slope'], df['mjd_prev']\n", 187 | "\n", 188 | " for pb in range(6):\n", 189 | " filter_p = (df.passband == pb)\n", 190 | " \n", 191 | " flux_pb = 'flux_%d' % pb\n", 192 | " df[flux_pb] = np.NaN\n", 193 | " df.loc[filter_p, flux_pb] = df.loc[filter_p, 'flux']\n", 194 | "\n", 195 | " flux_delta_pb = 'flux_delta_%d' % pb\n", 196 | " df[flux_delta_pb] = np.NaN\n", 197 | " df.loc[filter_p, flux_delta_pb] = df.loc[filter_p, 'flux_delta']\n", 198 | " \n", 199 | " detected_pb = 'detected_%d' % pb\n", 200 | " df[detected_pb] = 0\n", 201 | " df.loc[filter_p, detected_pb] = df.loc[filter_p, 'detected']\n", 202 | " \n", 203 | " flux_pb_detected = 'flux_%d_detected' % pb\n", 204 | " df[flux_pb_detected] = np.NaN\n", 205 | " df.loc[filter_p, flux_pb_detected] = df.loc[filter_p, 'flux_detected']\n", 206 | " \n", 207 | " flux_ratio_sq_pb = 'flux_ratio_sq_%d' % pb\n", 208 | " df[flux_ratio_sq_pb] = np.NaN\n", 209 | " df.loc[filter_p, flux_ratio_sq_pb] = df.loc[filter_p, 'flux_ratio_sq']\n", 210 | "\n", 211 | " flux_by_flux_ratio_sq_pb = 'flux_by_flux_ratio_sq_%d' % pb\n", 212 | " df[flux_by_flux_ratio_sq_pb] = np.NaN\n", 213 | " df.loc[filter_p, flux_by_flux_ratio_sq_pb] = df.loc[filter_p, 'flux_by_flux_ratio_sq']\n", 214 | "\n", 215 | " " 216 | ] 217 | }, 218 | { 219 | "cell_type": "code", 220 | "execution_count": 8, 221 | "metadata": {}, 222 | "outputs": [], 223 | "source": [ 224 | "def add_features(df_, meta_, throughputs=throughputs):\n", 225 | " df_ = df_.copy()\n", 226 | " \n", 227 | " #df_['scale'] = 1\n", 228 | " gr = df_.groupby('object_id')\n", 229 | " df_['scale'] = gr.flux.transform('max')\n", 230 | " df_['magnitude'] = df_['scale'] - gr.flux.transform('min')\n", 231 | " \n", 232 | " df_.flux /= df_.scale\n", 233 | " df_.flux_err /= df_.scale\n", 234 | " \n", 235 | " gr = df_.groupby(['object_id', 'passband']) \n", 236 | " flux_err_mean = gr.flux_err.transform('mean')\n", 237 | " flux_err_std = gr.flux_err.transform('std')\n", 238 | " df_ = df_[df_.flux_err <= 
flux_err_mean + 6*flux_err_std].copy()\n", 239 | "\n", 240 | " gr = df_.groupby(['object_id', 'passband']) \n", 241 | " flux_std = gr.flux.transform('std')\n", 242 | " flux_mean = gr.flux.transform('mean')\n", 243 | " df_['detected_std'] = df_.detected * (df_.flux > flux_mean + 1*flux_std)\n", 244 | " \n", 245 | " add_features_before_agg(df_)\n", 246 | "\n", 247 | " aggs = get_aggregations()\n", 248 | " new_columns = get_new_columns(aggs)\n", 249 | "\n", 250 | " agg_ = df_.groupby('object_id').agg(aggs)\n", 251 | " agg_.columns = new_columns\n", 252 | "\n", 253 | " agg_ = add_features_to_agg(df=agg_)\n", 254 | " \n", 255 | " agg_kurt = apply_kurt(df_)\n", 256 | " \n", 257 | " #agg_kurt_delta = apply_kurt_delta(df_)\n", 258 | " \n", 259 | " agg_ = pd.concat([agg_, agg_kurt], axis=1).reset_index()\n", 260 | " #agg_ = agg_.merge(agg_bazin, how='left', on='object_id')\n", 261 | "\n", 262 | " # Merge with meta data\n", 263 | " full_df = agg_.merge(\n", 264 | " right=meta_,\n", 265 | " how='left',\n", 266 | " on='object_id'\n", 267 | " )\n", 268 | " full_df['magnitude_mean'] *= (full_df.hostgal_photoz ** 2)\n", 269 | "\n", 270 | " del agg_\n", 271 | " return full_df" 272 | ] 273 | }, 274 | { 275 | "cell_type": "code", 276 | "execution_count": 9, 277 | "metadata": {}, 278 | "outputs": [], 279 | "source": [ 280 | "# training time data augmentation\n", 281 | "\n", 282 | "def get_tta(train, meta_train, i):\n", 283 | " df = train.copy()\n", 284 | " init_seeds(i)\n", 285 | " if i > 0:\n", 286 | " df['flux'] += df['flux_err'] * np.random.randn(*df['flux_err'].shape)\n", 287 | " df = add_features(df, meta_train)\n", 288 | " \n", 289 | " return df" 290 | ] 291 | }, 292 | { 293 | "cell_type": "code", 294 | "execution_count": 10, 295 | "metadata": {}, 296 | "outputs": [ 297 | { 298 | "data": { 299 | "text/html": [ 300 | "
\n", 301 | "\n", 314 | "\n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | "
object_idmjdpassbandfluxflux_errdetected
061559750.42292-544.8103033.6229521
161559750.43061-816.4343265.5533701
261559750.43833-471.3855293.8012131
361559750.44504-388.98498511.3950311
461559752.40702-681.8588874.0412041
\n", 374 | "
" 375 | ], 376 | "text/plain": [ 377 | " object_id mjd passband flux flux_err detected\n", 378 | "0 615 59750.4229 2 -544.810303 3.622952 1\n", 379 | "1 615 59750.4306 1 -816.434326 5.553370 1\n", 380 | "2 615 59750.4383 3 -471.385529 3.801213 1\n", 381 | "3 615 59750.4450 4 -388.984985 11.395031 1\n", 382 | "4 615 59752.4070 2 -681.858887 4.041204 1" 383 | ] 384 | }, 385 | "execution_count": 10, 386 | "metadata": {}, 387 | "output_type": "execute_result" 388 | } 389 | ], 390 | "source": [ 391 | "train = pd.read_csv('../input/training_set.csv')\n", 392 | "train.head()" 393 | ] 394 | }, 395 | { 396 | "cell_type": "code", 397 | "execution_count": 11, 398 | "metadata": {}, 399 | "outputs": [ 400 | { 401 | "data": { 402 | "text/html": [ 403 | "
\n", 404 | "\n", 417 | "\n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | "
object_idddfhostgal_photoztarget
061510.000092
171311.626788
273010.226242
374510.281390
4112410.241590
\n", 465 | "
" 466 | ], 467 | "text/plain": [ 468 | " object_id ddf hostgal_photoz target\n", 469 | "0 615 1 0.0000 92\n", 470 | "1 713 1 1.6267 88\n", 471 | "2 730 1 0.2262 42\n", 472 | "3 745 1 0.2813 90\n", 473 | "4 1124 1 0.2415 90" 474 | ] 475 | }, 476 | "execution_count": 11, 477 | "metadata": {}, 478 | "output_type": "execute_result" 479 | } 480 | ], 481 | "source": [ 482 | "meta_cols = ['object_id', 'ddf', 'hostgal_photoz', 'target']\n", 483 | "meta_train = pd.read_csv('../input/training_set_metadata.csv')[meta_cols]\n", 484 | "meta_train.head()" 485 | ] 486 | }, 487 | { 488 | "cell_type": "code", 489 | "execution_count": 12, 490 | "metadata": { 491 | "scrolled": true 492 | }, 493 | "outputs": [ 494 | { 495 | "data": { 496 | "application/vnd.jupyter.widget-view+json": { 497 | "model_id": "2c2dd22bf588423bbceb9b32fecc7621", 498 | "version_major": 2, 499 | "version_minor": 0 500 | }, 501 | "text/plain": [ 502 | "HBox(children=(IntProgress(value=0, max=11), HTML(value='')))" 503 | ] 504 | }, 505 | "metadata": {}, 506 | "output_type": "display_data" 507 | }, 508 | { 509 | "name": "stdout", 510 | "output_type": "stream", 511 | "text": [ 512 | "\n" 513 | ] 514 | } 515 | ], 516 | "source": [ 517 | "n_tta = 11\n", 518 | "\n", 519 | "ttas = [get_tta(train, meta_train, i) for i in tqdm_notebook(range(11))]\n", 520 | "\n", 521 | "#for tta in ttas:\n", 522 | "# tta.fillna(train_mean, inplace=True)" 523 | ] 524 | }, 525 | { 526 | "cell_type": "code", 527 | "execution_count": 13, 528 | "metadata": {}, 529 | "outputs": [], 530 | "source": [ 531 | "with open('../data/ttas_%s.pkl' % fname, 'wb') as file:\n", 532 | " pkl.dump(ttas, file)\n" 533 | ] 534 | }, 535 | { 536 | "cell_type": "code", 537 | "execution_count": 14, 538 | "metadata": {}, 539 | "outputs": [], 540 | "source": [ 541 | "def work_test(param):\n", 542 | " (chunk_id, fname) = param\n", 543 | " print('starting worker', chunk_id)\n", 544 | " meta_test = pd.read_csv('../input/test_set_metadata.csv')\n", 545 | " with 
open('../input/test_chunk_%d.csv' % chunk_id, 'rb') as file:\n", 546 | " test_chunk = pkl.load(file)\n", 547 | " full_test = add_features(test_chunk, meta_test)\n", 548 | " \n", 549 | " with open('../data/full_test_chunk_%s_%d.pkl' % (fname, chunk_id), 'wb') as file:\n", 550 | " pkl.dump(full_test, file)\n", 551 | " print('ending worker', chunk_id)\n", 552 | " return 'done'" 553 | ] 554 | }, 555 | { 556 | "cell_type": "code", 557 | "execution_count": 15, 558 | "metadata": { 559 | "scrolled": true 560 | }, 561 | "outputs": [ 562 | { 563 | "name": "stdout", 564 | "output_type": "stream", 565 | "text": [ 566 | "starting worker 0\n", 567 | "starting worker 1\n", 568 | "starting worker 3\n", 569 | "starting worker 2\n", 570 | "starting worker 4\n", 571 | "ending worker 0\n", 572 | "starting worker 5\n", 573 | "ending worker 1\n", 574 | "starting worker 6\n", 575 | "ending worker 2\n", 576 | "starting worker 7\n", 577 | "ending worker 3\n", 578 | "starting worker 8\n", 579 | "ending worker 4\n", 580 | "starting worker 9\n", 581 | "ending worker 5\n", 582 | "starting worker 10\n", 583 | "ending worker 6\n", 584 | "starting worker 11\n", 585 | "ending worker 7\n", 586 | "starting worker 12\n", 587 | "ending worker 9\n", 588 | "starting worker 13\n", 589 | "ending worker 8\n", 590 | "starting worker 14\n", 591 | "ending worker 10\n", 592 | "starting worker 15\n", 593 | "ending worker 11\n", 594 | "starting worker 16\n", 595 | "ending worker 12\n", 596 | "starting worker 17\n", 597 | "ending worker 13\n", 598 | "starting worker 18\n", 599 | "ending worker 14\n", 600 | "starting worker 19\n", 601 | "ending worker 15\n", 602 | "starting worker 20\n", 603 | "ending worker 16\n", 604 | "starting worker 21\n", 605 | "ending worker 17\n", 606 | "starting worker 22\n", 607 | "ending worker 18\n", 608 | "starting worker 23\n", 609 | "ending worker 19\n", 610 | "starting worker 24\n", 611 | "ending worker 20\n", 612 | "starting worker 25\n", 613 | "ending worker 21\n", 614 | 
"starting worker 26\n", 615 | "ending worker 22\n", 616 | "starting worker 27\n", 617 | "ending worker 23\n", 618 | "starting worker 28\n", 619 | "ending worker 24\n", 620 | "starting worker 29\n", 621 | "ending worker 25\n", 622 | "starting worker 30\n", 623 | "ending worker 26\n", 624 | "starting worker 31\n", 625 | "ending worker 27\n", 626 | "starting worker 32\n", 627 | "ending worker 28\n", 628 | "starting worker 33\n", 629 | "ending worker 29\n", 630 | "starting worker 34\n", 631 | "ending worker 30\n", 632 | "starting worker 35\n", 633 | "ending worker 31\n", 634 | "starting worker 36\n", 635 | "ending worker 32\n", 636 | "starting worker 37\n", 637 | "ending worker 33\n", 638 | "starting worker 38\n", 639 | "ending worker 34\n", 640 | "starting worker 39\n", 641 | "ending worker 35\n", 642 | "starting worker 40\n", 643 | "ending worker 36\n", 644 | "starting worker 41\n", 645 | "ending worker 37\n", 646 | "starting worker 42\n", 647 | "ending worker 38\n", 648 | "starting worker 43\n", 649 | "ending worker 39\n", 650 | "starting worker 44\n", 651 | "ending worker 40\n", 652 | "starting worker 45\n", 653 | "ending worker 41\n", 654 | "starting worker 46\n", 655 | "ending worker 42\n", 656 | "starting worker 47\n", 657 | "ending worker 43\n", 658 | "starting worker 48\n", 659 | "ending worker 44\n", 660 | "starting worker 49\n", 661 | "ending worker 45\n", 662 | "starting worker 50\n", 663 | "ending worker 46\n", 664 | "starting worker 51\n", 665 | "ending worker 47\n", 666 | "starting worker 52\n", 667 | "ending worker 48\n", 668 | "starting worker 53\n", 669 | "ending worker 49\n", 670 | "starting worker 54\n", 671 | "ending worker 50\n", 672 | "starting worker 55\n", 673 | "ending worker 51\n", 674 | "starting worker 56\n", 675 | "ending worker 52\n", 676 | "starting worker 57\n", 677 | "ending worker 53\n", 678 | "starting worker 58\n", 679 | "ending worker 54\n", 680 | "starting worker 59\n", 681 | "ending worker 55\n", 682 | "starting worker 60\n", 683 
| "ending worker 56\n", 684 | "starting worker 61\n", 685 | "ending worker 57\n", 686 | "starting worker 62\n", 687 | "ending worker 58\n", 688 | "starting worker 63\n", 689 | "ending worker 59\n", 690 | "starting worker 64\n", 691 | "ending worker 60\n", 692 | "starting worker 65\n", 693 | "ending worker 61\n", 694 | "starting worker 66\n", 695 | "ending worker 62\n", 696 | "starting worker 67\n", 697 | "ending worker 63\n", 698 | "starting worker 68\n", 699 | "ending worker 64\n", 700 | "starting worker 69\n", 701 | "ending worker 65\n", 702 | "starting worker 70\n", 703 | "ending worker 66\n", 704 | "starting worker 71\n", 705 | "ending worker 67\n", 706 | "starting worker 72\n", 707 | "ending worker 68\n", 708 | "starting worker 73\n", 709 | "ending worker 69\n", 710 | "starting worker 74\n", 711 | "ending worker 71\n", 712 | "starting worker 75\n", 713 | "ending worker 70\n", 714 | "starting worker 76\n", 715 | "ending worker 73\n", 716 | "starting worker 77\n", 717 | "ending worker 72\n", 718 | "starting worker 78\n", 719 | "ending worker 74\n", 720 | "starting worker 79\n", 721 | "ending worker 75\n", 722 | "starting worker 80\n", 723 | "ending worker 76\n", 724 | "starting worker 81\n", 725 | "ending worker 77\n", 726 | "starting worker 82\n", 727 | "ending worker 78\n", 728 | "starting worker 83\n", 729 | "ending worker 79\n", 730 | "starting worker 84\n", 731 | "ending worker 80\n", 732 | "starting worker 85\n", 733 | "ending worker 81\n", 734 | "starting worker 86\n", 735 | "ending worker 82\n", 736 | "starting worker 87\n", 737 | "ending worker 83\n", 738 | "starting worker 88\n", 739 | "ending worker 84\n", 740 | "starting worker 89\n", 741 | "ending worker 85\n", 742 | "starting worker 90\n", 743 | "ending worker 86\n", 744 | "starting worker 100\n", 745 | "ending worker 100\n", 746 | "ending worker 87\n", 747 | "ending worker 88\n", 748 | "ending worker 90\n", 749 | "ending worker 89\n" 750 | ] 751 | } 752 | ], 753 | "source": [ 754 | "params = [(i, 
fname) for i in range(91)]\n", 755 | "params.append((100, fname))\n", 756 | "\n", 757 | "if 1: \n", 758 | " pool = Pool(processes=5, maxtasksperchild=1)\n", 759 | " ls = pool.map( work_test, params, chunksize=1 )\n", 760 | " pool.close()\n", 761 | "else:\n", 762 | " ls = [work_test(param) for param in params]" 763 | ] 764 | }, 765 | { 766 | "cell_type": "code", 767 | "execution_count": null, 768 | "metadata": {}, 769 | "outputs": [], 770 | "source": [] 771 | } 772 | ], 773 | "metadata": { 774 | "kernelspec": { 775 | "display_name": "Python [conda env:xgb8]", 776 | "language": "python", 777 | "name": "conda-env-xgb8-py" 778 | }, 779 | "language_info": { 780 | "codemirror_mode": { 781 | "name": "ipython", 782 | "version": 3 783 | }, 784 | "file_extension": ".py", 785 | "mimetype": "text/x-python", 786 | "name": "python", 787 | "nbconvert_exporter": "python", 788 | "pygments_lexer": "ipython3", 789 | "version": "3.6.5" 790 | } 791 | }, 792 | "nbformat": 4, 793 | "nbformat_minor": 2 794 | } 795 | -------------------------------------------------------------------------------- /code/celerite_003.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "fname='celerite_003'\n", 10 | "\n", 11 | "n_tta = 6\n", 12 | "\n", 13 | "seed = 0" 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": 2, 19 | "metadata": {}, 20 | "outputs": [], 21 | "source": [ 22 | "import numpy as np\n", 23 | "import pandas as pd\n", 24 | "from sklearn.model_selection import StratifiedKFold\n", 25 | "from sklearn.metrics import confusion_matrix\n", 26 | "import gc\n", 27 | "import matplotlib.pyplot as plt\n", 28 | "import seaborn as sns\n", 29 | "import logging\n", 30 | "from tqdm import tqdm_notebook\n", 31 | "import itertools\n", 32 | "import pickle as pkl\n", 33 | "\n", 34 | "import autograd\n", 35 | "import 
celerite\n", 36 | "from celerite import terms\n", 37 | "import scipy.optimize as op\n", 38 | "from scipy.optimize import minimize\n", 39 | "\n", 40 | "\n", 41 | "from multiprocessing import Pool" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": 3, 47 | "metadata": {}, 48 | "outputs": [], 49 | "source": [ 50 | "import random as rn\n", 51 | "def init_seeds(seed):\n", 52 | "\n", 53 | " # The below is necessary for starting Numpy generated random numbers\n", 54 | " # in a well-defined initial state.\n", 55 | "\n", 56 | " np.random.seed(seed)\n", 57 | "\n", 58 | " # The below is necessary for starting core Python generated random numbers\n", 59 | " # in a well-defined state.\n", 60 | "\n", 61 | " rn.seed(seed)\n", 62 | "\n", 63 | "\n", 64 | "init_seeds(seed)" 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": 4, 70 | "metadata": {}, 71 | "outputs": [], 72 | "source": [ 73 | "# eda_031_celerite\n", 74 | "def get_gp(train, object_id, expand=True):\n", 75 | " passbands = [0, 1, 2, 3, 4, 5]\n", 76 | " n_param = 2\n", 77 | " res = pd.DataFrame()\n", 78 | " res['object_id'] = [object_id]\n", 79 | " for pb in passbands:\n", 80 | " for i in range(n_param):\n", 81 | " res['celerite_%d_%d' % (pb, i)] = np.nan\n", 82 | " df0 = train[train.object_id == object_id]\n", 83 | " if df0.hostgal_photoz.mean() == 0:\n", 84 | " return res\n", 85 | " offset = 11\n", 86 | " for pb in range(6):\n", 87 | " if True:\n", 88 | " df = df0[(df0.object_id == object_id) & (df0.passband == pb)]\n", 89 | " flux_err_mean = df.flux_err.mean()\n", 90 | " flux_err_std = df.flux_err.std()\n", 91 | " df = df[df.flux_err <= flux_err_mean + 6*flux_err_std]\n", 92 | " mjd_delta_prev = (df.mjd - df.mjd.shift(1)).fillna(100).values.ravel()\n", 93 | " mjd_delta_next = (df.mjd.shift(-1) - df.mjd).fillna(100).values.ravel()\n", 94 | " x_min = df.mjd.min()\n", 95 | " x_max = df.mjd.max()\n", 96 | " yerr_mean = df.flux_err.mean()\n", 97 | " x = df.mjd.values\n", 98 | " y = 
df.flux.values\n", 99 | " yerr = df.flux_err\n", 100 | " if expand:\n", 101 | " mjd_delta_prev = np.concatenate((100 * np.ones((offset,)),\n", 102 | " mjd_delta_prev,\n", 103 | " 100 * np.ones((offset,)),\n", 104 | " ))\n", 105 | " mjd_delta_next = np.concatenate((100 * np.ones((offset,)),\n", 106 | " mjd_delta_next,\n", 107 | " 100 * np.ones((offset,)),\n", 108 | " ))\n", 109 | " x = np.concatenate((np.linspace(x_min-250, x_min -200, offset),\n", 110 | " x,\n", 111 | " np.linspace(x_max+200, x_max+250, offset),\n", 112 | " ))\n", 113 | " y = np.concatenate((np.random.randn(offset) * yerr_mean,\n", 114 | " y,\n", 115 | " np.random.randn(offset) * yerr_mean\n", 116 | " ))\n", 117 | " yerr = np.concatenate((yerr_mean * np.ones(offset),\n", 118 | " yerr,\n", 119 | " yerr_mean * np.ones(offset)\n", 120 | " ))\n", 121 | " #ystd = y.std()\n", 122 | " #y /= ystd\n", 123 | " #yerr = yerr / ystd\n", 124 | "\n", 125 | " # A Matern32 component\n", 126 | " log_sigma = 0\n", 127 | " log_rho = 0\n", 128 | " eps = 0.001\n", 129 | " bounds = dict(log_sigma=(-15, 15), log_rho=(-15, 15))\n", 130 | " kernel = terms.Matern32Term(log_sigma=log_sigma, log_rho=log_rho, eps=eps, bounds=bounds)\n", 131 | " #kernel.freeze_parameter(\"eps\") # We don't want to fit for \"eps\" in this term\n", 132 | "\n", 133 | "\n", 134 | " gp = celerite.GP(kernel, mean=0)\n", 135 | " gp.compute(x, yerr) # You always need to call compute once.\n", 136 | "\n", 137 | " def neg_log_like(params, y, gp):\n", 138 | " gp.set_parameter_vector(params)\n", 139 | " return -gp.log_likelihood(y)\n", 140 | "\n", 141 | " def grad_neg_log_like(params, y, gp):\n", 142 | " gp.set_parameter_vector(params)\n", 143 | " return -gp.grad_log_likelihood(y)[1]\n", 144 | "\n", 145 | " initial_params = gp.get_parameter_vector()\n", 146 | " bounds = gp.get_parameter_bounds()\n", 147 | "\n", 148 | " r = minimize(neg_log_like, initial_params, jac=grad_neg_log_like, \n", 149 | " method=\"L-BFGS-B\", bounds=bounds, args=(y, gp))\n", 150 | " 
for i in range(n_param):\n", 151 | " res['celerite_%d_%d' % (pb, i)] = r.x[i]\n", 152 | " else:\n", 153 | " continue\n", 154 | " return res" 155 | ] 156 | }, 157 | { 158 | "cell_type": "code", 159 | "execution_count": 5, 160 | "metadata": {}, 161 | "outputs": [], 162 | "source": [ 163 | "def apply_gp(df, meta):\n", 164 | " df = df[['object_id', 'mjd', 'passband', 'flux', 'flux_err']].merge(meta[['object_id', 'hostgal_photoz']],\n", 165 | " how='left', on='object_id')\n", 166 | " agg = [get_gp(df, object_id) for object_id in tqdm_notebook(df.object_id.unique())]\n", 167 | " return pd.concat(agg, axis=0)" 168 | ] 169 | }, 170 | { 171 | "cell_type": "code", 172 | "execution_count": 6, 173 | "metadata": {}, 174 | "outputs": [ 175 | { 176 | "data": { 177 | "text/html": [ 178 | "
\n", 179 | "\n", 192 | "\n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | "
object_idmjdpassbandfluxflux_errdetected
061559750.42292-544.8103033.6229521
161559750.43061-816.4343265.5533701
261559750.43833-471.3855293.8012131
361559750.44504-388.98498511.3950311
461559752.40702-681.8588874.0412041
\n", 252 | "
" 253 | ], 254 | "text/plain": [ 255 | " object_id mjd passband flux flux_err detected\n", 256 | "0 615 59750.4229 2 -544.810303 3.622952 1\n", 257 | "1 615 59750.4306 1 -816.434326 5.553370 1\n", 258 | "2 615 59750.4383 3 -471.385529 3.801213 1\n", 259 | "3 615 59750.4450 4 -388.984985 11.395031 1\n", 260 | "4 615 59752.4070 2 -681.858887 4.041204 1" 261 | ] 262 | }, 263 | "execution_count": 6, 264 | "metadata": {}, 265 | "output_type": "execute_result" 266 | } 267 | ], 268 | "source": [ 269 | "train = pd.read_csv('../input/training_set.csv')\n", 270 | "train.head()" 271 | ] 272 | }, 273 | { 274 | "cell_type": "code", 275 | "execution_count": 7, 276 | "metadata": {}, 277 | "outputs": [ 278 | { 279 | "data": { 280 | "text/html": [ 281 | "
\n", 282 | "\n", 295 | "\n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | "
object_idddfhostgal_photoztarget
061510.000092
171311.626788
273010.226242
374510.281390
4112410.241590
\n", 343 | "
" 344 | ], 345 | "text/plain": [ 346 | " object_id ddf hostgal_photoz target\n", 347 | "0 615 1 0.0000 92\n", 348 | "1 713 1 1.6267 88\n", 349 | "2 730 1 0.2262 42\n", 350 | "3 745 1 0.2813 90\n", 351 | "4 1124 1 0.2415 90" 352 | ] 353 | }, 354 | "execution_count": 7, 355 | "metadata": {}, 356 | "output_type": "execute_result" 357 | } 358 | ], 359 | "source": [ 360 | "meta_cols = ['object_id', 'ddf', 'hostgal_photoz', 'target']\n", 361 | "meta_train = pd.read_csv('../input/training_set_metadata.csv')[meta_cols]\n", 362 | "meta_train.head()" 363 | ] 364 | }, 365 | { 366 | "cell_type": "code", 367 | "execution_count": 8, 368 | "metadata": {}, 369 | "outputs": [ 370 | { 371 | "data": { 372 | "text/html": [ 373 | "
\n", 374 | "\n", 387 | "\n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | "
object_idcelerite_0_0celerite_0_1celerite_1_0celerite_1_1celerite_2_0celerite_2_1celerite_3_0celerite_3_1celerite_4_0celerite_4_1celerite_5_0celerite_5_1
041734.3723275.0030634.669525.3508745.3802495.9559094.2565755.2563.8842355.0174653.4527964.815897
\n", 425 | "
" 426 | ], 427 | "text/plain": [ 428 | " object_id celerite_0_0 celerite_0_1 celerite_1_0 celerite_1_1 \\\n", 429 | "0 4173 4.372327 5.003063 4.66952 5.350874 \n", 430 | "\n", 431 | " celerite_2_0 celerite_2_1 celerite_3_0 celerite_3_1 celerite_4_0 \\\n", 432 | "0 5.380249 5.955909 4.256575 5.256 3.884235 \n", 433 | "\n", 434 | " celerite_4_1 celerite_5_0 celerite_5_1 \n", 435 | "0 5.017465 3.452796 4.815897 " 436 | ] 437 | }, 438 | "execution_count": 8, 439 | "metadata": {}, 440 | "output_type": "execute_result" 441 | } 442 | ], 443 | "source": [ 444 | "get_gp(train.merge(meta_train, how='left', on='object_id'), 4173)" 445 | ] 446 | }, 447 | { 448 | "cell_type": "code", 449 | "execution_count": 8, 450 | "metadata": {}, 451 | "outputs": [ 452 | { 453 | "data": { 454 | "text/html": [ 455 | "
\n", 456 | "\n", 469 | "\n", 470 | " \n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | " \n", 490 | " \n", 491 | " \n", 492 | " \n", 493 | " \n", 494 | " \n", 495 | " \n", 496 | " \n", 497 | " \n", 498 | " \n", 499 | " \n", 500 | " \n", 501 | " \n", 502 | " \n", 503 | " \n", 504 | " \n", 505 | " \n", 506 | "
object_idcelerite_0_0celerite_0_1celerite_1_0celerite_1_1celerite_2_0celerite_2_1celerite_3_0celerite_3_1celerite_4_0celerite_4_1celerite_5_0celerite_5_1
041734.3723275.0030634.669525.3508745.3802495.9559094.2565755.2563.8842355.0174653.4408754.792469
\n", 507 | "
" 508 | ], 509 | "text/plain": [ 510 | " object_id celerite_0_0 celerite_0_1 celerite_1_0 celerite_1_1 \\\n", 511 | "0 4173 4.372327 5.003063 4.66952 5.350874 \n", 512 | "\n", 513 | " celerite_2_0 celerite_2_1 celerite_3_0 celerite_3_1 celerite_4_0 \\\n", 514 | "0 5.380249 5.955909 4.256575 5.256 3.884235 \n", 515 | "\n", 516 | " celerite_4_1 celerite_5_0 celerite_5_1 \n", 517 | "0 5.017465 3.440875 4.792469 " 518 | ] 519 | }, 520 | "execution_count": 8, 521 | "metadata": {}, 522 | "output_type": "execute_result" 523 | } 524 | ], 525 | "source": [] 526 | }, 527 | { 528 | "cell_type": "code", 529 | "execution_count": 9, 530 | "metadata": {}, 531 | "outputs": [], 532 | "source": [ 533 | "def work_tta(param):\n", 534 | " (i, fname) = param\n", 535 | " print('starting worker', i)\n", 536 | " train = pd.read_csv('../input/training_set.csv')\n", 537 | " meta_train = pd.read_csv('../input/training_set_metadata.csv')[meta_cols]\n", 538 | " df = train.copy()\n", 539 | " if i > 0:\n", 540 | " init_seeds(i)\n", 541 | " df['flux'] += df['flux_err'] * np.random.randn(*df['flux_err'].shape)\n", 542 | " df = apply_gp(df, meta_train)\n", 543 | " with open('../data/tta_%d_%s.pkl' % (i, fname), 'wb') as file:\n", 544 | " pkl.dump(df, file) \n", 545 | " print('ending worker', i)\n", 546 | " return 'done'" 547 | ] 548 | }, 549 | { 550 | "cell_type": "code", 551 | "execution_count": 10, 552 | "metadata": { 553 | "scrolled": true 554 | }, 555 | "outputs": [ 556 | { 557 | "name": "stdout", 558 | "output_type": "stream", 559 | "text": [ 560 | "starting worker 2\n", 561 | "starting worker 0\n", 562 | "starting worker 5\n", 563 | "starting worker 1\n", 564 | "starting worker 3\n", 565 | "starting worker 7\n", 566 | "starting worker 8\n", 567 | "starting worker 4\n", 568 | "starting worker 6\n", 569 | "starting worker 9\n", 570 | "starting worker 10\n" 571 | ] 572 | }, 573 | { 574 | "data": { 575 | "application/vnd.jupyter.widget-view+json": { 576 | "model_id": 
"055ec5381e224bf4aa8b822d643bef44", 577 | "version_major": 2, 578 | "version_minor": 0 579 | }, 580 | "text/plain": [ 581 | "HBox(children=(IntProgress(value=0, max=7848), HTML(value='')))" 582 | ] 583 | }, 584 | "metadata": {}, 585 | "output_type": "display_data" 586 | }, 587 | { 588 | "data": { 589 | "application/vnd.jupyter.widget-view+json": { 590 | "model_id": "eccc4f4abb6f4469949010054621ba13", 591 | "version_major": 2, 592 | "version_minor": 0 593 | }, 594 | "text/plain": [ 595 | "HBox(children=(IntProgress(value=0, max=7848), HTML(value='')))" 596 | ] 597 | }, 598 | "metadata": {}, 599 | "output_type": "display_data" 600 | }, 601 | { 602 | "data": { 603 | "application/vnd.jupyter.widget-view+json": { 604 | "model_id": "7a5a7f381a2148ddb753e3993676928a", 605 | "version_major": 2, 606 | "version_minor": 0 607 | }, 608 | "text/plain": [ 609 | "HBox(children=(IntProgress(value=0, max=7848), HTML(value='')))" 610 | ] 611 | }, 612 | "metadata": {}, 613 | "output_type": "display_data" 614 | }, 615 | { 616 | "data": { 617 | "application/vnd.jupyter.widget-view+json": { 618 | "model_id": "3b5c7b17c28247fbb1248c91feabee0b", 619 | "version_major": 2, 620 | "version_minor": 0 621 | }, 622 | "text/plain": [ 623 | "HBox(children=(IntProgress(value=0, max=7848), HTML(value='')))" 624 | ] 625 | }, 626 | "metadata": {}, 627 | "output_type": "display_data" 628 | }, 629 | { 630 | "data": { 631 | "application/vnd.jupyter.widget-view+json": { 632 | "model_id": "1f0df15e730e42a0bc602dbc25b4d249", 633 | "version_major": 2, 634 | "version_minor": 0 635 | }, 636 | "text/plain": [ 637 | "HBox(children=(IntProgress(value=0, max=7848), HTML(value='')))" 638 | ] 639 | }, 640 | "metadata": {}, 641 | "output_type": "display_data" 642 | }, 643 | { 644 | "data": { 645 | "application/vnd.jupyter.widget-view+json": { 646 | "model_id": "fff19743f3bd47f880d906aa2bd99cda", 647 | "version_major": 2, 648 | "version_minor": 0 649 | }, 650 | "text/plain": [ 651 | 
"HBox(children=(IntProgress(value=0, max=7848), HTML(value='')))" 652 | ] 653 | }, 654 | "metadata": {}, 655 | "output_type": "display_data" 656 | }, 657 | { 658 | "data": { 659 | "application/vnd.jupyter.widget-view+json": { 660 | "model_id": "baefe6c321c345cb97e504a0eda324eb", 661 | "version_major": 2, 662 | "version_minor": 0 663 | }, 664 | "text/plain": [ 665 | "HBox(children=(IntProgress(value=0, max=7848), HTML(value='')))" 666 | ] 667 | }, 668 | "metadata": {}, 669 | "output_type": "display_data" 670 | }, 671 | { 672 | "data": { 673 | "application/vnd.jupyter.widget-view+json": { 674 | "model_id": "b4605f0a84414c29a339d7517733972b", 675 | "version_major": 2, 676 | "version_minor": 0 677 | }, 678 | "text/plain": [ 679 | "HBox(children=(IntProgress(value=0, max=7848), HTML(value='')))" 680 | ] 681 | }, 682 | "metadata": {}, 683 | "output_type": "display_data" 684 | }, 685 | { 686 | "data": { 687 | "application/vnd.jupyter.widget-view+json": { 688 | "model_id": "d469f7b4f9af44ff93910f5aec1e17e2", 689 | "version_major": 2, 690 | "version_minor": 0 691 | }, 692 | "text/plain": [ 693 | "HBox(children=(IntProgress(value=0, max=7848), HTML(value='')))" 694 | ] 695 | }, 696 | "metadata": {}, 697 | "output_type": "display_data" 698 | }, 699 | { 700 | "data": { 701 | "application/vnd.jupyter.widget-view+json": { 702 | "model_id": "c8045a76ae1b4a7dbfdce2b89e8be1f9", 703 | "version_major": 2, 704 | "version_minor": 0 705 | }, 706 | "text/plain": [ 707 | "HBox(children=(IntProgress(value=0, max=7848), HTML(value='')))" 708 | ] 709 | }, 710 | "metadata": {}, 711 | "output_type": "display_data" 712 | }, 713 | { 714 | "data": { 715 | "application/vnd.jupyter.widget-view+json": { 716 | "model_id": "f368619da67e491a9399790950f72025", 717 | "version_major": 2, 718 | "version_minor": 0 719 | }, 720 | "text/plain": [ 721 | "HBox(children=(IntProgress(value=0, max=7848), HTML(value='')))" 722 | ] 723 | }, 724 | "metadata": {}, 725 | "output_type": "display_data" 726 | }, 727 | { 
728 | "name": "stdout", 729 | "output_type": "stream", 730 | "text": [ 731 | "\n", 732 | "ending worker 7\n", 733 | "\n", 734 | "ending worker 8\n", 735 | "\n", 736 | "\n", 737 | "\n", 738 | "ending worker 3\n", 739 | "\n", 740 | "\n", 741 | "ending worker 5\n", 742 | "\n", 743 | "ending worker 0\n", 744 | "ending worker 9\n", 745 | "ending worker 4\n", 746 | "ending worker 1\n", 747 | "\n", 748 | "ending worker 10\n", 749 | "\n", 750 | "ending worker 2\n", 751 | "\n", 752 | "ending worker 6\n" 753 | ] 754 | } 755 | ], 756 | "source": [ 757 | "params = [(i, fname) for i in range(11)]\n", 758 | "\n", 759 | "if 1: \n", 760 | " pool = Pool(processes=11, maxtasksperchild=1)\n", 761 | " ls = pool.map( work_tta, params, chunksize=1 )\n", 762 | " pool.close()\n", 763 | "else:\n", 764 | " ls = [work_tta(param) for param in params]" 765 | ] 766 | }, 767 | { 768 | "cell_type": "code", 769 | "execution_count": null, 770 | "metadata": { 771 | "scrolled": true 772 | }, 773 | "outputs": [ 774 | { 775 | "name": "stdout", 776 | "output_type": "stream", 777 | "text": [ 778 | "starting worker 1\n", 779 | "starting worker 0\n", 780 | "starting worker 2\n", 781 | "starting worker 3\n", 782 | "starting worker 7\n", 783 | "starting worker 5\n", 784 | "starting worker 4\n", 785 | "starting worker 6\n", 786 | "starting worker 8\n", 787 | "starting worker 10\n", 788 | "starting worker 9\n" 789 | ] 790 | }, 791 | { 792 | "data": { 793 | "application/vnd.jupyter.widget-view+json": { 794 | "model_id": "b73097db30c948ec95bdeae042d30f72", 795 | "version_major": 2, 796 | "version_minor": 0 797 | }, 798 | "text/plain": [ 799 | "HBox(children=(IntProgress(value=0, max=7848), HTML(value='')))" 800 | ] 801 | }, 802 | "metadata": {}, 803 | "output_type": "display_data" 804 | }, 805 | { 806 | "data": { 807 | "application/vnd.jupyter.widget-view+json": { 808 | "model_id": "6ecc80db2b0e4898b52a8aa4630bc0bb", 809 | "version_major": 2, 810 | "version_minor": 0 811 | }, 812 | "text/plain": [ 813 | 
"HBox(children=(IntProgress(value=0, max=7848), HTML(value='')))" 814 | ] 815 | }, 816 | "metadata": {}, 817 | "output_type": "display_data" 818 | }, 819 | { 820 | "data": { 821 | "application/vnd.jupyter.widget-view+json": { 822 | "model_id": "6e4dd2a6fd444d06bffed8c7bd7b5c6b", 823 | "version_major": 2, 824 | "version_minor": 0 825 | }, 826 | "text/plain": [ 827 | "HBox(children=(IntProgress(value=0, max=7848), HTML(value='')))" 828 | ] 829 | }, 830 | "metadata": {}, 831 | "output_type": "display_data" 832 | }, 833 | { 834 | "data": { 835 | "application/vnd.jupyter.widget-view+json": { 836 | "model_id": "0eab5b22761843f28e7c2ff2c1d99305", 837 | "version_major": 2, 838 | "version_minor": 0 839 | }, 840 | "text/plain": [ 841 | "HBox(children=(IntProgress(value=0, max=7848), HTML(value='')))" 842 | ] 843 | }, 844 | "metadata": {}, 845 | "output_type": "display_data" 846 | }, 847 | { 848 | "data": { 849 | "application/vnd.jupyter.widget-view+json": { 850 | "model_id": "f5dfdb471f44464eb4cfd36bef0e95cf", 851 | "version_major": 2, 852 | "version_minor": 0 853 | }, 854 | "text/plain": [ 855 | "HBox(children=(IntProgress(value=0, max=7848), HTML(value='')))" 856 | ] 857 | }, 858 | "metadata": {}, 859 | "output_type": "display_data" 860 | }, 861 | { 862 | "data": { 863 | "application/vnd.jupyter.widget-view+json": { 864 | "model_id": "e96259aa66d14fa5bacea4f554de342e", 865 | "version_major": 2, 866 | "version_minor": 0 867 | }, 868 | "text/plain": [ 869 | "HBox(children=(IntProgress(value=0, max=7848), HTML(value='')))" 870 | ] 871 | }, 872 | "metadata": {}, 873 | "output_type": "display_data" 874 | }, 875 | { 876 | "data": { 877 | "application/vnd.jupyter.widget-view+json": { 878 | "model_id": "0577077a150a424fa4ef7c50426ed125", 879 | "version_major": 2, 880 | "version_minor": 0 881 | }, 882 | "text/plain": [ 883 | "HBox(children=(IntProgress(value=0, max=7848), HTML(value='')))" 884 | ] 885 | }, 886 | "metadata": {}, 887 | "output_type": "display_data" 888 | }, 889 | { 
890 | "data": { 891 | "application/vnd.jupyter.widget-view+json": { 892 | "model_id": "ab0b35f698fd4fa5897a129b0db250d6", 893 | "version_major": 2, 894 | "version_minor": 0 895 | }, 896 | "text/plain": [ 897 | "HBox(children=(IntProgress(value=0, max=7848), HTML(value='')))" 898 | ] 899 | }, 900 | "metadata": {}, 901 | "output_type": "display_data" 902 | }, 903 | { 904 | "data": { 905 | "application/vnd.jupyter.widget-view+json": { 906 | "model_id": "17d8fbecaabd450599501d3aab450ffc", 907 | "version_major": 2, 908 | "version_minor": 0 909 | }, 910 | "text/plain": [ 911 | "HBox(children=(IntProgress(value=0, max=7848), HTML(value='')))" 912 | ] 913 | }, 914 | "metadata": {}, 915 | "output_type": "display_data" 916 | }, 917 | { 918 | "data": { 919 | "application/vnd.jupyter.widget-view+json": { 920 | "model_id": "e95cb3f771f6438bb39b3374ca7c04ad", 921 | "version_major": 2, 922 | "version_minor": 0 923 | }, 924 | "text/plain": [ 925 | "HBox(children=(IntProgress(value=0, max=7848), HTML(value='')))" 926 | ] 927 | }, 928 | "metadata": {}, 929 | "output_type": "display_data" 930 | }, 931 | { 932 | "data": { 933 | "application/vnd.jupyter.widget-view+json": { 934 | "model_id": "b4ddfc8e5daa4c8d9641ce0fd6dccf9e", 935 | "version_major": 2, 936 | "version_minor": 0 937 | }, 938 | "text/plain": [ 939 | "HBox(children=(IntProgress(value=0, max=7848), HTML(value='')))" 940 | ] 941 | }, 942 | "metadata": {}, 943 | "output_type": "display_data" 944 | } 945 | ], 946 | "source": [] 947 | }, 948 | { 949 | "cell_type": "code", 950 | "execution_count": 11, 951 | "metadata": {}, 952 | "outputs": [], 953 | "source": [ 954 | "def work_test(param):\n", 955 | " (i, fname) = param\n", 956 | " print('starting worker', i)\n", 957 | " with open('../input/test_chunk_%d.csv' %i, 'rb') as file:\n", 958 | " test = pkl.load(file)\n", 959 | " meta_test = pd.read_csv('../input/test_set_metadata.csv')\n", 960 | " df = apply_gp(test, meta_test)\n", 961 | " with 
open('../data/test_%d_%s.pkl' % (i, fname), 'wb') as file:\n", 962 | " pkl.dump(df, file) \n", 963 | " print('ending worker', i)\n", 964 | " return 'done'" 965 | ] 966 | }, 967 | { 968 | "cell_type": "code", 969 | "execution_count": 12, 970 | "metadata": { 971 | "scrolled": true 972 | }, 973 | "outputs": [ 974 | { 975 | "name": "stdout", 976 | "output_type": "stream", 977 | "text": [ 978 | "starting worker 0\n", 979 | "starting worker 2\n", 980 | "starting worker 4\n", 981 | "starting worker 3\n", 982 | "starting worker 6\n", 983 | "starting worker 8\n", 984 | "starting worker 5\n", 985 | "starting worker 1\n", 986 | "starting worker 7\n", 987 | "starting worker 15\n", 988 | "starting worker 11\n", 989 | "starting worker 9\n", 990 | "starting worker 10\n", 991 | "starting worker 14\n", 992 | "starting worker 13\n", 993 | "starting worker 12\n", 994 | "starting worker 18\n", 995 | "starting worker 16\n", 996 | "starting worker 19\n", 997 | "starting worker 17\n" 998 | ] 999 | }, 1000 | { 1001 | "data": { 1002 | "application/vnd.jupyter.widget-view+json": { 1003 | "model_id": "081e7a429d86495c80833ac91191028f", 1004 | "version_major": 2, 1005 | "version_minor": 0 1006 | }, 1007 | "text/plain": [ 1008 | "HBox(children=(IntProgress(value=0, max=15137), HTML(value='')))" 1009 | ] 1010 | }, 1011 | "metadata": {}, 1012 | "output_type": "display_data" 1013 | }, 1014 | { 1015 | "data": { 1016 | "application/vnd.jupyter.widget-view+json": { 1017 | "model_id": "781ff9d5e6ed4d18956acd0f7b8cac28", 1018 | "version_major": 2, 1019 | "version_minor": 0 1020 | }, 1021 | "text/plain": [ 1022 | "HBox(children=(IntProgress(value=0, max=39057), HTML(value='')))" 1023 | ] 1024 | }, 1025 | "metadata": {}, 1026 | "output_type": "display_data" 1027 | }, 1028 | { 1029 | "data": { 1030 | "application/vnd.jupyter.widget-view+json": { 1031 | "model_id": "f3bf7e21700b458e8877e744bfaa35e7", 1032 | "version_major": 2, 1033 | "version_minor": 0 1034 | }, 1035 | "text/plain": [ 1036 | 
"HBox(children=(IntProgress(value=0, max=39048), HTML(value='')))" 1037 | ] 1038 | }, 1039 | "metadata": {}, 1040 | "output_type": "display_data" 1041 | }, 1042 | { 1043 | "data": { 1044 | "application/vnd.jupyter.widget-view+json": { 1045 | "model_id": "0ae14288da2c49b7b5da6c9448508e96", 1046 | "version_major": 2, 1047 | "version_minor": 0 1048 | }, 1049 | "text/plain": [ 1050 | "HBox(children=(IntProgress(value=0, max=39078), HTML(value='')))" 1051 | ] 1052 | }, 1053 | "metadata": {}, 1054 | "output_type": "display_data" 1055 | }, 1056 | { 1057 | "data": { 1058 | "application/vnd.jupyter.widget-view+json": { 1059 | "model_id": "ad260cbca39244f4930c06cb8a97aa59", 1060 | "version_major": 2, 1061 | "version_minor": 0 1062 | }, 1063 | "text/plain": [ 1064 | "HBox(children=(IntProgress(value=0, max=39055), HTML(value='')))" 1065 | ] 1066 | }, 1067 | "metadata": {}, 1068 | "output_type": "display_data" 1069 | }, 1070 | { 1071 | "data": { 1072 | "application/vnd.jupyter.widget-view+json": { 1073 | "model_id": "82a0531942e94d4aa2155f706beeac1c", 1074 | "version_major": 2, 1075 | "version_minor": 0 1076 | }, 1077 | "text/plain": [ 1078 | "HBox(children=(IntProgress(value=0, max=34964), HTML(value='')))" 1079 | ] 1080 | }, 1081 | "metadata": {}, 1082 | "output_type": "display_data" 1083 | }, 1084 | { 1085 | "data": { 1086 | "application/vnd.jupyter.widget-view+json": { 1087 | "model_id": "5f04dea202824a74ad3b409327ba06da", 1088 | "version_major": 2, 1089 | "version_minor": 0 1090 | }, 1091 | "text/plain": [ 1092 | "HBox(children=(IntProgress(value=0, max=15183), HTML(value='')))" 1093 | ] 1094 | }, 1095 | "metadata": {}, 1096 | "output_type": "display_data" 1097 | }, 1098 | { 1099 | "data": { 1100 | "application/vnd.jupyter.widget-view+json": { 1101 | "model_id": "8349dfe2209a49db8c163dccdb0c247c", 1102 | "version_major": 2, 1103 | "version_minor": 0 1104 | }, 1105 | "text/plain": [ 1106 | "HBox(children=(IntProgress(value=0, max=39079), HTML(value='')))" 1107 | ] 1108 | 
}, 1109 | "metadata": {}, 1110 | "output_type": "display_data" 1111 | }, 1112 | { 1113 | "data": { 1114 | "application/vnd.jupyter.widget-view+json": { 1115 | "model_id": "b00a953f1145411e84308e8c519760cc", 1116 | "version_major": 2, 1117 | "version_minor": 0 1118 | }, 1119 | "text/plain": [ 1120 | "HBox(children=(IntProgress(value=0, max=39096), HTML(value='')))" 1121 | ] 1122 | }, 1123 | "metadata": {}, 1124 | "output_type": "display_data" 1125 | }, 1126 | { 1127 | "data": { 1128 | "application/vnd.jupyter.widget-view+json": { 1129 | "model_id": "1828cabe3d534c50b60b7c3184898b89", 1130 | "version_major": 2, 1131 | "version_minor": 0 1132 | }, 1133 | "text/plain": [ 1134 | "HBox(children=(IntProgress(value=0, max=39054), HTML(value='')))" 1135 | ] 1136 | }, 1137 | "metadata": {}, 1138 | "output_type": "display_data" 1139 | }, 1140 | { 1141 | "data": { 1142 | "application/vnd.jupyter.widget-view+json": { 1143 | "model_id": "9f5957409ada4a0f944c8f80c62baa9b", 1144 | "version_major": 2, 1145 | "version_minor": 0 1146 | }, 1147 | "text/plain": [ 1148 | "HBox(children=(IntProgress(value=0, max=39058), HTML(value='')))" 1149 | ] 1150 | }, 1151 | "metadata": {}, 1152 | "output_type": "display_data" 1153 | }, 1154 | { 1155 | "data": { 1156 | "application/vnd.jupyter.widget-view+json": { 1157 | "model_id": "1703aae7b6ec4856b95f4bfdfbc51901", 1158 | "version_major": 2, 1159 | "version_minor": 0 1160 | }, 1161 | "text/plain": [ 1162 | "HBox(children=(IntProgress(value=0, max=39033), HTML(value='')))" 1163 | ] 1164 | }, 1165 | "metadata": {}, 1166 | "output_type": "display_data" 1167 | }, 1168 | { 1169 | "data": { 1170 | "application/vnd.jupyter.widget-view+json": { 1171 | "model_id": "bb4caad2c94140b6b7721025f86130cb", 1172 | "version_major": 2, 1173 | "version_minor": 0 1174 | }, 1175 | "text/plain": [ 1176 | "HBox(children=(IntProgress(value=0, max=39087), HTML(value='')))" 1177 | ] 1178 | }, 1179 | "metadata": {}, 1180 | "output_type": "display_data" 1181 | }, 1182 | { 
1183 | "data": { 1184 | "application/vnd.jupyter.widget-view+json": { 1185 | "model_id": "7dcffb5b2cd24f73a6ee4d74a8165763", 1186 | "version_major": 2, 1187 | "version_minor": 0 1188 | }, 1189 | "text/plain": [ 1190 | "HBox(children=(IntProgress(value=0, max=39098), HTML(value='')))" 1191 | ] 1192 | }, 1193 | "metadata": {}, 1194 | "output_type": "display_data" 1195 | }, 1196 | { 1197 | "data": { 1198 | "application/vnd.jupyter.widget-view+json": { 1199 | "model_id": "eb55ad2cb2e6498783d5d1f19a83afc2", 1200 | "version_major": 2, 1201 | "version_minor": 0 1202 | }, 1203 | "text/plain": [ 1204 | "HBox(children=(IntProgress(value=0, max=39078), HTML(value='')))" 1205 | ] 1206 | }, 1207 | "metadata": {}, 1208 | "output_type": "display_data" 1209 | }, 1210 | { 1211 | "data": { 1212 | "application/vnd.jupyter.widget-view+json": { 1213 | "model_id": "336f44ab4e0c4faaa3cb1e12fbcc3c88", 1214 | "version_major": 2, 1215 | "version_minor": 0 1216 | }, 1217 | "text/plain": [ 1218 | "HBox(children=(IntProgress(value=0, max=39110), HTML(value='')))" 1219 | ] 1220 | }, 1221 | "metadata": {}, 1222 | "output_type": "display_data" 1223 | }, 1224 | { 1225 | "data": { 1226 | "application/vnd.jupyter.widget-view+json": { 1227 | "model_id": "9dad29362b4646f5901f0f4d3f0096e8", 1228 | "version_major": 2, 1229 | "version_minor": 0 1230 | }, 1231 | "text/plain": [ 1232 | "HBox(children=(IntProgress(value=0, max=39096), HTML(value='')))" 1233 | ] 1234 | }, 1235 | "metadata": {}, 1236 | "output_type": "display_data" 1237 | }, 1238 | { 1239 | "data": { 1240 | "application/vnd.jupyter.widget-view+json": { 1241 | "model_id": "a1ede30be16d480abc72fbc0ab02c239", 1242 | "version_major": 2, 1243 | "version_minor": 0 1244 | }, 1245 | "text/plain": [ 1246 | "HBox(children=(IntProgress(value=0, max=39054), HTML(value='')))" 1247 | ] 1248 | }, 1249 | "metadata": {}, 1250 | "output_type": "display_data" 1251 | }, 1252 | { 1253 | "data": { 1254 | "application/vnd.jupyter.widget-view+json": { 1255 | 
"model_id": "8b0514cee05d468a8e7ff73f51285e18", 1256 | "version_major": 2, 1257 | "version_minor": 0 1258 | }, 1259 | "text/plain": [ 1260 | "HBox(children=(IntProgress(value=0, max=39095), HTML(value='')))" 1261 | ] 1262 | }, 1263 | "metadata": {}, 1264 | "output_type": "display_data" 1265 | }, 1266 | { 1267 | "data": { 1268 | "application/vnd.jupyter.widget-view+json": { 1269 | "model_id": "a3af7b89986e462280dd9377a6b645d8", 1270 | "version_major": 2, 1271 | "version_minor": 0 1272 | }, 1273 | "text/plain": [ 1274 | "HBox(children=(IntProgress(value=0, max=39092), HTML(value='')))" 1275 | ] 1276 | }, 1277 | "metadata": {}, 1278 | "output_type": "display_data" 1279 | }, 1280 | { 1281 | "name": "stderr", 1282 | "output_type": "stream", 1283 | "text": [ 1284 | "/home/jfpuget/anaconda3/envs/xgb8/lib/python3.6/site-packages/celerite/celerite.py:119: RuntimeWarning: invalid value encountered in less\n", 1285 | " if check_sorted and np.any(np.diff(t) < 0.0):\n", 1286 | "/home/jfpuget/anaconda3/envs/xgb8/lib/python3.6/site-packages/celerite/celerite.py:119: RuntimeWarning: invalid value encountered in less\n", 1287 | " if check_sorted and np.any(np.diff(t) < 0.0):\n", 1288 | "/home/jfpuget/anaconda3/envs/xgb8/lib/python3.6/site-packages/celerite/celerite.py:119: RuntimeWarning: invalid value encountered in less\n", 1289 | " if check_sorted and np.any(np.diff(t) < 0.0):\n", 1290 | "/home/jfpuget/anaconda3/envs/xgb8/lib/python3.6/site-packages/celerite/celerite.py:119: RuntimeWarning: invalid value encountered in less\n", 1291 | " if check_sorted and np.any(np.diff(t) < 0.0):\n", 1292 | "/home/jfpuget/anaconda3/envs/xgb8/lib/python3.6/site-packages/celerite/celerite.py:119: RuntimeWarning: invalid value encountered in less\n", 1293 | " if check_sorted and np.any(np.diff(t) < 0.0):\n" 1294 | ] 1295 | }, 1296 | { 1297 | "name": "stdout", 1298 | "output_type": "stream", 1299 | "text": [ 1300 | "\n", 1301 | "ending worker 1\n", 1302 | "starting worker 20\n" 1303 | ] 1304 | }, 
1305 | { 1306 | "data": { 1307 | "application/vnd.jupyter.widget-view+json": { 1308 | "model_id": "feb5b649cc574ff79ee1792feb5767e4", 1309 | "version_major": 2, 1310 | "version_minor": 0 1311 | }, 1312 | "text/plain": [ 1313 | "HBox(children=(IntProgress(value=0, max=39036), HTML(value='')))" 1314 | ] 1315 | }, 1316 | "metadata": {}, 1317 | "output_type": "display_data" 1318 | }, 1319 | { 1320 | "name": "stderr", 1321 | "output_type": "stream", 1322 | "text": [ 1323 | "/home/jfpuget/anaconda3/envs/xgb8/lib/python3.6/site-packages/celerite/celerite.py:119: RuntimeWarning: invalid value encountered in less\n", 1324 | " if check_sorted and np.any(np.diff(t) < 0.0):\n", 1325 | "/home/jfpuget/anaconda3/envs/xgb8/lib/python3.6/site-packages/celerite/celerite.py:119: RuntimeWarning: invalid value encountered in less\n", 1326 | " if check_sorted and np.any(np.diff(t) < 0.0):\n" 1327 | ] 1328 | }, 1329 | { 1330 | "name": "stdout", 1331 | "output_type": "stream", 1332 | "text": [ 1333 | "\n", 1334 | "ending worker 0\n", 1335 | "starting worker 21\n" 1336 | ] 1337 | }, 1338 | { 1339 | "data": { 1340 | "application/vnd.jupyter.widget-view+json": { 1341 | "model_id": "7917d9f219ef437caf59ca95e8709a14", 1342 | "version_major": 2, 1343 | "version_minor": 0 1344 | }, 1345 | "text/plain": [ 1346 | "HBox(children=(IntProgress(value=0, max=39020), HTML(value='')))" 1347 | ] 1348 | }, 1349 | "metadata": {}, 1350 | "output_type": "display_data" 1351 | }, 1352 | { 1353 | "name": "stderr", 1354 | "output_type": "stream", 1355 | "text": [ 1356 | "/home/jfpuget/anaconda3/envs/xgb8/lib/python3.6/site-packages/celerite/celerite.py:119: RuntimeWarning: invalid value encountered in less\n", 1357 | " if check_sorted and np.any(np.diff(t) < 0.0):\n", 1358 | "/home/jfpuget/anaconda3/envs/xgb8/lib/python3.6/site-packages/celerite/celerite.py:119: RuntimeWarning: invalid value encountered in less\n", 1359 | " if check_sorted and np.any(np.diff(t) < 0.0):\n" 1360 | ] 1361 | }, 1362 | { 1363 | 
"name": "stdout", 1364 | "output_type": "stream", 1365 | "text": [ 1366 | "\n", 1367 | "ending worker 2\n", 1368 | "starting worker 22\n" 1369 | ] 1370 | }, 1371 | { 1372 | "data": { 1373 | "application/vnd.jupyter.widget-view+json": { 1374 | "model_id": "12908b72b6874ace82cb9da2f91147c6", 1375 | "version_major": 2, 1376 | "version_minor": 0 1377 | }, 1378 | "text/plain": [ 1379 | "HBox(children=(IntProgress(value=0, max=39103), HTML(value='')))" 1380 | ] 1381 | }, 1382 | "metadata": {}, 1383 | "output_type": "display_data" 1384 | }, 1385 | { 1386 | "name": "stdout", 1387 | "output_type": "stream", 1388 | "text": [ 1389 | "\n", 1390 | "ending worker 17\n", 1391 | "starting worker 23\n" 1392 | ] 1393 | }, 1394 | { 1395 | "data": { 1396 | "application/vnd.jupyter.widget-view+json": { 1397 | "model_id": "8c92c11e3dee4c4ab740bc02e5a69d1c", 1398 | "version_major": 2, 1399 | "version_minor": 0 1400 | }, 1401 | "text/plain": [ 1402 | "HBox(children=(IntProgress(value=0, max=39054), HTML(value='')))" 1403 | ] 1404 | }, 1405 | "metadata": {}, 1406 | "output_type": "display_data" 1407 | }, 1408 | { 1409 | "name": "stdout", 1410 | "output_type": "stream", 1411 | "text": [ 1412 | "\n", 1413 | "ending worker 15\n", 1414 | "starting worker 24\n" 1415 | ] 1416 | }, 1417 | { 1418 | "data": { 1419 | "application/vnd.jupyter.widget-view+json": { 1420 | "model_id": "e004d296d8214f6a958232289f273759", 1421 | "version_major": 2, 1422 | "version_minor": 0 1423 | }, 1424 | "text/plain": [ 1425 | "HBox(children=(IntProgress(value=0, max=39084), HTML(value='')))" 1426 | ] 1427 | }, 1428 | "metadata": {}, 1429 | "output_type": "display_data" 1430 | }, 1431 | { 1432 | "name": "stdout", 1433 | "output_type": "stream", 1434 | "text": [ 1435 | "\n", 1436 | "ending worker 14\n", 1437 | "starting worker 25\n" 1438 | ] 1439 | }, 1440 | { 1441 | "data": { 1442 | "application/vnd.jupyter.widget-view+json": { 1443 | "model_id": "e10fe83368964f69b5be5c72a0d104ca", 1444 | "version_major": 2, 1445 | 
"version_minor": 0 1446 | }, 1447 | "text/plain": [ 1448 | "HBox(children=(IntProgress(value=0, max=39094), HTML(value='')))" 1449 | ] 1450 | }, 1451 | "metadata": {}, 1452 | "output_type": "display_data" 1453 | }, 1454 | { 1455 | "name": "stdout", 1456 | "output_type": "stream", 1457 | "text": [ 1458 | "\n", 1459 | "\n", 1460 | "ending worker 18\n", 1461 | "starting worker 26\n" 1462 | ] 1463 | }, 1464 | { 1465 | "data": { 1466 | "application/vnd.jupyter.widget-view+json": { 1467 | "model_id": "c40f9d61202e42a48c8c8304e035bcd3", 1468 | "version_major": 2, 1469 | "version_minor": 0 1470 | }, 1471 | "text/plain": [ 1472 | "HBox(children=(IntProgress(value=0, max=39048), HTML(value='')))" 1473 | ] 1474 | }, 1475 | "metadata": {}, 1476 | "output_type": "display_data" 1477 | }, 1478 | { 1479 | "name": "stdout", 1480 | "output_type": "stream", 1481 | "text": [ 1482 | "ending worker 11\n", 1483 | "starting worker 27\n" 1484 | ] 1485 | }, 1486 | { 1487 | "data": { 1488 | "application/vnd.jupyter.widget-view+json": { 1489 | "model_id": "8a571bb8c63d41eb9a1d1acd1cef3984", 1490 | "version_major": 2, 1491 | "version_minor": 0 1492 | }, 1493 | "text/plain": [ 1494 | "HBox(children=(IntProgress(value=0, max=39086), HTML(value='')))" 1495 | ] 1496 | }, 1497 | "metadata": {}, 1498 | "output_type": "display_data" 1499 | }, 1500 | { 1501 | "name": "stdout", 1502 | "output_type": "stream", 1503 | "text": [ 1504 | "\n", 1505 | "\n", 1506 | "ending worker 10\n", 1507 | "starting worker 28\n" 1508 | ] 1509 | }, 1510 | { 1511 | "data": { 1512 | "application/vnd.jupyter.widget-view+json": { 1513 | "model_id": "09bffa98949f49128250f1474b097767", 1514 | "version_major": 2, 1515 | "version_minor": 0 1516 | }, 1517 | "text/plain": [ 1518 | "HBox(children=(IntProgress(value=0, max=39095), HTML(value='')))" 1519 | ] 1520 | }, 1521 | "metadata": {}, 1522 | "output_type": "display_data" 1523 | }, 1524 | { 1525 | "name": "stdout", 1526 | "output_type": "stream", 1527 | "text": [ 1528 | "ending 
worker 8\n", 1529 | "starting worker 29\n" 1530 | ] 1531 | }, 1532 | { 1533 | "data": { 1534 | "application/vnd.jupyter.widget-view+json": { 1535 | "model_id": "86baa0da721f4a80815b315be9723348", 1536 | "version_major": 2, 1537 | "version_minor": 0 1538 | }, 1539 | "text/plain": [ 1540 | "HBox(children=(IntProgress(value=0, max=39083), HTML(value='')))" 1541 | ] 1542 | }, 1543 | "metadata": {}, 1544 | "output_type": "display_data" 1545 | }, 1546 | { 1547 | "name": "stdout", 1548 | "output_type": "stream", 1549 | "text": [ 1550 | "\n", 1551 | "\n", 1552 | "ending worker 7\n", 1553 | "starting worker 30\n" 1554 | ] 1555 | }, 1556 | { 1557 | "data": { 1558 | "application/vnd.jupyter.widget-view+json": { 1559 | "model_id": "476eeff05e5b438daf26731c05a88885", 1560 | "version_major": 2, 1561 | "version_minor": 0 1562 | }, 1563 | "text/plain": [ 1564 | "HBox(children=(IntProgress(value=0, max=39084), HTML(value='')))" 1565 | ] 1566 | }, 1567 | "metadata": {}, 1568 | "output_type": "display_data" 1569 | }, 1570 | { 1571 | "name": "stdout", 1572 | "output_type": "stream", 1573 | "text": [ 1574 | "ending worker 19\n", 1575 | "starting worker 31\n" 1576 | ] 1577 | }, 1578 | { 1579 | "data": { 1580 | "application/vnd.jupyter.widget-view+json": { 1581 | "model_id": "440fe94d34324ed880de4b8f97e8ca68", 1582 | "version_major": 2, 1583 | "version_minor": 0 1584 | }, 1585 | "text/plain": [ 1586 | "HBox(children=(IntProgress(value=0, max=39058), HTML(value='')))" 1587 | ] 1588 | }, 1589 | "metadata": {}, 1590 | "output_type": "display_data" 1591 | }, 1592 | { 1593 | "name": "stdout", 1594 | "output_type": "stream", 1595 | "text": [ 1596 | "\n", 1597 | "ending worker 3\n", 1598 | "starting worker 32\n" 1599 | ] 1600 | }, 1601 | { 1602 | "data": { 1603 | "application/vnd.jupyter.widget-view+json": { 1604 | "model_id": "54a3fb1ddd6d4bcc8162d66224b2cc5a", 1605 | "version_major": 2, 1606 | "version_minor": 0 1607 | }, 1608 | "text/plain": [ 1609 | "HBox(children=(IntProgress(value=0, 
max=39080), HTML(value='')))" 1610 | ] 1611 | }, 1612 | "metadata": {}, 1613 | "output_type": "display_data" 1614 | }, 1615 | { 1616 | "name": "stdout", 1617 | "output_type": "stream", 1618 | "text": [ 1619 | "\n", 1620 | "ending worker 13\n", 1621 | "starting worker 33\n", 1622 | "\n" 1623 | ] 1624 | }, 1625 | { 1626 | "data": { 1627 | "application/vnd.jupyter.widget-view+json": { 1628 | "model_id": "14f42c707acf41658665725876a6f61b", 1629 | "version_major": 2, 1630 | "version_minor": 0 1631 | }, 1632 | "text/plain": [ 1633 | "HBox(children=(IntProgress(value=0, max=39072), HTML(value='')))" 1634 | ] 1635 | }, 1636 | "metadata": {}, 1637 | "output_type": "display_data" 1638 | }, 1639 | { 1640 | "name": "stdout", 1641 | "output_type": "stream", 1642 | "text": [ 1643 | "\n", 1644 | "ending worker 12\n", 1645 | "starting worker 34\n" 1646 | ] 1647 | }, 1648 | { 1649 | "data": { 1650 | "application/vnd.jupyter.widget-view+json": { 1651 | "model_id": "61bd8aa5226a40e9b2fd39219ee80c39", 1652 | "version_major": 2, 1653 | "version_minor": 0 1654 | }, 1655 | "text/plain": [ 1656 | "HBox(children=(IntProgress(value=0, max=39073), HTML(value='')))" 1657 | ] 1658 | }, 1659 | "metadata": {}, 1660 | "output_type": "display_data" 1661 | }, 1662 | { 1663 | "name": "stdout", 1664 | "output_type": "stream", 1665 | "text": [ 1666 | "ending worker 16\n", 1667 | "starting worker 35\n" 1668 | ] 1669 | }, 1670 | { 1671 | "data": { 1672 | "application/vnd.jupyter.widget-view+json": { 1673 | "model_id": "f5ba91c875fa40d3a1fc51ffc1d69fa0", 1674 | "version_major": 2, 1675 | "version_minor": 0 1676 | }, 1677 | "text/plain": [ 1678 | "HBox(children=(IntProgress(value=0, max=39094), HTML(value='')))" 1679 | ] 1680 | }, 1681 | "metadata": {}, 1682 | "output_type": "display_data" 1683 | }, 1684 | { 1685 | "name": "stderr", 1686 | "output_type": "stream", 1687 | "text": [ 1688 | "/home/jfpuget/anaconda3/envs/xgb8/lib/python3.6/site-packages/celerite/celerite.py:119: RuntimeWarning: invalid value 
encountered in less\n", 1689 | " if check_sorted and np.any(np.diff(t) < 0.0):\n" 1690 | ] 1691 | }, 1692 | { 1693 | "name": "stdout", 1694 | "output_type": "stream", 1695 | "text": [ 1696 | "\n", 1697 | "ending worker 9\n", 1698 | "starting worker 36\n" 1699 | ] 1700 | }, 1701 | { 1702 | "data": { 1703 | "application/vnd.jupyter.widget-view+json": { 1704 | "model_id": "4e9a1907b9044b74b09082a44c93665f", 1705 | "version_major": 2, 1706 | "version_minor": 0 1707 | }, 1708 | "text/plain": [ 1709 | "HBox(children=(IntProgress(value=0, max=39106), HTML(value='')))" 1710 | ] 1711 | }, 1712 | "metadata": {}, 1713 | "output_type": "display_data" 1714 | }, 1715 | { 1716 | "name": "stdout", 1717 | "output_type": "stream", 1718 | "text": [ 1719 | "\n", 1720 | "ending worker 6\n", 1721 | "starting worker 37\n" 1722 | ] 1723 | }, 1724 | { 1725 | "data": { 1726 | "application/vnd.jupyter.widget-view+json": { 1727 | "model_id": "3f7770c9a0f94968bf0dd05c7123361f", 1728 | "version_major": 2, 1729 | "version_minor": 0 1730 | }, 1731 | "text/plain": [ 1732 | "HBox(children=(IntProgress(value=0, max=39054), HTML(value='')))" 1733 | ] 1734 | }, 1735 | "metadata": {}, 1736 | "output_type": "display_data" 1737 | }, 1738 | { 1739 | "name": "stdout", 1740 | "output_type": "stream", 1741 | "text": [ 1742 | "\n", 1743 | "ending worker 5\n", 1744 | "starting worker 38\n" 1745 | ] 1746 | }, 1747 | { 1748 | "data": { 1749 | "application/vnd.jupyter.widget-view+json": { 1750 | "model_id": "7c30ce8e8850483ca10cb1735d7ee491", 1751 | "version_major": 2, 1752 | "version_minor": 0 1753 | }, 1754 | "text/plain": [ 1755 | "HBox(children=(IntProgress(value=0, max=39070), HTML(value='')))" 1756 | ] 1757 | }, 1758 | "metadata": {}, 1759 | "output_type": "display_data" 1760 | }, 1761 | { 1762 | "name": "stdout", 1763 | "output_type": "stream", 1764 | "text": [ 1765 | "\n", 1766 | "ending worker 4\n", 1767 | "starting worker 39\n" 1768 | ] 1769 | }, 1770 | { 1771 | "data": { 1772 | 
"application/vnd.jupyter.widget-view+json": { 1773 | "model_id": "c05ceea5df6a491591ac7370462edd29", 1774 | "version_major": 2, 1775 | "version_minor": 0 1776 | }, 1777 | "text/plain": [ 1778 | "HBox(children=(IntProgress(value=0, max=39041), HTML(value='')))" 1779 | ] 1780 | }, 1781 | "metadata": {}, 1782 | "output_type": "display_data" 1783 | }, 1784 | { 1785 | "name": "stderr", 1786 | "output_type": "stream", 1787 | "text": [ 1788 | "/home/jfpuget/anaconda3/envs/xgb8/lib/python3.6/site-packages/celerite/celerite.py:119: RuntimeWarning: invalid value encountered in less\n", 1789 | " if check_sorted and np.any(np.diff(t) < 0.0):\n", 1790 | "/home/jfpuget/anaconda3/envs/xgb8/lib/python3.6/site-packages/celerite/celerite.py:119: RuntimeWarning: invalid value encountered in less\n", 1791 | " if check_sorted and np.any(np.diff(t) < 0.0):\n", 1792 | "/home/jfpuget/anaconda3/envs/xgb8/lib/python3.6/site-packages/celerite/celerite.py:119: RuntimeWarning: invalid value encountered in less\n", 1793 | " if check_sorted and np.any(np.diff(t) < 0.0):\n", 1794 | "/home/jfpuget/anaconda3/envs/xgb8/lib/python3.6/site-packages/celerite/celerite.py:119: RuntimeWarning: invalid value encountered in less\n", 1795 | " if check_sorted and np.any(np.diff(t) < 0.0):\n" 1796 | ] 1797 | }, 1798 | { 1799 | "name": "stdout", 1800 | "output_type": "stream", 1801 | "text": [ 1802 | "\n", 1803 | "ending worker 20\n", 1804 | "starting worker 40\n" 1805 | ] 1806 | }, 1807 | { 1808 | "data": { 1809 | "application/vnd.jupyter.widget-view+json": { 1810 | "model_id": "c758ba50473345f3913a8773e564d5ef", 1811 | "version_major": 2, 1812 | "version_minor": 0 1813 | }, 1814 | "text/plain": [ 1815 | "HBox(children=(IntProgress(value=0, max=39098), HTML(value='')))" 1816 | ] 1817 | }, 1818 | "metadata": {}, 1819 | "output_type": "display_data" 1820 | }, 1821 | { 1822 | "name": "stderr", 1823 | "output_type": "stream", 1824 | "text": [ 1825 | 
"/home/jfpuget/anaconda3/envs/xgb8/lib/python3.6/site-packages/celerite/celerite.py:119: RuntimeWarning: invalid value encountered in less\n", 1826 | " if check_sorted and np.any(np.diff(t) < 0.0):\n" 1827 | ] 1828 | }, 1829 | { 1830 | "name": "stdout", 1831 | "output_type": "stream", 1832 | "text": [ 1833 | "\n", 1834 | "ending worker 21\n", 1835 | "starting worker 41\n" 1836 | ] 1837 | }, 1838 | { 1839 | "data": { 1840 | "application/vnd.jupyter.widget-view+json": { 1841 | "model_id": "e033b743ebce4cdeaab35ffebc70f755", 1842 | "version_major": 2, 1843 | "version_minor": 0 1844 | }, 1845 | "text/plain": [ 1846 | "HBox(children=(IntProgress(value=0, max=39046), HTML(value='')))" 1847 | ] 1848 | }, 1849 | "metadata": {}, 1850 | "output_type": "display_data" 1851 | }, 1852 | { 1853 | "name": "stderr", 1854 | "output_type": "stream", 1855 | "text": [ 1856 | "/home/jfpuget/anaconda3/envs/xgb8/lib/python3.6/site-packages/celerite/celerite.py:119: RuntimeWarning: invalid value encountered in less\n", 1857 | " if check_sorted and np.any(np.diff(t) < 0.0):\n", 1858 | "/home/jfpuget/anaconda3/envs/xgb8/lib/python3.6/site-packages/celerite/celerite.py:119: RuntimeWarning: invalid value encountered in less\n", 1859 | " if check_sorted and np.any(np.diff(t) < 0.0):\n", 1860 | "/home/jfpuget/anaconda3/envs/xgb8/lib/python3.6/site-packages/celerite/celerite.py:119: RuntimeWarning: invalid value encountered in less\n", 1861 | " if check_sorted and np.any(np.diff(t) < 0.0):\n" 1862 | ] 1863 | }, 1864 | { 1865 | "name": "stdout", 1866 | "output_type": "stream", 1867 | "text": [ 1868 | "\n", 1869 | "ending worker 22\n", 1870 | "starting worker 42\n" 1871 | ] 1872 | }, 1873 | { 1874 | "data": { 1875 | "application/vnd.jupyter.widget-view+json": { 1876 | "model_id": "d3ad7909814c4659b561957fc2ac8add", 1877 | "version_major": 2, 1878 | "version_minor": 0 1879 | }, 1880 | "text/plain": [ 1881 | "HBox(children=(IntProgress(value=0, max=39022), HTML(value='')))" 1882 | ] 1883 | }, 1884 | 
"metadata": {}, 1885 | "output_type": "display_data" 1886 | }, 1887 | { 1888 | "name": "stdout", 1889 | "output_type": "stream", 1890 | "text": [ 1891 | "\n", 1892 | "ending worker 23\n", 1893 | "starting worker 43\n" 1894 | ] 1895 | }, 1896 | { 1897 | "data": { 1898 | "application/vnd.jupyter.widget-view+json": { 1899 | "model_id": "5050873972124566b3efc14d7d017e23", 1900 | "version_major": 2, 1901 | "version_minor": 0 1902 | }, 1903 | "text/plain": [ 1904 | "HBox(children=(IntProgress(value=0, max=39002), HTML(value='')))" 1905 | ] 1906 | }, 1907 | "metadata": {}, 1908 | "output_type": "display_data" 1909 | }, 1910 | { 1911 | "name": "stdout", 1912 | "output_type": "stream", 1913 | "text": [ 1914 | "\n", 1915 | "\n", 1916 | "\n", 1917 | "ending worker 29\n", 1918 | "starting worker 44\n" 1919 | ] 1920 | }, 1921 | { 1922 | "data": { 1923 | "application/vnd.jupyter.widget-view+json": { 1924 | "model_id": "fe133f2eed7e483e96b4c958a49a741d", 1925 | "version_major": 2, 1926 | "version_minor": 0 1927 | }, 1928 | "text/plain": [ 1929 | "HBox(children=(IntProgress(value=0, max=39027), HTML(value='')))" 1930 | ] 1931 | }, 1932 | "metadata": {}, 1933 | "output_type": "display_data" 1934 | }, 1935 | { 1936 | "name": "stdout", 1937 | "output_type": "stream", 1938 | "text": [ 1939 | "ending worker 24\n", 1940 | "starting worker 45\n", 1941 | "ending worker 25\n", 1942 | "starting worker 46\n" 1943 | ] 1944 | }, 1945 | { 1946 | "data": { 1947 | "application/vnd.jupyter.widget-view+json": { 1948 | "model_id": "86fd5e12ba184476b7364b35445cbdb5", 1949 | "version_major": 2, 1950 | "version_minor": 0 1951 | }, 1952 | "text/plain": [ 1953 | "HBox(children=(IntProgress(value=0, max=39035), HTML(value='')))" 1954 | ] 1955 | }, 1956 | "metadata": {}, 1957 | "output_type": "display_data" 1958 | }, 1959 | { 1960 | "data": { 1961 | "application/vnd.jupyter.widget-view+json": { 1962 | "model_id": "94dfca9b34a34af1abf3d2699ceb5a6b", 1963 | "version_major": 2, 1964 | "version_minor": 0 1965 
| }, 1966 | "text/plain": [ 1967 | "HBox(children=(IntProgress(value=0, max=39069), HTML(value='')))" 1968 | ] 1969 | }, 1970 | "metadata": {}, 1971 | "output_type": "display_data" 1972 | }, 1973 | { 1974 | "name": "stdout", 1975 | "output_type": "stream", 1976 | "text": [ 1977 | "\n", 1978 | "ending worker 32\n", 1979 | "starting worker 47\n" 1980 | ] 1981 | }, 1982 | { 1983 | "data": { 1984 | "application/vnd.jupyter.widget-view+json": { 1985 | "model_id": "cbda2cf3f4d94cb19a883697303289c8", 1986 | "version_major": 2, 1987 | "version_minor": 0 1988 | }, 1989 | "text/plain": [ 1990 | "HBox(children=(IntProgress(value=0, max=39063), HTML(value='')))" 1991 | ] 1992 | }, 1993 | "metadata": {}, 1994 | "output_type": "display_data" 1995 | }, 1996 | { 1997 | "name": "stdout", 1998 | "output_type": "stream", 1999 | "text": [ 2000 | "\n" 2001 | ] 2002 | }, 2003 | { 2004 | "name": "stderr", 2005 | "output_type": "stream", 2006 | "text": [ 2007 | "/home/jfpuget/anaconda3/envs/xgb8/lib/python3.6/site-packages/celerite/celerite.py:119: RuntimeWarning: invalid value encountered in less\n", 2008 | " if check_sorted and np.any(np.diff(t) < 0.0):\n" 2009 | ] 2010 | }, 2011 | { 2012 | "name": "stdout", 2013 | "output_type": "stream", 2014 | "text": [ 2015 | "ending worker 28\n", 2016 | "starting worker 48\n" 2017 | ] 2018 | }, 2019 | { 2020 | "data": { 2021 | "application/vnd.jupyter.widget-view+json": { 2022 | "model_id": "267952289e5843118529ceef8a663448", 2023 | "version_major": 2, 2024 | "version_minor": 0 2025 | }, 2026 | "text/plain": [ 2027 | "HBox(children=(IntProgress(value=0, max=39090), HTML(value='')))" 2028 | ] 2029 | }, 2030 | "metadata": {}, 2031 | "output_type": "display_data" 2032 | }, 2033 | { 2034 | "name": "stdout", 2035 | "output_type": "stream", 2036 | "text": [ 2037 | "\n", 2038 | "ending worker 30\n", 2039 | "starting worker 49\n" 2040 | ] 2041 | }, 2042 | { 2043 | "data": { 2044 | "application/vnd.jupyter.widget-view+json": { 2045 | "model_id": 
"37e90eb08503473c9b293d01432c1a4f", 2046 | "version_major": 2, 2047 | "version_minor": 0 2048 | }, 2049 | "text/plain": [ 2050 | "HBox(children=(IntProgress(value=0, max=39079), HTML(value='')))" 2051 | ] 2052 | }, 2053 | "metadata": {}, 2054 | "output_type": "display_data" 2055 | }, 2056 | { 2057 | "name": "stdout", 2058 | "output_type": "stream", 2059 | "text": [ 2060 | "\n", 2061 | "ending worker 26\n", 2062 | "starting worker 50\n" 2063 | ] 2064 | }, 2065 | { 2066 | "data": { 2067 | "application/vnd.jupyter.widget-view+json": { 2068 | "model_id": "36465ac60b1c47c9bb51169aae25452a", 2069 | "version_major": 2, 2070 | "version_minor": 0 2071 | }, 2072 | "text/plain": [ 2073 | "HBox(children=(IntProgress(value=0, max=39079), HTML(value='')))" 2074 | ] 2075 | }, 2076 | "metadata": {}, 2077 | "output_type": "display_data" 2078 | }, 2079 | { 2080 | "name": "stdout", 2081 | "output_type": "stream", 2082 | "text": [ 2083 | "\n", 2084 | "\n", 2085 | "ending worker 33\n", 2086 | "starting worker 51\n" 2087 | ] 2088 | }, 2089 | { 2090 | "data": { 2091 | "application/vnd.jupyter.widget-view+json": { 2092 | "model_id": "58f1b7e8a23a4800aa57f8ca3956f3c3", 2093 | "version_major": 2, 2094 | "version_minor": 0 2095 | }, 2096 | "text/plain": [ 2097 | "HBox(children=(IntProgress(value=0, max=39076), HTML(value='')))" 2098 | ] 2099 | }, 2100 | "metadata": {}, 2101 | "output_type": "display_data" 2102 | }, 2103 | { 2104 | "name": "stdout", 2105 | "output_type": "stream", 2106 | "text": [ 2107 | "ending worker 34\n", 2108 | "starting worker 52\n" 2109 | ] 2110 | }, 2111 | { 2112 | "data": { 2113 | "application/vnd.jupyter.widget-view+json": { 2114 | "model_id": "33ec1219dacc451faf9075f8a79e1cb6", 2115 | "version_major": 2, 2116 | "version_minor": 0 2117 | }, 2118 | "text/plain": [ 2119 | "HBox(children=(IntProgress(value=0, max=39084), HTML(value='')))" 2120 | ] 2121 | }, 2122 | "metadata": {}, 2123 | "output_type": "display_data" 2124 | }, 2125 | { 2126 | "name": "stdout", 2127 | 
"output_type": "stream", 2128 | "text": [ 2129 | "\n", 2130 | "ending worker 35\n", 2131 | "starting worker 53\n" 2132 | ] 2133 | }, 2134 | { 2135 | "data": { 2136 | "application/vnd.jupyter.widget-view+json": { 2137 | "model_id": "429f6420f5ef49e2b7606fab71e9730e", 2138 | "version_major": 2, 2139 | "version_minor": 0 2140 | }, 2141 | "text/plain": [ 2142 | "HBox(children=(IntProgress(value=0, max=39037), HTML(value='')))" 2143 | ] 2144 | }, 2145 | "metadata": {}, 2146 | "output_type": "display_data" 2147 | }, 2148 | { 2149 | "name": "stdout", 2150 | "output_type": "stream", 2151 | "text": [ 2152 | "\n", 2153 | "ending worker 27\n", 2154 | "starting worker 54\n" 2155 | ] 2156 | }, 2157 | { 2158 | "data": { 2159 | "application/vnd.jupyter.widget-view+json": { 2160 | "model_id": "d05d9a21a08a43329f185cb65b175959", 2161 | "version_major": 2, 2162 | "version_minor": 0 2163 | }, 2164 | "text/plain": [ 2165 | "HBox(children=(IntProgress(value=0, max=39054), HTML(value='')))" 2166 | ] 2167 | }, 2168 | "metadata": {}, 2169 | "output_type": "display_data" 2170 | }, 2171 | { 2172 | "name": "stdout", 2173 | "output_type": "stream", 2174 | "text": [ 2175 | "\n", 2176 | "\n", 2177 | "ending worker 37\n", 2178 | "starting worker 55\n" 2179 | ] 2180 | }, 2181 | { 2182 | "data": { 2183 | "application/vnd.jupyter.widget-view+json": { 2184 | "model_id": "c12b60ec0a8949128441fd6abc1eb20d", 2185 | "version_major": 2, 2186 | "version_minor": 0 2187 | }, 2188 | "text/plain": [ 2189 | "HBox(children=(IntProgress(value=0, max=39078), HTML(value='')))" 2190 | ] 2191 | }, 2192 | "metadata": {}, 2193 | "output_type": "display_data" 2194 | }, 2195 | { 2196 | "name": "stdout", 2197 | "output_type": "stream", 2198 | "text": [ 2199 | "ending worker 39\n", 2200 | "starting worker 56\n" 2201 | ] 2202 | }, 2203 | { 2204 | "data": { 2205 | "application/vnd.jupyter.widget-view+json": { 2206 | "model_id": "2ec85bc324f64c5bb9b338f9c9665bea", 2207 | "version_major": 2, 2208 | "version_minor": 0 2209 | 
}, 2210 | "text/plain": [ 2211 | "HBox(children=(IntProgress(value=0, max=39085), HTML(value='')))" 2212 | ] 2213 | }, 2214 | "metadata": {}, 2215 | "output_type": "display_data" 2216 | }, 2217 | { 2218 | "name": "stdout", 2219 | "output_type": "stream", 2220 | "text": [ 2221 | "\n", 2222 | "ending worker 31\n", 2223 | "starting worker 57\n" 2224 | ] 2225 | }, 2226 | { 2227 | "data": { 2228 | "application/vnd.jupyter.widget-view+json": { 2229 | "model_id": "f20bd7adf3ea42a59ea3c55fe3e934b6", 2230 | "version_major": 2, 2231 | "version_minor": 0 2232 | }, 2233 | "text/plain": [ 2234 | "HBox(children=(IntProgress(value=0, max=39076), HTML(value='')))" 2235 | ] 2236 | }, 2237 | "metadata": {}, 2238 | "output_type": "display_data" 2239 | }, 2240 | { 2241 | "name": "stdout", 2242 | "output_type": "stream", 2243 | "text": [ 2244 | "\n", 2245 | "ending worker 36\n", 2246 | "starting worker 58\n" 2247 | ] 2248 | }, 2249 | { 2250 | "data": { 2251 | "application/vnd.jupyter.widget-view+json": { 2252 | "model_id": "35c4540da7154d3db45e55ec20343e7a", 2253 | "version_major": 2, 2254 | "version_minor": 0 2255 | }, 2256 | "text/plain": [ 2257 | "HBox(children=(IntProgress(value=0, max=39085), HTML(value='')))" 2258 | ] 2259 | }, 2260 | "metadata": {}, 2261 | "output_type": "display_data" 2262 | }, 2263 | { 2264 | "name": "stdout", 2265 | "output_type": "stream", 2266 | "text": [ 2267 | "\n", 2268 | "ending worker 38\n", 2269 | "starting worker 59\n" 2270 | ] 2271 | }, 2272 | { 2273 | "data": { 2274 | "application/vnd.jupyter.widget-view+json": { 2275 | "model_id": "3ab69318aa804f45a24122579fd3cb2c", 2276 | "version_major": 2, 2277 | "version_minor": 0 2278 | }, 2279 | "text/plain": [ 2280 | "HBox(children=(IntProgress(value=0, max=39051), HTML(value='')))" 2281 | ] 2282 | }, 2283 | "metadata": {}, 2284 | "output_type": "display_data" 2285 | }, 2286 | { 2287 | "name": "stderr", 2288 | "output_type": "stream", 2289 | "text": [ 2290 | 
"/home/jfpuget/anaconda3/envs/xgb8/lib/python3.6/site-packages/celerite/celerite.py:119: RuntimeWarning: invalid value encountered in less\n", 2291 | " if check_sorted and np.any(np.diff(t) < 0.0):\n", 2292 | "/home/jfpuget/anaconda3/envs/xgb8/lib/python3.6/site-packages/celerite/celerite.py:119: RuntimeWarning: invalid value encountered in less\n", 2293 | " if check_sorted and np.any(np.diff(t) < 0.0):\n", 2294 | "/home/jfpuget/anaconda3/envs/xgb8/lib/python3.6/site-packages/celerite/celerite.py:119: RuntimeWarning: invalid value encountered in less\n", 2295 | " if check_sorted and np.any(np.diff(t) < 0.0):\n" 2296 | ] 2297 | }, 2298 | { 2299 | "name": "stdout", 2300 | "output_type": "stream", 2301 | "text": [ 2302 | "\n", 2303 | "ending worker 40\n", 2304 | "starting worker 60\n" 2305 | ] 2306 | }, 2307 | { 2308 | "data": { 2309 | "application/vnd.jupyter.widget-view+json": { 2310 | "model_id": "4f1c8a480cb54b1f95fcf5ed19eae1e8", 2311 | "version_major": 2, 2312 | "version_minor": 0 2313 | }, 2314 | "text/plain": [ 2315 | "HBox(children=(IntProgress(value=0, max=39077), HTML(value='')))" 2316 | ] 2317 | }, 2318 | "metadata": {}, 2319 | "output_type": "display_data" 2320 | }, 2321 | { 2322 | "name": "stderr", 2323 | "output_type": "stream", 2324 | "text": [ 2325 | "/home/jfpuget/anaconda3/envs/xgb8/lib/python3.6/site-packages/celerite/celerite.py:119: RuntimeWarning: invalid value encountered in less\n", 2326 | " if check_sorted and np.any(np.diff(t) < 0.0):\n" 2327 | ] 2328 | }, 2329 | { 2330 | "name": "stdout", 2331 | "output_type": "stream", 2332 | "text": [ 2333 | "\n", 2334 | "ending worker 41\n", 2335 | "starting worker 61\n" 2336 | ] 2337 | }, 2338 | { 2339 | "data": { 2340 | "application/vnd.jupyter.widget-view+json": { 2341 | "model_id": "d7989c84f5574f97ac8d1d18b5a0bf1a", 2342 | "version_major": 2, 2343 | "version_minor": 0 2344 | }, 2345 | "text/plain": [ 2346 | "HBox(children=(IntProgress(value=0, max=39103), HTML(value='')))" 2347 | ] 2348 | }, 2349 | 
"metadata": {}, 2350 | "output_type": "display_data" 2351 | }, 2352 | { 2353 | "name": "stderr", 2354 | "output_type": "stream", 2355 | "text": [ 2356 | "/home/jfpuget/anaconda3/envs/xgb8/lib/python3.6/site-packages/celerite/celerite.py:119: RuntimeWarning: invalid value encountered in less\n", 2357 | " if check_sorted and np.any(np.diff(t) < 0.0):\n", 2358 | "/home/jfpuget/anaconda3/envs/xgb8/lib/python3.6/site-packages/celerite/celerite.py:119: RuntimeWarning: invalid value encountered in less\n", 2359 | " if check_sorted and np.any(np.diff(t) < 0.0):\n", 2360 | "/home/jfpuget/anaconda3/envs/xgb8/lib/python3.6/site-packages/celerite/celerite.py:119: RuntimeWarning: invalid value encountered in less\n", 2361 | " if check_sorted and np.any(np.diff(t) < 0.0):\n", 2362 | "/home/jfpuget/anaconda3/envs/xgb8/lib/python3.6/site-packages/celerite/celerite.py:119: RuntimeWarning: invalid value encountered in less\n", 2363 | " if check_sorted and np.any(np.diff(t) < 0.0):\n", 2364 | "/home/jfpuget/anaconda3/envs/xgb8/lib/python3.6/site-packages/celerite/celerite.py:119: RuntimeWarning: invalid value encountered in less\n", 2365 | " if check_sorted and np.any(np.diff(t) < 0.0):\n" 2366 | ] 2367 | }, 2368 | { 2369 | "name": "stdout", 2370 | "output_type": "stream", 2371 | "text": [ 2372 | "\n", 2373 | "ending worker 42\n", 2374 | "starting worker 62\n" 2375 | ] 2376 | }, 2377 | { 2378 | "data": { 2379 | "application/vnd.jupyter.widget-view+json": { 2380 | "model_id": "769c2d9e4eee451586cdcc99f70522a0", 2381 | "version_major": 2, 2382 | "version_minor": 0 2383 | }, 2384 | "text/plain": [ 2385 | "HBox(children=(IntProgress(value=0, max=39051), HTML(value='')))" 2386 | ] 2387 | }, 2388 | "metadata": {}, 2389 | "output_type": "display_data" 2390 | }, 2391 | { 2392 | "name": "stdout", 2393 | "output_type": "stream", 2394 | "text": [ 2395 | "\n", 2396 | "\n", 2397 | "\n", 2398 | "\n", 2399 | "ending worker 58\n", 2400 | "starting worker 63\n" 2401 | ] 2402 | }, 2403 | { 2404 | 
"data": { 2405 | "application/vnd.jupyter.widget-view+json": { 2406 | "model_id": "838c7a79292d4402b999399e4dc389a3", 2407 | "version_major": 2, 2408 | "version_minor": 0 2409 | }, 2410 | "text/plain": [ 2411 | "HBox(children=(IntProgress(value=0, max=39050), HTML(value='')))" 2412 | ] 2413 | }, 2414 | "metadata": {}, 2415 | "output_type": "display_data" 2416 | }, 2417 | { 2418 | "name": "stdout", 2419 | "output_type": "stream", 2420 | "text": [ 2421 | "ending worker 43\n", 2422 | "starting worker 64\n", 2423 | "\n" 2424 | ] 2425 | }, 2426 | { 2427 | "data": { 2428 | "application/vnd.jupyter.widget-view+json": { 2429 | "model_id": "ac1b0d7e39054a0fb29823de2be74fdb", 2430 | "version_major": 2, 2431 | "version_minor": 0 2432 | }, 2433 | "text/plain": [ 2434 | "HBox(children=(IntProgress(value=0, max=39109), HTML(value='')))" 2435 | ] 2436 | }, 2437 | "metadata": {}, 2438 | "output_type": "display_data" 2439 | }, 2440 | { 2441 | "name": "stdout", 2442 | "output_type": "stream", 2443 | "text": [ 2444 | "ending worker 54\n", 2445 | "starting worker 65\n" 2446 | ] 2447 | }, 2448 | { 2449 | "data": { 2450 | "application/vnd.jupyter.widget-view+json": { 2451 | "model_id": "5fc9dbfd318e416baa48aa1142c6ad2e", 2452 | "version_major": 2, 2453 | "version_minor": 0 2454 | }, 2455 | "text/plain": [ 2456 | "HBox(children=(IntProgress(value=0, max=39093), HTML(value='')))" 2457 | ] 2458 | }, 2459 | "metadata": {}, 2460 | "output_type": "display_data" 2461 | }, 2462 | { 2463 | "name": "stdout", 2464 | "output_type": "stream", 2465 | "text": [ 2466 | "ending worker 56\n", 2467 | "starting worker 66\n" 2468 | ] 2469 | }, 2470 | { 2471 | "data": { 2472 | "application/vnd.jupyter.widget-view+json": { 2473 | "model_id": "6bd6f6ffde694b2cbb8d302561da3462", 2474 | "version_major": 2, 2475 | "version_minor": 0 2476 | }, 2477 | "text/plain": [ 2478 | "HBox(children=(IntProgress(value=0, max=39086), HTML(value='')))" 2479 | ] 2480 | }, 2481 | "metadata": {}, 2482 | "output_type": 
"display_data" 2483 | }, 2484 | { 2485 | "name": "stdout", 2486 | "output_type": "stream", 2487 | "text": [ 2488 | "ending worker 47\n", 2489 | "starting worker 67\n" 2490 | ] 2491 | }, 2492 | { 2493 | "data": { 2494 | "application/vnd.jupyter.widget-view+json": { 2495 | "model_id": "76ebec5cbb4147359012a1a2643eb21e", 2496 | "version_major": 2, 2497 | "version_minor": 0 2498 | }, 2499 | "text/plain": [ 2500 | "HBox(children=(IntProgress(value=0, max=39134), HTML(value='')))" 2501 | ] 2502 | }, 2503 | "metadata": {}, 2504 | "output_type": "display_data" 2505 | }, 2506 | { 2507 | "name": "stdout", 2508 | "output_type": "stream", 2509 | "text": [ 2510 | "\n", 2511 | "\n", 2512 | "\n", 2513 | "ending worker 51\n", 2514 | "starting worker 68\n" 2515 | ] 2516 | }, 2517 | { 2518 | "data": { 2519 | "application/vnd.jupyter.widget-view+json": { 2520 | "model_id": "df29f98ab86b43bbb24bcadcfdf4f953", 2521 | "version_major": 2, 2522 | "version_minor": 0 2523 | }, 2524 | "text/plain": [ 2525 | "HBox(children=(IntProgress(value=0, max=39056), HTML(value='')))" 2526 | ] 2527 | }, 2528 | "metadata": {}, 2529 | "output_type": "display_data" 2530 | }, 2531 | { 2532 | "name": "stdout", 2533 | "output_type": "stream", 2534 | "text": [ 2535 | "\n", 2536 | "ending worker 44\n", 2537 | "starting worker 69\n" 2538 | ] 2539 | }, 2540 | { 2541 | "data": { 2542 | "application/vnd.jupyter.widget-view+json": { 2543 | "model_id": "ee02a86fd50e48bc86a5388c2de2053e", 2544 | "version_major": 2, 2545 | "version_minor": 0 2546 | }, 2547 | "text/plain": [ 2548 | "HBox(children=(IntProgress(value=0, max=39060), HTML(value='')))" 2549 | ] 2550 | }, 2551 | "metadata": {}, 2552 | "output_type": "display_data" 2553 | }, 2554 | { 2555 | "name": "stdout", 2556 | "output_type": "stream", 2557 | "text": [ 2558 | "ending worker 59\n", 2559 | "starting worker 70\n" 2560 | ] 2561 | }, 2562 | { 2563 | "data": { 2564 | "application/vnd.jupyter.widget-view+json": { 2565 | "model_id": 
"90f31849bf7b4f7cb529aa71f837590a", 2566 | "version_major": 2, 2567 | "version_minor": 0 2568 | }, 2569 | "text/plain": [ 2570 | "HBox(children=(IntProgress(value=0, max=39083), HTML(value='')))" 2571 | ] 2572 | }, 2573 | "metadata": {}, 2574 | "output_type": "display_data" 2575 | }, 2576 | { 2577 | "name": "stdout", 2578 | "output_type": "stream", 2579 | "text": [ 2580 | "\n", 2581 | "ending worker 48\n", 2582 | "starting worker 71\n", 2583 | "\n" 2584 | ] 2585 | }, 2586 | { 2587 | "data": { 2588 | "application/vnd.jupyter.widget-view+json": { 2589 | "model_id": "6fc94bb950184496838bd2c55c65ead5", 2590 | "version_major": 2, 2591 | "version_minor": 0 2592 | }, 2593 | "text/plain": [ 2594 | "HBox(children=(IntProgress(value=0, max=39027), HTML(value='')))" 2595 | ] 2596 | }, 2597 | "metadata": {}, 2598 | "output_type": "display_data" 2599 | }, 2600 | { 2601 | "name": "stdout", 2602 | "output_type": "stream", 2603 | "text": [ 2604 | "\n", 2605 | "ending worker 52\n", 2606 | "starting worker 72\n" 2607 | ] 2608 | }, 2609 | { 2610 | "data": { 2611 | "application/vnd.jupyter.widget-view+json": { 2612 | "model_id": "217c635a9c674c0181a4422eea7281b9", 2613 | "version_major": 2, 2614 | "version_minor": 0 2615 | }, 2616 | "text/plain": [ 2617 | "HBox(children=(IntProgress(value=0, max=39077), HTML(value='')))" 2618 | ] 2619 | }, 2620 | "metadata": {}, 2621 | "output_type": "display_data" 2622 | }, 2623 | { 2624 | "name": "stdout", 2625 | "output_type": "stream", 2626 | "text": [ 2627 | "\n", 2628 | "ending worker 57\n", 2629 | "starting worker 73\n", 2630 | "ending worker 45\n", 2631 | "starting worker 74\n" 2632 | ] 2633 | }, 2634 | { 2635 | "data": { 2636 | "application/vnd.jupyter.widget-view+json": { 2637 | "model_id": "de823677005245d19eec0fafdf195500", 2638 | "version_major": 2, 2639 | "version_minor": 0 2640 | }, 2641 | "text/plain": [ 2642 | "HBox(children=(IntProgress(value=0, max=39070), HTML(value='')))" 2643 | ] 2644 | }, 2645 | "metadata": {}, 2646 | 
"output_type": "display_data" 2647 | }, 2648 | { 2649 | "name": "stdout", 2650 | "output_type": "stream", 2651 | "text": [ 2652 | "\n" 2653 | ] 2654 | }, 2655 | { 2656 | "data": { 2657 | "application/vnd.jupyter.widget-view+json": { 2658 | "model_id": "07fbefd5d7494cbfbc74738ed7a8c74b", 2659 | "version_major": 2, 2660 | "version_minor": 0 2661 | }, 2662 | "text/plain": [ 2663 | "HBox(children=(IntProgress(value=0, max=39061), HTML(value='')))" 2664 | ] 2665 | }, 2666 | "metadata": {}, 2667 | "output_type": "display_data" 2668 | }, 2669 | { 2670 | "name": "stdout", 2671 | "output_type": "stream", 2672 | "text": [ 2673 | "ending worker 50\n", 2674 | "starting worker 75\n" 2675 | ] 2676 | }, 2677 | { 2678 | "data": { 2679 | "application/vnd.jupyter.widget-view+json": { 2680 | "model_id": "bf21810e00cc4107bde7d915c8561d42", 2681 | "version_major": 2, 2682 | "version_minor": 0 2683 | }, 2684 | "text/plain": [ 2685 | "HBox(children=(IntProgress(value=0, max=39030), HTML(value='')))" 2686 | ] 2687 | }, 2688 | "metadata": {}, 2689 | "output_type": "display_data" 2690 | }, 2691 | { 2692 | "name": "stdout", 2693 | "output_type": "stream", 2694 | "text": [ 2695 | "ending worker 53\n", 2696 | "starting worker 76\n" 2697 | ] 2698 | }, 2699 | { 2700 | "data": { 2701 | "application/vnd.jupyter.widget-view+json": { 2702 | "model_id": "6228190b82664e1ea3dd9a134bb7ec6c", 2703 | "version_major": 2, 2704 | "version_minor": 0 2705 | }, 2706 | "text/plain": [ 2707 | "HBox(children=(IntProgress(value=0, max=39092), HTML(value='')))" 2708 | ] 2709 | }, 2710 | "metadata": {}, 2711 | "output_type": "display_data" 2712 | }, 2713 | { 2714 | "name": "stdout", 2715 | "output_type": "stream", 2716 | "text": [ 2717 | "\n", 2718 | "ending worker 55\n", 2719 | "starting worker 77\n" 2720 | ] 2721 | }, 2722 | { 2723 | "data": { 2724 | "application/vnd.jupyter.widget-view+json": { 2725 | "model_id": "8735b6bdf960436ba8bcfdd42e7195c6", 2726 | "version_major": 2, 2727 | "version_minor": 0 2728 | }, 
2729 | "text/plain": [ 2730 | "HBox(children=(IntProgress(value=0, max=39061), HTML(value='')))" 2731 | ] 2732 | }, 2733 | "metadata": {}, 2734 | "output_type": "display_data" 2735 | }, 2736 | { 2737 | "name": "stdout", 2738 | "output_type": "stream", 2739 | "text": [ 2740 | "\n", 2741 | "ending worker 46\n", 2742 | "starting worker 78\n" 2743 | ] 2744 | }, 2745 | { 2746 | "data": { 2747 | "application/vnd.jupyter.widget-view+json": { 2748 | "model_id": "2f1f2e288f4744c6a9a3fc251e09281a", 2749 | "version_major": 2, 2750 | "version_minor": 0 2751 | }, 2752 | "text/plain": [ 2753 | "HBox(children=(IntProgress(value=0, max=39064), HTML(value='')))" 2754 | ] 2755 | }, 2756 | "metadata": {}, 2757 | "output_type": "display_data" 2758 | }, 2759 | { 2760 | "name": "stdout", 2761 | "output_type": "stream", 2762 | "text": [ 2763 | "\n", 2764 | "ending worker 49\n", 2765 | "starting worker 79\n" 2766 | ] 2767 | }, 2768 | { 2769 | "data": { 2770 | "application/vnd.jupyter.widget-view+json": { 2771 | "model_id": "452f02475ed7405b8596ec5e16cbf8fb", 2772 | "version_major": 2, 2773 | "version_minor": 0 2774 | }, 2775 | "text/plain": [ 2776 | "HBox(children=(IntProgress(value=0, max=39054), HTML(value='')))" 2777 | ] 2778 | }, 2779 | "metadata": {}, 2780 | "output_type": "display_data" 2781 | }, 2782 | { 2783 | "name": "stderr", 2784 | "output_type": "stream", 2785 | "text": [ 2786 | "/home/jfpuget/anaconda3/envs/xgb8/lib/python3.6/site-packages/celerite/celerite.py:119: RuntimeWarning: invalid value encountered in less\n", 2787 | " if check_sorted and np.any(np.diff(t) < 0.0):\n", 2788 | "/home/jfpuget/anaconda3/envs/xgb8/lib/python3.6/site-packages/celerite/celerite.py:119: RuntimeWarning: invalid value encountered in less\n", 2789 | " if check_sorted and np.any(np.diff(t) < 0.0):\n", 2790 | "/home/jfpuget/anaconda3/envs/xgb8/lib/python3.6/site-packages/celerite/celerite.py:119: RuntimeWarning: invalid value encountered in less\n", 2791 | " if check_sorted and np.any(np.diff(t) < 
0.0):\n", 2792 | "/home/jfpuget/anaconda3/envs/xgb8/lib/python3.6/site-packages/celerite/celerite.py:119: RuntimeWarning: invalid value encountered in less\n", 2793 | " if check_sorted and np.any(np.diff(t) < 0.0):\n", 2794 | "/home/jfpuget/anaconda3/envs/xgb8/lib/python3.6/site-packages/celerite/celerite.py:119: RuntimeWarning: invalid value encountered in less\n", 2795 | " if check_sorted and np.any(np.diff(t) < 0.0):\n" 2796 | ] 2797 | }, 2798 | { 2799 | "name": "stdout", 2800 | "output_type": "stream", 2801 | "text": [ 2802 | "\n", 2803 | "ending worker 60\n", 2804 | "starting worker 80\n" 2805 | ] 2806 | }, 2807 | { 2808 | "data": { 2809 | "application/vnd.jupyter.widget-view+json": { 2810 | "model_id": "cad046af0bef46dc8fcd4acc7739b47d", 2811 | "version_major": 2, 2812 | "version_minor": 0 2813 | }, 2814 | "text/plain": [ 2815 | "HBox(children=(IntProgress(value=0, max=39073), HTML(value='')))" 2816 | ] 2817 | }, 2818 | "metadata": {}, 2819 | "output_type": "display_data" 2820 | }, 2821 | { 2822 | "name": "stdout", 2823 | "output_type": "stream", 2824 | "text": [ 2825 | "\n", 2826 | "ending worker 61\n", 2827 | "starting worker 81\n" 2828 | ] 2829 | }, 2830 | { 2831 | "data": { 2832 | "application/vnd.jupyter.widget-view+json": { 2833 | "model_id": "ec8f7366b0fb4b4486ebfc4532a2c603", 2834 | "version_major": 2, 2835 | "version_minor": 0 2836 | }, 2837 | "text/plain": [ 2838 | "HBox(children=(IntProgress(value=0, max=39080), HTML(value='')))" 2839 | ] 2840 | }, 2841 | "metadata": {}, 2842 | "output_type": "display_data" 2843 | }, 2844 | { 2845 | "name": "stderr", 2846 | "output_type": "stream", 2847 | "text": [ 2848 | "/home/jfpuget/anaconda3/envs/xgb8/lib/python3.6/site-packages/celerite/celerite.py:119: RuntimeWarning: invalid value encountered in less\n", 2849 | " if check_sorted and np.any(np.diff(t) < 0.0):\n", 2850 | "/home/jfpuget/anaconda3/envs/xgb8/lib/python3.6/site-packages/celerite/celerite.py:119: RuntimeWarning: invalid value encountered in 
less\n", 2851 | " if check_sorted and np.any(np.diff(t) < 0.0):\n", 2852 | "/home/jfpuget/anaconda3/envs/xgb8/lib/python3.6/site-packages/celerite/celerite.py:119: RuntimeWarning: invalid value encountered in less\n", 2853 | " if check_sorted and np.any(np.diff(t) < 0.0):\n", 2854 | "/home/jfpuget/anaconda3/envs/xgb8/lib/python3.6/site-packages/celerite/celerite.py:119: RuntimeWarning: invalid value encountered in less\n", 2855 | " if check_sorted and np.any(np.diff(t) < 0.0):\n", 2856 | "/home/jfpuget/anaconda3/envs/xgb8/lib/python3.6/site-packages/celerite/celerite.py:119: RuntimeWarning: invalid value encountered in less\n", 2857 | " if check_sorted and np.any(np.diff(t) < 0.0):\n" 2858 | ] 2859 | }, 2860 | { 2861 | "name": "stdout", 2862 | "output_type": "stream", 2863 | "text": [ 2864 | "\n", 2865 | "ending worker 62\n", 2866 | "starting worker 82\n" 2867 | ] 2868 | }, 2869 | { 2870 | "data": { 2871 | "application/vnd.jupyter.widget-view+json": { 2872 | "model_id": "dd9968071b35490ca3a0f7cb6660abf2", 2873 | "version_major": 2, 2874 | "version_minor": 0 2875 | }, 2876 | "text/plain": [ 2877 | "HBox(children=(IntProgress(value=0, max=39070), HTML(value='')))" 2878 | ] 2879 | }, 2880 | "metadata": {}, 2881 | "output_type": "display_data" 2882 | }, 2883 | { 2884 | "name": "stdout", 2885 | "output_type": "stream", 2886 | "text": [ 2887 | "\n", 2888 | "ending worker 78\n", 2889 | "starting worker 83\n" 2890 | ] 2891 | }, 2892 | { 2893 | "data": { 2894 | "application/vnd.jupyter.widget-view+json": { 2895 | "model_id": "39cf94f0db3941b0a9236a6496ea24c2", 2896 | "version_major": 2, 2897 | "version_minor": 0 2898 | }, 2899 | "text/plain": [ 2900 | "HBox(children=(IntProgress(value=0, max=39100), HTML(value='')))" 2901 | ] 2902 | }, 2903 | "metadata": {}, 2904 | "output_type": "display_data" 2905 | }, 2906 | { 2907 | "name": "stdout", 2908 | "output_type": "stream", 2909 | "text": [ 2910 | "\n", 2911 | "ending worker 75\n", 2912 | "starting worker 84\n", 2913 | "\n" 2914 
| ] 2915 | }, 2916 | { 2917 | "data": { 2918 | "application/vnd.jupyter.widget-view+json": { 2919 | "model_id": "f4947b4fb04d4f1f8743e78c864220ed", 2920 | "version_major": 2, 2921 | "version_minor": 0 2922 | }, 2923 | "text/plain": [ 2924 | "HBox(children=(IntProgress(value=0, max=39063), HTML(value='')))" 2925 | ] 2926 | }, 2927 | "metadata": {}, 2928 | "output_type": "display_data" 2929 | }, 2930 | { 2931 | "name": "stdout", 2932 | "output_type": "stream", 2933 | "text": [ 2934 | "\n", 2935 | "ending worker 76\n", 2936 | "starting worker 85\n" 2937 | ] 2938 | }, 2939 | { 2940 | "data": { 2941 | "application/vnd.jupyter.widget-view+json": { 2942 | "model_id": "0c798c35b32844c1adfe862fd6f457c6", 2943 | "version_major": 2, 2944 | "version_minor": 0 2945 | }, 2946 | "text/plain": [ 2947 | "HBox(children=(IntProgress(value=0, max=39064), HTML(value='')))" 2948 | ] 2949 | }, 2950 | "metadata": {}, 2951 | "output_type": "display_data" 2952 | }, 2953 | { 2954 | "name": "stdout", 2955 | "output_type": "stream", 2956 | "text": [ 2957 | "ending worker 74\n", 2958 | "starting worker 86\n", 2959 | "\n" 2960 | ] 2961 | }, 2962 | { 2963 | "data": { 2964 | "application/vnd.jupyter.widget-view+json": { 2965 | "model_id": "26915fb5e9ab44088de9a7009a3e0ddd", 2966 | "version_major": 2, 2967 | "version_minor": 0 2968 | }, 2969 | "text/plain": [ 2970 | "HBox(children=(IntProgress(value=0, max=39086), HTML(value='')))" 2971 | ] 2972 | }, 2973 | "metadata": {}, 2974 | "output_type": "display_data" 2975 | }, 2976 | { 2977 | "name": "stdout", 2978 | "output_type": "stream", 2979 | "text": [ 2980 | "ending worker 77\n", 2981 | "starting worker 87\n" 2982 | ] 2983 | }, 2984 | { 2985 | "data": { 2986 | "application/vnd.jupyter.widget-view+json": { 2987 | "model_id": "c3f6b2c3ad1a4f6491276cdc4ed9c13c", 2988 | "version_major": 2, 2989 | "version_minor": 0 2990 | }, 2991 | "text/plain": [ 2992 | "HBox(children=(IntProgress(value=0, max=39044), HTML(value='')))" 2993 | ] 2994 | }, 2995 | 
"metadata": {}, 2996 | "output_type": "display_data" 2997 | }, 2998 | { 2999 | "name": "stdout", 3000 | "output_type": "stream", 3001 | "text": [ 3002 | "\n", 3003 | "ending worker 79\n", 3004 | "starting worker 88\n" 3005 | ] 3006 | }, 3007 | { 3008 | "data": { 3009 | "application/vnd.jupyter.widget-view+json": { 3010 | "model_id": "17db6d070c724a42ac909302d2cf1177", 3011 | "version_major": 2, 3012 | "version_minor": 0 3013 | }, 3014 | "text/plain": [ 3015 | "HBox(children=(IntProgress(value=0, max=39036), HTML(value='')))" 3016 | ] 3017 | }, 3018 | "metadata": {}, 3019 | "output_type": "display_data" 3020 | }, 3021 | { 3022 | "name": "stdout", 3023 | "output_type": "stream", 3024 | "text": [ 3025 | "\n", 3026 | "\n", 3027 | "ending worker 66\n", 3028 | "starting worker 89\n" 3029 | ] 3030 | }, 3031 | { 3032 | "data": { 3033 | "application/vnd.jupyter.widget-view+json": { 3034 | "model_id": "16735bc9eb5d4311a040bb3db7ace6ae", 3035 | "version_major": 2, 3036 | "version_minor": 0 3037 | }, 3038 | "text/plain": [ 3039 | "HBox(children=(IntProgress(value=0, max=39076), HTML(value='')))" 3040 | ] 3041 | }, 3042 | "metadata": {}, 3043 | "output_type": "display_data" 3044 | }, 3045 | { 3046 | "name": "stdout", 3047 | "output_type": "stream", 3048 | "text": [ 3049 | "ending worker 69\n", 3050 | "starting worker 90\n" 3051 | ] 3052 | }, 3053 | { 3054 | "data": { 3055 | "application/vnd.jupyter.widget-view+json": { 3056 | "model_id": "69307cf8a4ef42e4983787782f56ad6e", 3057 | "version_major": 2, 3058 | "version_minor": 0 3059 | }, 3060 | "text/plain": [ 3061 | "HBox(children=(IntProgress(value=0, max=28537), HTML(value='')))" 3062 | ] 3063 | }, 3064 | "metadata": {}, 3065 | "output_type": "display_data" 3066 | }, 3067 | { 3068 | "name": "stdout", 3069 | "output_type": "stream", 3070 | "text": [ 3071 | "\n", 3072 | "\n", 3073 | "\n", 3074 | "ending worker 73\n", 3075 | "starting worker 100\n" 3076 | ] 3077 | }, 3078 | { 3079 | "data": { 3080 | 
"application/vnd.jupyter.widget-view+json": { 3081 | "model_id": "be3951fa96644d1a8417b8d4c7647df0", 3082 | "version_major": 2, 3083 | "version_minor": 0 3084 | }, 3085 | "text/plain": [ 3086 | "HBox(children=(IntProgress(value=0, max=1), HTML(value='')))" 3087 | ] 3088 | }, 3089 | "metadata": {}, 3090 | "output_type": "display_data" 3091 | }, 3092 | { 3093 | "name": "stdout", 3094 | "output_type": "stream", 3095 | "text": [ 3096 | "\n", 3097 | "ending worker 100\n", 3098 | "ending worker 67\n", 3099 | "ending worker 64\n", 3100 | "\n" 3101 | ] 3102 | }, 3103 | { 3104 | "name": "stderr", 3105 | "output_type": "stream", 3106 | "text": [ 3107 | "/home/jfpuget/anaconda3/envs/xgb8/lib/python3.6/site-packages/celerite/celerite.py:119: RuntimeWarning: invalid value encountered in less\n", 3108 | " if check_sorted and np.any(np.diff(t) < 0.0):\n" 3109 | ] 3110 | }, 3111 | { 3112 | "name": "stdout", 3113 | "output_type": "stream", 3114 | "text": [ 3115 | "\n", 3116 | "ending worker 70\n", 3117 | "ending worker 71\n", 3118 | "\n", 3119 | "\n", 3120 | "ending worker 65\n", 3121 | "ending worker 63\n", 3122 | "\n", 3123 | "ending worker 68\n", 3124 | "\n", 3125 | "ending worker 72\n" 3126 | ] 3127 | }, 3128 | { 3129 | "name": "stderr", 3130 | "output_type": "stream", 3131 | "text": [ 3132 | "/home/jfpuget/anaconda3/envs/xgb8/lib/python3.6/site-packages/celerite/celerite.py:119: RuntimeWarning: invalid value encountered in less\n", 3133 | " if check_sorted and np.any(np.diff(t) < 0.0):\n" 3134 | ] 3135 | }, 3136 | { 3137 | "name": "stdout", 3138 | "output_type": "stream", 3139 | "text": [ 3140 | "\n", 3141 | "ending worker 80\n", 3142 | "\n", 3143 | "ending worker 81\n" 3144 | ] 3145 | }, 3146 | { 3147 | "name": "stderr", 3148 | "output_type": "stream", 3149 | "text": [ 3150 | "/home/jfpuget/anaconda3/envs/xgb8/lib/python3.6/site-packages/celerite/celerite.py:119: RuntimeWarning: invalid value encountered in less\n", 3151 | " if check_sorted and np.any(np.diff(t) < 0.0):\n", 
3152 | "/home/jfpuget/anaconda3/envs/xgb8/lib/python3.6/site-packages/celerite/celerite.py:119: RuntimeWarning: invalid value encountered in less\n", 3153 | " if check_sorted and np.any(np.diff(t) < 0.0):\n" 3154 | ] 3155 | }, 3156 | { 3157 | "name": "stdout", 3158 | "output_type": "stream", 3159 | "text": [ 3160 | "\n", 3161 | "ending worker 90\n" 3162 | ] 3163 | }, 3164 | { 3165 | "name": "stderr", 3166 | "output_type": "stream", 3167 | "text": [ 3168 | "/home/jfpuget/anaconda3/envs/xgb8/lib/python3.6/site-packages/celerite/celerite.py:119: RuntimeWarning: invalid value encountered in less\n", 3169 | " if check_sorted and np.any(np.diff(t) < 0.0):\n" 3170 | ] 3171 | }, 3172 | { 3173 | "name": "stdout", 3174 | "output_type": "stream", 3175 | "text": [ 3176 | "\n", 3177 | "ending worker 82\n", 3178 | "\n", 3179 | "ending worker 88\n", 3180 | "\n", 3181 | "ending worker 89\n", 3182 | "\n", 3183 | "\n", 3184 | "\n", 3185 | "\n", 3186 | "ending worker 84\n", 3187 | "ending worker 85\n", 3188 | "ending worker 86\n", 3189 | "ending worker 83\n", 3190 | "\n", 3191 | "ending worker 87\n" 3192 | ] 3193 | } 3194 | ], 3195 | "source": [ 3196 | "params = [(i, fname) for i in range(91)]\n", 3197 | "params.append((100, fname))\n", 3198 | "\n", 3199 | "if 1: \n", 3200 | " pool = Pool(processes=20, maxtasksperchild=1)\n", 3201 | " ls = pool.map( work_test, params, chunksize=1 )\n", 3202 | " pool.close()\n", 3203 | "else:\n", 3204 | " ls = [work_tta(param) for param in params]" 3205 | ] 3206 | }, 3207 | { 3208 | "cell_type": "code", 3209 | "execution_count": null, 3210 | "metadata": {}, 3211 | "outputs": [], 3212 | "source": [] 3213 | } 3214 | ], 3215 | "metadata": { 3216 | "kernelspec": { 3217 | "display_name": "Python [conda env:xgb8]", 3218 | "language": "python", 3219 | "name": "conda-env-xgb8-py" 3220 | }, 3221 | "language_info": { 3222 | "codemirror_mode": { 3223 | "name": "ipython", 3224 | "version": 3 3225 | }, 3226 | "file_extension": ".py", 3227 | "mimetype": 
"text/x-python", 3228 | "name": "python", 3229 | "nbconvert_exporter": "python", 3230 | "pygments_lexer": "ipython3", 3231 | "version": "3.6.5" 3232 | } 3233 | }, 3234 | "nbformat": 4, 3235 | "nbformat_minor": 2 3236 | } 3237 | -------------------------------------------------------------------------------- /code/lgb_best.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "fname = 'lgb_1181'\n", 10 | "fname_base = 'base_006'\n", 11 | "fname_bazin = 'bazin_003'\n", 12 | "fname_newling = 'newling_003'" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": null, 18 | "metadata": {}, 19 | "outputs": [], 20 | "source": [ 21 | "# number of train time augmentations.\n", 22 | "n_tta = 6\n", 23 | "\n", 24 | "seed = 0" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": null, 30 | "metadata": {}, 31 | "outputs": [], 32 | "source": [ 33 | "import numpy as np\n", 34 | "import pandas as pd\n", 35 | "from sklearn.model_selection import StratifiedKFold\n", 36 | "from sklearn.metrics import confusion_matrix\n", 37 | "from scipy.optimize import curve_fit\n", 38 | "import gc\n", 39 | "import matplotlib.pyplot as plt\n", 40 | "import seaborn as sns\n", 41 | "import lightgbm as lgb\n", 42 | "import xgboost as xgb\n", 43 | "import logging\n", 44 | "from tqdm import tqdm_notebook\n", 45 | "import itertools\n", 46 | "import pickle as pkl\n", 47 | "\n", 48 | "pd.options.display.max_columns = 400" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": null, 54 | "metadata": {}, 55 | "outputs": [], 56 | "source": [ 57 | "import random as rn\n", 58 | "def init_seeds(seed):\n", 59 | "\n", 60 | " # The below is necessary for starting Numpy generated random numbers\n", 61 | " # in a well-defined initial state.\n", 62 | "\n", 63 | " np.random.seed(seed)\n", 64 | "\n", 
65 | " # The below is necessary for starting core Python generated random numbers\n", 66 | " # in a well-defined state.\n", 67 | "\n", 68 | " rn.seed(seed)\n", 69 | "\n", 70 | "\n", 71 | "init_seeds(seed)" 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": null, 77 | "metadata": {}, 78 | "outputs": [], 79 | "source": [ 80 | "def create_logger():\n", 81 | " logger_ = logging.getLogger('main')\n", 82 | " logger_.setLevel(logging.DEBUG)\n", 83 | " fh = logging.FileHandler('simple_lightgbm.log')\n", 84 | " fh.setLevel(logging.DEBUG)\n", 85 | " ch = logging.StreamHandler()\n", 86 | " ch.setLevel(logging.DEBUG)\n", 87 | " formatter = logging.Formatter('[%(levelname)s]%(asctime)s:%(name)s:%(message)s')\n", 88 | " fh.setFormatter(formatter)\n", 89 | " ch.setFormatter(formatter)\n", 90 | " # add the handlers to the logger\n", 91 | " logger_.addHandler(fh)\n", 92 | " logger_.addHandler(ch)\n", 93 | "\n", 94 | "\n", 95 | "def get_logger():\n", 96 | " return logging.getLogger('main')" 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": null, 102 | "metadata": {}, 103 | "outputs": [], 104 | "source": [ 105 | "def xgb_multi_weighted_logloss(preds, dtrain):\n", 106 | " labels = dtrain.get_label()\n", 107 | " return 'xgb_multi_weighted_loss', lgb_multi_weighted_logloss(labels, preds)\n", 108 | "\n", 109 | "def eval_lgb_multi_weighted_logloss(preds, train_data, n_tta=n_tta):\n", 110 | " \n", 111 | " label = train_data.get_label()\n", 112 | " classes = list(range(14))\n", 113 | " class_weight = {0: 1, 1: 2, 2: 1, 3: 1, 4: 1, 5: 1, 6: 1, 7: 2, 8: 1, 9: 1, 10: 1, 11: 1, 12: 1, 13: 1}\n", 114 | " res = lgb_multi_weighted_logloss(label, preds, classes, class_weight, n_tta)\n", 115 | " \n", 116 | " return res\n", 117 | " \n", 118 | "def lgb_multi_weighted_logloss(y_true, y_preds,\n", 119 | " classes=[6, 15, 16, 42, 52, 53, 62, 64, 65, 67, 88, 90, 92, 95],\n", 120 | " class_weight={6: 1, 15: 2, 16: 1, 42: 1, 52: 1, 53: 1, 62: 1, 64: 2, \n", 121 | 
" 65: 1, 67: 1, 88: 1, 90: 1, 92: 1, 95: 1},\n", 122 | " n_tta = n_tta,\n", 123 | "):\n", 124 | " \n", 125 | " if len(np.unique(y_true)) > 14:\n", 126 | " classes.append(99)\n", 127 | " class_weight[99] = 2\n", 128 | " y_preds = y_preds.reshape(y_true.shape[0], len(classes), order='F')\n", 129 | " \n", 130 | " size = y_true.shape[0] // n_tta\n", 131 | " y_true = y_true[:size]\n", 132 | " y_p = np.zeros((size, len(classes)))\n", 133 | " \n", 134 | " for i in range(n_tta):\n", 135 | " y_p += y_preds[i * size : (i+1) * size]\n", 136 | " y_p /= n_tta\n", 137 | " # Trasform y_true in dummies\n", 138 | " y_ohe = pd.get_dummies(y_true)\n", 139 | " # Normalize rows and limit y_preds to 1e-15, 1-1e-15\n", 140 | " y_p = np.clip(a=y_p, a_min=1e-15, a_max=1 - 1e-15)\n", 141 | " # Transform to log\n", 142 | " y_p_log = np.log(y_p)\n", 143 | " # Get the log for ones, .values is used to drop the index of DataFrames\n", 144 | " # Exclude class 99 for now, since there is no class99 in the training set\n", 145 | " # we gave a special process for that class\n", 146 | " y_log_ones = np.sum(y_ohe.values * y_p_log, axis=0)\n", 147 | " # Get the number of positives for each class\n", 148 | " nb_pos = y_ohe.sum(axis=0).values.astype(float)\n", 149 | " # Weight average and divide by the number of positives\n", 150 | " class_arr = np.array([class_weight[k] for k in sorted(class_weight.keys())])\n", 151 | " y_w = y_log_ones * class_arr / nb_pos\n", 152 | "\n", 153 | " loss = - np.sum(y_w) / np.sum(class_arr)\n", 154 | " return 'wloss', loss, False\n", 155 | "\n", 156 | "\n", 157 | "def multi_weighted_logloss(y_true, y_preds,\n", 158 | " classes = [6, 15, 16, 42, 52, 53, 62, 64, 65, 67, 88, 90, 92, 95],\n", 159 | " class_weight = {6: 1, 15: 2, 16: 1, 42: 1, 52: 1, 53: 1, 62: 1, \n", 160 | " 64: 2, 65: 1, 67: 1, 88: 1, 90: 1, 92: 1, 95: 1}\n", 161 | " ):\n", 162 | " \"\"\"\n", 163 | " @author olivier https://www.kaggle.com/ogrellier\n", 164 | " multi logloss for PLAsTiCC challenge\n", 165 | " 
\"\"\"\n", 166 | " # class_weights taken from Giba's topic : https://www.kaggle.com/titericz\n", 167 | " # https://www.kaggle.com/c/PLAsTiCC-2018/discussion/67194\n", 168 | " # with Kyle Boone's post https://www.kaggle.com/kyleboone\n", 169 | " if len(np.unique(y_true)) > 14:\n", 170 | " classes.append(99)\n", 171 | " class_weight[99] = 2\n", 172 | " y_p = y_preds\n", 173 | " # Trasform y_true in dummies\n", 174 | " y_ohe = pd.get_dummies(y_true)\n", 175 | " # Normalize rows and limit y_preds to 1e-15, 1-1e-15\n", 176 | " y_p = np.clip(a=y_p, a_min=1e-15, a_max=1 - 1e-15)\n", 177 | " # Transform to log\n", 178 | " y_p_log = np.log(y_p)\n", 179 | " # Get the log for ones, .values is used to drop the index of DataFrames\n", 180 | " # Exclude class 99 for now, since there is no class99 in the training set\n", 181 | " # we gave a special process for that class\n", 182 | " y_log_ones = np.sum(y_ohe.values * y_p_log, axis=0)\n", 183 | " # Get the number of positives for each class\n", 184 | " nb_pos = y_ohe.sum(axis=0).values.astype(float)\n", 185 | " # Weight average and divide by the number of positives\n", 186 | " class_arr = np.array([class_weight[k] for k in sorted(class_weight.keys())])\n", 187 | " y_w = y_log_ones * class_arr / nb_pos\n", 188 | "\n", 189 | " loss = - np.sum(y_w) / np.sum(class_arr)\n", 190 | " return loss\n", 191 | "\n", 192 | "def save_importances(importances_):\n", 193 | " mean_gain = importances_[['gain', 'feature']].groupby('feature').mean()\n", 194 | " importances_['mean_gain'] = importances_['feature'].map(mean_gain['gain'])\n", 195 | " plt.figure(figsize=(8, 12))\n", 196 | " sns.barplot(x='gain', y='feature', data=importances_.sort_values('mean_gain', ascending=False))\n", 197 | " plt.tight_layout()\n", 198 | " plt.savefig('importances.png')" 199 | ] 200 | }, 201 | { 202 | "cell_type": "code", 203 | "execution_count": null, 204 | "metadata": {}, 205 | "outputs": [], 206 | "source": [ 207 | "def get_importances(clfs):\n", 208 | " importances 
= [clf.feature_importance('gain') for clf in clfs]\n", 209 | " importances = np.vstack(importances)\n", 210 | " mean_gain = np.mean(importances, axis=0)\n", 211 | " features = clfs[0].feature_name()\n", 212 | " data = pd.DataFrame({'gain':mean_gain, 'feature':features})\n", 213 | " plt.figure(figsize=(8, 30))\n", 214 | " sns.barplot(x='gain', y='feature', data=data.sort_values('gain', ascending=False))\n", 215 | " plt.tight_layout()\n", 216 | " plt.savefig('importances.png')\n", 217 | " return data\n", 218 | "\n", 219 | "def train_classifiers(lgb_params, full_train=None, y=None, w=None, verbose=2000, \n", 220 | " folds=5, ttas=None):\n", 221 | " print(full_train.shape[1], 'features')\n", 222 | " kf = StratifiedKFold(n_splits=folds, shuffle=True, random_state=1)\n", 223 | " clfs = []\n", 224 | " importances = pd.DataFrame()\n", 225 | " oof_preds = np.zeros((len(full_train), np.unique(y).shape[0]))\n", 226 | " tta_preds = np.zeros((len(full_train), np.unique(y).shape[0]))\n", 227 | " for fold_, (trn_, val_) in tqdm_notebook(enumerate(kf.split(y, y)), total=folds):\n", 228 | " print()\n", 229 | " print('fold %2d' % fold_)\n", 230 | " trn_x, trn_y, trn_w = full_train.iloc[trn_], y.iloc[trn_], w.iloc[trn_]\n", 231 | " val_x, val_y, val_w = full_train.iloc[val_], y.iloc[val_], w.iloc[val_]\n", 232 | " size = val_y.shape[0]\n", 233 | " for tta in ttas:\n", 234 | " tta_x = tta.iloc[trn_]\n", 235 | " trn_x = pd.concat((trn_x, tta_x), axis=0)\n", 236 | " tta_y = y.iloc[trn_]\n", 237 | " trn_y = pd.concat((trn_y, tta_y), axis=0)\n", 238 | " tta_w = w.iloc[trn_]\n", 239 | " trn_w = pd.concat((trn_w, tta_w), axis=0)\n", 240 | " val_x = pd.concat((val_x, tta.iloc[val_]), axis=0)\n", 241 | " val_y = pd.concat((val_y, y.iloc[val_]), axis=0)\n", 242 | " val_w = pd.concat((val_w, w.iloc[val_]), axis=0)\n", 243 | " trn_x = lgb.Dataset(trn_x, label=trn_y, weight=trn_w)\n", 244 | " val_x = lgb.Dataset(val_x, label=val_y, weight=val_w)\n", 245 | " clf = lgb.train(\n", 246 | " 
lgb_params,\n", 247 | " trn_x, \n", 248 | " num_boost_round = 4000,\n", 249 | " valid_sets=[trn_x, val_x],\n", 250 | " valid_names = ['train', 'val'],\n", 251 | " feval=eval_lgb_multi_weighted_logloss,\n", 252 | " verbose_eval=verbose,\n", 253 | " early_stopping_rounds=100\n", 254 | " )\n", 255 | " val_x = full_train.iloc[val_]\n", 256 | " \n", 257 | " oof_pred = clf.predict(val_x)\n", 258 | " oof_preds[val_, :] = oof_pred\n", 259 | " tta_pred = np.zeros(oof_pred.shape)\n", 260 | " for tta in ttas:\n", 261 | " val_x = tta.iloc[val_]\n", 262 | " tta_pred += clf.predict(val_x)\n", 263 | " tta_pred /= len(ttas)\n", 264 | " tta_preds[val_, :] = tta_pred\n", 265 | " print('val mwloss: %0.3f' % multi_weighted_logloss( y.iloc[val_], oof_pred),\n", 266 | " 'tta mwloss: %0.3f' % multi_weighted_logloss( y.iloc[val_], tta_pred),\n", 267 | " )\n", 268 | " \n", 269 | " clfs.append(clf)\n", 270 | "\n", 271 | " get_logger().info('MULTI WEIGHTED LOG LOSS : %.5f ' % multi_weighted_logloss(y_true=y, y_preds=oof_preds))\n", 272 | "\n", 273 | " importances = get_importances(clfs)\n", 274 | " return clfs, importances, oof_preds, tta_preds" 275 | ] 276 | }, 277 | { 278 | "cell_type": "code", 279 | "execution_count": null, 280 | "metadata": {}, 281 | "outputs": [], 282 | "source": [ 283 | "def get_data(full_train, train_bazin, newling):\n", 284 | " # get the right object_ids for the result.\n", 285 | " full_train = full_train.merge(train_bazin, how='left', on='object_id')\n", 286 | " full_train = full_train.merge(newling, how='left', on='object_id')\n", 287 | " for pb in range(6):\n", 288 | " full_train['bazin_A_%d' % pb] *= full_train.hostgal_photoz **2 * full_train.scale_mean\n", 289 | " full_train['newling_A_%d' % pb] *= full_train.hostgal_photoz **2 * full_train.scale_mean\n", 290 | " full_train['bazin_magnitude'] = full_train[['bazin_A_%d' % pb for pb in range(6)]].max(axis=1)\n", 291 | " full_train['newling_magnitude'] = full_train[['newling_A_%d' % pb for pb in 
range(6)]].max(axis=1)\n", 292 | " for pb in range(6):\n", 293 | " full_train['bazin_A_%d' % pb] /= full_train['bazin_magnitude']\n", 294 | " full_train['newling_A_%d' % pb] /= full_train['newling_magnitude']\n", 295 | " full_train.hostgal_photoz = 1*(full_train.hostgal_photoz > 0)\n", 296 | " return full_train" 297 | ] 298 | }, 299 | { 300 | "cell_type": "code", 301 | "execution_count": null, 302 | "metadata": {}, 303 | "outputs": [], 304 | "source": [ 305 | "with open('../data/ttas_%s.pkl' % fname_base, 'rb') as file:\n", 306 | " ttas = pkl.load(file)\n", 307 | "full_train = ttas[0]\n", 308 | "ttas = ttas[1 : n_tta]\n", 309 | "full_train.head()" 310 | ] 311 | }, 312 | { 313 | "cell_type": "code", 314 | "execution_count": null, 315 | "metadata": {}, 316 | "outputs": [], 317 | "source": [ 318 | "meta_cols = ['object_id', 'hostgal_photoz', 'mwebv', 'target']\n", 319 | "meta_train = pd.read_csv('../data/train_meta.csv')[meta_cols]\n", 320 | "meta_cols = ['object_id', 'hostgal_photoz', 'mwebv']\n", 321 | "meta_test = pd.read_csv('../input/test_set_metadata.csv')[meta_cols]\n", 322 | "\n", 323 | "meta_train.head()" 324 | ] 325 | }, 326 | { 327 | "cell_type": "code", 328 | "execution_count": null, 329 | "metadata": {}, 330 | "outputs": [], 331 | "source": [ 332 | "with open('../data/tta_0_%s.pkl' % fname_bazin, 'rb') as file:\n", 333 | " train_bazin = pkl.load(file)\n", 334 | "train_bazin.head()\n", 335 | " " 336 | ] 337 | }, 338 | { 339 | "cell_type": "code", 340 | "execution_count": null, 341 | "metadata": {}, 342 | "outputs": [], 343 | "source": [ 344 | "ttas_bazin = []\n", 345 | "for i in range(1, n_tta):\n", 346 | " with open('../data/tta_%d_%s.pkl' % (i, fname_bazin), 'rb') as file:\n", 347 | " ttas_bazin.append(pkl.load(file))" 348 | ] 349 | }, 350 | { 351 | "cell_type": "code", 352 | "execution_count": null, 353 | "metadata": {}, 354 | "outputs": [], 355 | "source": [ 356 | "ttas_bazin[0].head()" 357 | ] 358 | }, 359 | { 360 | "cell_type": "code", 361 | 
"execution_count": null, 362 | "metadata": {}, 363 | "outputs": [], 364 | "source": [ 365 | "with open('../data/tta_0_%s.pkl' % fname_newling, 'rb') as file:\n", 366 | " train_newling = pkl.load(file)\n", 367 | "train_newling.head()" 368 | ] 369 | }, 370 | { 371 | "cell_type": "code", 372 | "execution_count": null, 373 | "metadata": {}, 374 | "outputs": [], 375 | "source": [ 376 | "ttas_newling = []\n", 377 | "for i in range(1, n_tta):\n", 378 | " with open('../data/tta_%d_%s.pkl' % (i, fname_newling ), 'rb') as file:\n", 379 | " ttas_newling.append(pkl.load(file))" 380 | ] 381 | }, 382 | { 383 | "cell_type": "code", 384 | "execution_count": null, 385 | "metadata": {}, 386 | "outputs": [], 387 | "source": [ 388 | "full_train = get_data(full_train, train_bazin, train_newling)\n", 389 | "full_train.head()" 390 | ] 391 | }, 392 | { 393 | "cell_type": "code", 394 | "execution_count": null, 395 | "metadata": {}, 396 | "outputs": [], 397 | "source": [ 398 | "n_tta = 6\n", 399 | "init_seeds(seed)\n", 400 | "\n", 401 | "ttas = [get_data(full_train, train_bazin, train_newling) \\\n", 402 | " for full_train, train_bazin, train_newling \\\n", 403 | " in tqdm_notebook(zip(ttas, ttas_bazin, ttas_newling))]\n", 404 | "#for tta in ttas:\n", 405 | "# tta.fillna(train_mean, inplace=True)" 406 | ] 407 | }, 408 | { 409 | "cell_type": "code", 410 | "execution_count": null, 411 | "metadata": {}, 412 | "outputs": [], 413 | "source": [ 414 | "classes = sorted(np.unique(meta_train.target))\n", 415 | "classes\n", 416 | "\n", 417 | "class_names = ['class_%d' % c for c in classes]\n", 418 | "\n", 419 | "weights = [1/18 if i not in [15, 64, 99] else 1/9 for i in classes]\n", 420 | "weights\n", 421 | "\n", 422 | "df = meta_train.groupby('target').object_id.count().to_frame('freq')\n", 423 | "df.freq /= df.freq.sum()\n", 424 | "df['weight'] = weights\n", 425 | "df['adjust'] = df.weight / df.freq\n", 426 | "df" 427 | ] 428 | }, 429 | { 430 | "cell_type": "code", 431 | "execution_count": null, 
432 | "metadata": { 433 | "scrolled": true 434 | }, 435 | "outputs": [], 436 | "source": [ 437 | "y = meta_train['target']\n", 438 | "\n", 439 | "ws = y.copy()\n", 440 | "for c,w in zip(classes, df.adjust.values):\n", 441 | " print(c, w)\n", 442 | " ws[y == c] = w" 443 | ] 444 | }, 445 | { 446 | "cell_type": "code", 447 | "execution_count": null, 448 | "metadata": {}, 449 | "outputs": [], 450 | "source": [ 451 | "y_lgb = y.copy()\n", 452 | "for i,c in enumerate(classes):\n", 453 | " y_lgb[y_lgb == c] = i" 454 | ] 455 | }, 456 | { 457 | "cell_type": "code", 458 | "execution_count": null, 459 | "metadata": { 460 | "scrolled": true 461 | }, 462 | "outputs": [], 463 | "source": [ 464 | "list(full_train.columns)" 465 | ] 466 | }, 467 | { 468 | "cell_type": "code", 469 | "execution_count": null, 470 | "metadata": {}, 471 | "outputs": [], 472 | "source": [ 473 | "removed = [\n", 474 | " 'object_id',\n", 475 | " 'target',\n", 476 | " \n", 477 | " 'newling_A_1',\n", 478 | " 'newling_A_2',\n", 479 | " 'newling_A_3',\n", 480 | " 'newling_A_4',\n", 481 | " 'newling_A_5',\n", 482 | " 'newling_k_1',\n", 483 | " 'newling_k_2',\n", 484 | " 'newling_k_3',\n", 485 | " 'newling_k_4',\n", 486 | " 'newling_k_5',\n", 487 | " \n", 488 | " 'bazin_pcov_0',\n", 489 | " 'bazin_pcov_1',\n", 490 | " 'bazin_pcov_2',\n", 491 | " 'bazin_pcov_3',\n", 492 | " 'bazin_pcov_4',\n", 493 | " 'bazin_pcov_5',\n", 494 | " 'bazin_max_0',\n", 495 | " 'bazin_max_1',\n", 496 | " 'bazin_max_2',\n", 497 | " 'bazin_max_3',\n", 498 | " 'bazin_max_4',\n", 499 | " 'bazin_max_5',\n", 500 | " #'bazin_A_0',\n", 501 | " #'bazin_A_1',\n", 502 | " #'bazin_A_2',\n", 503 | " #'bazin_A_3',\n", 504 | " #'bazin_A_4',\n", 505 | " #'bazin_A_5',\n", 506 | " 'bazin_before_0',\n", 507 | " 'bazin_before_1',\n", 508 | " 'bazin_before_2',\n", 509 | " 'bazin_before_3',\n", 510 | " 'bazin_before_4',\n", 511 | " 'bazin_before_5',\n", 512 | " 'bazin_after_0',\n", 513 | " 'bazin_after_1',\n", 514 | " 'bazin_after_2',\n", 515 | " 
'bazin_after_3',\n", 516 | " 'bazin_after_4',\n", 517 | " 'bazin_after_5',\n", 518 | " \n", 519 | " #'bazin_trise',\n", 520 | " \n", 521 | " 'mwebv',\n", 522 | " 'num_obs',\n", 523 | "]\n", 524 | "\n", 525 | "features = [c for c in full_train.columns if c not in removed]\n", 526 | "\n", 527 | "features\n", 528 | "\n", 529 | "full_train1 = full_train[features].copy()\n", 530 | "ttas1 = [tta[features].copy() for tta in ttas]" 531 | ] 532 | }, 533 | { 534 | "cell_type": "code", 535 | "execution_count": null, 536 | "metadata": {}, 537 | "outputs": [], 538 | "source": [ 539 | "features" 540 | ] 541 | }, 542 | { 543 | "cell_type": "code", 544 | "execution_count": null, 545 | "metadata": { 546 | "scrolled": true 547 | }, 548 | "outputs": [], 549 | "source": [ 550 | "lgb_params = {\n", 551 | " 'boosting_type': 'goss',\n", 552 | " 'objective': 'multiclass',\n", 553 | " 'num_class': 14,\n", 554 | " 'metric': 'None',\n", 555 | " 'learning_rate': 0.015,\n", 556 | " 'colsample_bytree': .5,\n", 557 | " 'feature_fraction_seed':seed+2,\n", 558 | " 'reg_alpha': .01,\n", 559 | " 'reg_lambda': .1,\n", 560 | " 'min_split_gain': 0.1,\n", 561 | " 'min_child_weight': 20 * (1 + len(ttas)),\n", 562 | " #'n_estimators': 4000,\n", 563 | " #'silent': -1,\n", 564 | " 'verbose': -1,\n", 565 | " #'max_depth': 4,\n", 566 | " 'num_leaves' : 7,\n", 567 | " #'num_threads': 10,\n", 568 | "}\n", 569 | "clfs, importances, oof_preds, tta_preds = train_classifiers(lgb_params, full_train1, y_lgb, ws, \n", 570 | " folds=10, ttas=ttas1)\n", 571 | "\n", 572 | "#save_importances(importances_=importances)\n", 573 | "print('%0.5f' % multi_weighted_logloss(y, oof_preds), \n", 574 | " '%0.5f' % multi_weighted_logloss(y, tta_preds))" 575 | ] 576 | }, 577 | { 578 | "cell_type": "code", 579 | "execution_count": null, 580 | "metadata": {}, 581 | "outputs": [], 582 | "source": [ 583 | "with open('../data/oof_preds_%s.pkl' % fname, 'wb') as file:\n", 584 | " pkl.dump(oof_preds, file)\n", 585 | " \n", 586 | "with 
open('../data/tta_preds_%s.pkl' % fname, 'wb') as file:\n", 587 | " pkl.dump(tta_preds, file)" 588 | ] 589 | }, 590 | { 591 | "cell_type": "code", 592 | "execution_count": null, 593 | "metadata": {}, 594 | "outputs": [], 595 | "source": [ 596 | "def predict_chunk(clfs_, features, class_names, chunk_id, fname_base, \n", 597 | " fname_bazin=fname_bazin, fname_newling=fname_newling, \n", 598 | " ):\n", 599 | "\n", 600 | " with open('../data/full_test_chunk_%s_%d.pkl' % (fname_base, chunk_id), 'rb') as file:\n", 601 | " full_test = pkl.load(file)\n", 602 | " \n", 603 | " with open('../data/bazin_test_%d_%s.pkl' % (chunk_id, fname_bazin), 'rb') as file:\n", 604 | " test_bazin = pkl.load(file)\n", 605 | " \n", 606 | " with open('../data/test_%d_%s.pkl' % (chunk_id, fname_newling), 'rb') as file:\n", 607 | " test_newling = pkl.load(file) \n", 608 | " \n", 609 | " if ('newling_sigma_1') not in test_newling.columns:\n", 610 | " test_newling['newling_sigma_1'] = np.NaN\n", 611 | " \n", 612 | " full_test = get_data(full_test, test_bazin, test_newling)\n", 613 | " #full_test = full_test.fillna(train_mean)\n", 614 | " # Make predictions\n", 615 | " preds_ = None\n", 616 | " for clf in clfs_:\n", 617 | " if preds_ is None:\n", 618 | " preds_ = clf.predict(full_test[features]) / len(clfs_)\n", 619 | " else:\n", 620 | " preds_ += clf.predict(full_test[features]) / len(clfs_)\n", 621 | "\n", 622 | " # Compute preds_99 as the proba of class not being any of the others\n", 623 | " # preds_99 = 0.1 gives 1.769\n", 624 | " preds_99 = np.ones(preds_.shape[0])\n", 625 | " for i in range(preds_.shape[1]):\n", 626 | " preds_99 *= (1 - preds_[:, i])\n", 627 | " \n", 628 | " # Create DataFrame from predictions\n", 629 | " preds_df_ = pd.DataFrame(preds_, columns=class_names)\n", 630 | " preds_df_['object_id'] = full_test['object_id']\n", 631 | " preds_df_['class_99'] = preds_99\n", 632 | "\n", 633 | " print(preds_df_['class_99'].mean())\n", 634 | "\n", 635 | " del full_test, preds_\n", 636 | 
" gc.collect()\n", 637 | "\n", 638 | " return preds_df_\n" 639 | ] 640 | }, 641 | { 642 | "cell_type": "code", 643 | "execution_count": null, 644 | "metadata": { 645 | "scrolled": true 646 | }, 647 | "outputs": [], 648 | "source": [ 649 | "import time\n", 650 | "\n", 651 | "start = time.time()\n", 652 | "chunks = 5000000\n", 653 | "remain_df = None\n", 654 | "\n", 655 | "for i_c in tqdm_notebook(range(91)):\n", 656 | "\n", 657 | " preds_df = predict_chunk(clfs_=clfs,\n", 658 | " features=features,\n", 659 | " class_names=class_names,\n", 660 | " chunk_id=i_c,\n", 661 | " fname_base=fname_base,\n", 662 | " )\n", 663 | "\n", 664 | " if i_c == 0:\n", 665 | " print(preds_df.mean(axis=0))\n", 666 | " preds_df.to_csv('../submissions/%s.csv' %fname, header=True, index=False, float_format='%.6f')\n", 667 | " else:\n", 668 | " preds_df.to_csv('../submissions/%s.csv' %fname, header=False, mode='a', index=False, float_format='%.6f')\n", 669 | "\n", 670 | " del preds_df\n", 671 | " gc.collect()\n", 672 | "\n", 673 | " if (i_c + 1) % 10 == 0:\n", 674 | " get_logger().info('%15d done in %5.1f' % (chunks * (i_c + 1), (time.time() - start) / 60))\n", 675 | " print('%15d done in %5.1f' % (chunks * (i_c + 1), (time.time() - start) / 60))\n", 676 | "\n", 677 | "# Compute last object in remain_df\n", 678 | "\n", 679 | "preds_df = predict_chunk(clfs_=clfs,\n", 680 | " rnn_test=rnn_test,\n", 681 | " features=features,\n", 682 | " class_names=class_names,\n", 683 | " chunk_id=100,\n", 684 | " fname_base=fname_base,\n", 685 | " )\n", 686 | "\n", 687 | "preds_df.to_csv('../submissions/%s.csv' %fname, \n", 688 | " header=False, mode='a', index=False, float_format='%.6f')" 689 | ] 690 | }, 691 | { 692 | "cell_type": "code", 693 | "execution_count": null, 694 | "metadata": {}, 695 | "outputs": [], 696 | "source": [ 697 | "z = pd.read_csv('../submissions/%s.csv' %fname)\n", 698 | "\n", 699 | "z = z.groupby('object_id').mean()\n", 700 | "\n", 701 | "z.shape" 702 | ] 703 | }, 704 | { 705 | 
"cell_type": "code", 706 | "execution_count": null, 707 | "metadata": {}, 708 | "outputs": [], 709 | "source": [ 710 | "meta_cols = ['hostgal_photoz', 'target']\n", 711 | "meta_train2 = pd.read_csv('../input/training_set_metadata.csv')[meta_cols]\n", 712 | "meta_train2.head()\n", 713 | "\n", 714 | "df = meta_train2.groupby('target').hostgal_photoz.mean()\n", 715 | "\n", 716 | "galactic = ['class_%d' % c for c in df[df == 0].index]\n", 717 | "extragal = ['class_%d' % c for c in df[df > 0].index]\n", 718 | "galactic, extragal" 719 | ] 720 | }, 721 | { 722 | "cell_type": "code", 723 | "execution_count": null, 724 | "metadata": {}, 725 | "outputs": [], 726 | "source": [ 727 | "z = z.reset_index()\n", 728 | "\n", 729 | "z['class_99'] *= (0.18 / z['class_99'].mean())\n", 730 | "\n", 731 | "z.loc[meta_test.hostgal_photoz == 0, extragal] = 0\n", 732 | "\n", 733 | "z.loc[meta_test.hostgal_photoz > 0, galactic] = 0\n", 734 | "\n", 735 | "z.mean(axis=0)" 736 | ] 737 | }, 738 | { 739 | "cell_type": "code", 740 | "execution_count": null, 741 | "metadata": {}, 742 | "outputs": [], 743 | "source": [ 744 | "#z.to_csv('../submissions/gal_%s.csv' %fname, index=False, float_format='%.6f')\n", 745 | "\n", 746 | "z['class_99'] = (1. 
- z[z.columns[1:-1]]).prod(axis=1)\n", 747 | "\n", 748 | "z.mean(axis=0)" 749 | ] 750 | }, 751 | { 752 | "cell_type": "code", 753 | "execution_count": null, 754 | "metadata": {}, 755 | "outputs": [], 756 | "source": [ 757 | "z['class_99'] *= (0.18 / z['class_99'].mean())\n", 758 | "\n", 759 | "z.to_csv('../submissions/gal_2_%s.csv' %fname, index=False, float_format='%.6f')" 760 | ] 761 | } 762 | ], 763 | "metadata": { 764 | "kernelspec": { 765 | "display_name": "Python [conda env:xgb8]", 766 | "language": "python", 767 | "name": "conda-env-xgb8-py" 768 | }, 769 | "language_info": { 770 | "codemirror_mode": { 771 | "name": "ipython", 772 | "version": 3 773 | }, 774 | "file_extension": ".py", 775 | "mimetype": "text/x-python", 776 | "name": "python", 777 | "nbconvert_exporter": "python", 778 | "pygments_lexer": "ipython3", 779 | "version": "3.6.5" 780 | } 781 | }, 782 | "nbformat": 4, 783 | "nbformat_minor": 2 784 | } 785 | -------------------------------------------------------------------------------- /code/test_chunks.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 5, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "fname='test'\n", 10 | "seed=0" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 9, 16 | "metadata": { 17 | "scrolled": true 18 | }, 19 | "outputs": [ 20 | { 21 | "data": { 22 | "application/vnd.jupyter.widget-view+json": { 23 | "model_id": "371992443bc34c6aaf2a29ea0ee5ba46", 24 | "version_major": 2, 25 | "version_minor": 0 26 | }, 27 | "text/plain": [ 28 | "HBox(children=(IntProgress(value=0, max=91), HTML(value='')))" 29 | ] 30 | }, 31 | "metadata": {}, 32 | "output_type": "display_data" 33 | }, 34 | { 35 | "name": "stdout", 36 | "output_type": "stream", 37 | "text": [ 38 | "\n" 39 | ] 40 | } 41 | ], 42 | "source": [ 43 | "import numpy as np\n", 44 | "import pandas as pd\n", 45 | "from 
sklearn.model_selection import StratifiedKFold\n", 46 | "from sklearn.metrics import confusion_matrix\n", 47 | "import gc\n", 48 | "import matplotlib.pyplot as plt\n", 49 | "import seaborn as sns\n", 50 | "import lightgbm as lgb\n", 51 | "import xgboost as xgb\n", 52 | "import logging\n", 53 | "from tqdm import tqdm_notebook\n", 54 | "import itertools\n", 55 | "\n", 56 | "import pickle as pkl\n", 57 | "\n", 58 | "import time\n", 59 | "\n", 60 | "start = time.time()\n", 61 | "chunks = 5000000\n", 62 | "remain_df = None\n", 63 | "\n", 64 | "for i_c, df in tqdm_notebook(enumerate(pd.read_csv('../input/test_set.csv', \n", 65 | " chunksize=chunks, \n", 66 | " iterator=True)),\n", 67 | " total=91):\n", 68 | " # Check object_ids\n", 69 | " # I believe np.unique keeps the order of group_ids as they appear in the file\n", 70 | " unique_ids = np.unique(df['object_id'])\n", 71 | " new_remain_df = df.loc[df['object_id'] == unique_ids[-1]].copy()\n", 72 | "\n", 73 | " if remain_df is None:\n", 74 | " df = df.loc[df['object_id'].isin(unique_ids[:-1])].copy()\n", 75 | " else:\n", 76 | " df = pd.concat([remain_df, df.loc[df['object_id'].isin(unique_ids[:-1])]], axis=0)\n", 77 | "\n", 78 | " # Create remaining samples df\n", 79 | " remain_df = new_remain_df\n", 80 | "\n", 81 | " with open('../input/test_chunk_%d.csv' %i_c, 'wb') as file:\n", 82 | " pkl.dump(df, file)\n", 83 | " \n", 84 | "with open('../input/test_chunk_%d.csv' %100, 'wb') as file:\n", 85 | " pkl.dump(remain_df, file)\n" 86 | ] 87 | } 88 | ], 89 | "metadata": { 90 | "kernelspec": { 91 | "display_name": "Python [conda env:xgb8]", 92 | "language": "python", 93 | "name": "conda-env-xgb8-py" 94 | }, 95 | "language_info": { 96 | "codemirror_mode": { 97 | "name": "ipython", 98 | "version": 3 99 | }, 100 | "file_extension": ".py", 101 | "mimetype": "text/x-python", 102 | "name": "python", 103 | "nbconvert_exporter": "python", 104 | "pygments_lexer": "ipython3", 105 | "version": "3.6.5" 106 | } 107 | }, 108 | "nbformat": 4, 
109 | "nbformat_minor": 2 110 | } 111 | -------------------------------------------------------------------------------- /data/README.md: -------------------------------------------------------------------------------- 1 | Directory for storing temporary data, mostly precomputed features. 2 | -------------------------------------------------------------------------------- /input/README.md: -------------------------------------------------------------------------------- 1 | Competition data should be put here. 2 | -------------------------------------------------------------------------------- /submissions/README.md: -------------------------------------------------------------------------------- 1 | Directory for submission files. 2 | --------------------------------------------------------------------------------