├── README.md └── Umojahack-Challenge-3-Top1-Notebook.ipynb /README.md: -------------------------------------------------------------------------------- 1 | # UmojaHack Africa 2021 #3: Financial Resilience Challenge 2 | It was an unbelievable weekend for me, as I won Challenge #3 in UmojaHack Africa 2021 with my amazing teammate @ASSAZZIN. 3 | This repository contains the code solution showing how to get 1st place in the Financial Resilience Challenge. Enjoy! 4 | 5 | # Brief Description 6 | The objective of this challenge is to build a machine learning model to predict which individuals across Africa and around the world are most likely to be financially resilient or not. Have a look at the challenge on Zindi. 7 | 8 | # About this Notebook 9 | This is our final notebook; it will give you 1st place! 10 | 11 | 1. Make sure that you set the execution type to 'GPU'. 12 | 2. Make sure that you're working on a Tesla P100 machine. 13 | 14 | # Leaderboard 15 | Look for the team name: adatnomerTN 16 | Rank: 1/463 17 | 18 | # Authors 19 |
20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 |
NameZindi IDGithub ID
Saifeddine AZZABI@azzabinho001@saif2020
Azer KSOURI@ASSAZZIN@Az-Ks
39 |
40 | -------------------------------------------------------------------------------- /Umojahack-Challenge-3-Top1-Notebook.ipynb: -------------------------------------------------------------------------------- 1 | {"cells":[{"metadata":{"trusted":true},"cell_type":"code","source":"!nvidia-smi","execution_count":38,"outputs":[{"output_type":"stream","text":"Mon Mar 29 11:41:53 2021 \r\n+-----------------------------------------------------------------------------+\r\n| NVIDIA-SMI 450.51.06 Driver Version: 450.51.06 CUDA Version: 11.0 |\r\n|-------------------------------+----------------------+----------------------+\r\n| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |\r\n| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |\r\n| | | MIG M. |\r\n|===============================+======================+======================|\r\n| 0 Tesla P100-PCIE... Off | 00000000:00:04.0 Off | 0 |\r\n| N/A 37C P0 35W / 250W | 367MiB / 16280MiB | 0% Default |\r\n| | | N/A |\r\n+-------------------------------+----------------------+----------------------+\r\n \r\n+-----------------------------------------------------------------------------+\r\n| Processes: |\r\n| GPU GI CI PID Type Process name GPU Memory |\r\n| ID ID Usage |\r\n|=============================================================================|\r\n+-----------------------------------------------------------------------------+\r\n","name":"stdout"}]},{"metadata":{},"cell_type":"markdown","source":"# 1-Import libraries and Load files"},{"metadata":{"_uuid":"8f2839f25d086af736a60e9eeb907d3b93b6e0e5","_cell_guid":"b1076dfc-b9ad-4769-8c92-a6c4dae69d19","trusted":true,"id":"8w2Q1hri2QI8"},"cell_type":"code","source":"# Import libraries\nimport numpy as np # linear algebra\nimport pandas as pd # data processing, CSV file I/O (e.g. 
pd.read_csv)\npd.set_option('max_colwidth', 500)\n\nimport matplotlib.pyplot as plt\nimport seaborn as sns\n%matplotlib inline\n\nimport os, sys, gc, warnings, random\nwarnings.filterwarnings('ignore')\n\nfrom sklearn.model_selection import StratifiedKFold\nfrom sklearn import preprocessing\nfrom sklearn.metrics import auc, classification_report, roc_auc_score\nfrom sklearn.model_selection import StratifiedKFold\n\nimport lightgbm as lgb\nimport xgboost as xgb\nfrom catboost import CatBoostClassifier ,Pool","execution_count":20,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"!pip install catboost==0.22 --quiet","execution_count":21,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"# File PATH\nfor dirname, _, filenames in os.walk('/kaggle/input'):\n for filename in filenames:\n print(os.path.join(dirname, filename))","execution_count":22,"outputs":[{"output_type":"stream","text":"/kaggle/input/umoja2021/SampleSubmission.csv\n/kaggle/input/umoja2021/Train.csv\n/kaggle/input/umoja2021/VariableDefinitions.csv\n/kaggle/input/umoja2021/Test.csv\n","name":"stdout"}]},{"metadata":{"trusted":true,"id":"3HJjr1OI2QJE"},"cell_type":"code","source":"# Load files\ntrain = pd.read_csv('/kaggle/input/umoja2021/Train.csv')\ntest = pd.read_csv('/kaggle/input/umoja2021/Test.csv')\nsamplesubmission = pd.read_csv('/kaggle/input/umoja2021/SampleSubmission.csv')\nvariable_definations = pd.read_csv('/kaggle/input/umoja2021/VariableDefinitions.csv')","execution_count":23,"outputs":[]},{"metadata":{"id":"WqrOvQFO2QJE"},"cell_type":"markdown","source":"# 2-Preprocessing\n "},{"metadata":{"trusted":true},"cell_type":"code","source":"def seed_everything(seed):\n random.seed(seed)\n np.random.seed(seed)\nSEED = 42 \nseed_everything(SEED) ","execution_count":24,"outputs":[]},{"metadata":{"trusted":true,"id":"3okRJIeX2QJF"},"cell_type":"code","source":"# label_encoder object knows how to understand word labels.\nlabel_encoder = 
preprocessing.LabelEncoder()\n \n# Encode labels in column 'country'.\ntrain['country_code']= label_encoder.fit_transform(train['country'])\n\ntest['country_code']= label_encoder.transform(test['country'])","execution_count":25,"outputs":[]},{"metadata":{"trusted":true,"id":"jswtMSoQ2QJF"},"cell_type":"code","source":"# label_encoder object knows how to understand word labels.\nlabel_encoder2 = preprocessing.LabelEncoder()\n \n# Encode labels in column 'region'.\ntrain['region_code']= label_encoder2.fit_transform(train['region'].astype(str))\n\ntest['region_code']= label_encoder2.transform(test['region'].astype(str))","execution_count":26,"outputs":[]},{"metadata":{"id":"z7EF7VF52QJF"},"cell_type":"markdown","source":"# 3-Modeling and Validation\n"},{"metadata":{"id":"gRYMRA4SD6Fv","trusted":true},"cell_type":"code","source":"remove_features = ['ID', 'country', 'region','target']\nfeatures_columns = [col for col in train.columns if col not in remove_features]\ncateg_features = ['country_code','region_code']\nX,y = train[features_columns], train['target']","execution_count":27,"outputs":[]},{"metadata":{"id":"F-6T3oAW3ldT"},"cell_type":"markdown","source":"# 3.1-Catboost"},{"metadata":{"id":"lM8paB7AAyRw","trusted":true},"cell_type":"code","source":"score = 0\ntest['target'] = 0\nskf = StratifiedKFold(n_splits=5,shuffle=True, random_state=SEED)\noof_cat = np.zeros((train.shape[0],))\ncat_preds= []\nfor fold_, (trn_idx, val_idx) in enumerate(skf.split(X, train.country)):\n print('Fold:',fold_+1)\n \n # Creating CatBoost train/valid data\n X_train, y_train = X.iloc[trn_idx,:], y[trn_idx] \n X_test, y_test = X.iloc[val_idx,:], y[val_idx] \n \n estimator = CatBoostClassifier(learning_rate=0.05,\n task_type=\"GPU\",\n devices='0:1',\n iterations=10000,eval_metric='AUC',\n use_best_model =True,\n verbose=100,\n random_seed= 0)\n estimator.fit(Pool(X_train,y_train,cat_features = categ_features),\n eval_set = Pool(X_test,y_test,cat_features = 
categ_features),early_stopping_rounds=200)\n \n y_pred_val = estimator.predict_proba(X_test)[:,1]\n oof_cat[val_idx] = y_pred_val\n score = score + roc_auc_score(y_test, y_pred_val)\n y_pred_test = estimator.predict_proba(test[features_columns])[:,1]\n cat_preds.append(y_pred_test)\nprint('OOF score :',roc_auc_score(y, oof_cat)) ","execution_count":28,"outputs":[{"output_type":"stream","text":"Fold: 1\n0:\tlearn: 0.7462481\ttest: 0.7486294\tbest: 0.7486294 (0)\ttotal: 89ms\tremaining: 14m 50s\n100:\tlearn: 0.7948139\ttest: 0.7951516\tbest: 0.7951516 (100)\ttotal: 3.17s\tremaining: 5m 10s\n200:\tlearn: 0.8018712\ttest: 0.7992482\tbest: 0.7992482 (200)\ttotal: 5.82s\tremaining: 4m 43s\n300:\tlearn: 0.8065880\ttest: 0.8011628\tbest: 0.8011628 (300)\ttotal: 8.69s\tremaining: 4m 39s\n400:\tlearn: 0.8104563\ttest: 0.8023691\tbest: 0.8023691 (400)\ttotal: 11.4s\tremaining: 4m 32s\n500:\tlearn: 0.8136608\ttest: 0.8030019\tbest: 0.8030076 (492)\ttotal: 14s\tremaining: 4m 26s\n600:\tlearn: 0.8164281\ttest: 0.8034564\tbest: 0.8034607 (598)\ttotal: 16.7s\tremaining: 4m 21s\n700:\tlearn: 0.8189620\ttest: 0.8037325\tbest: 0.8037422 (694)\ttotal: 19.6s\tremaining: 4m 20s\n800:\tlearn: 0.8213338\ttest: 0.8040116\tbest: 0.8040350 (794)\ttotal: 22.3s\tremaining: 4m 16s\n900:\tlearn: 0.8237124\ttest: 0.8042645\tbest: 0.8042771 (890)\ttotal: 25s\tremaining: 4m 12s\n1000:\tlearn: 0.8257954\ttest: 0.8044313\tbest: 0.8044551 (996)\ttotal: 27.7s\tremaining: 4m 8s\n1100:\tlearn: 0.8278434\ttest: 0.8045970\tbest: 0.8045988 (1097)\ttotal: 30.7s\tremaining: 4m 7s\n1200:\tlearn: 0.8299275\ttest: 0.8047938\tbest: 0.8047938 (1200)\ttotal: 34.1s\tremaining: 4m 10s\n1300:\tlearn: 0.8319957\ttest: 0.8048546\tbest: 0.8048641 (1296)\ttotal: 36.8s\tremaining: 4m 6s\n1400:\tlearn: 0.8340443\ttest: 0.8049457\tbest: 0.8049520 (1397)\ttotal: 39.9s\tremaining: 4m 5s\n1500:\tlearn: 0.8358794\ttest: 0.8050172\tbest: 0.8050423 (1467)\ttotal: 42.7s\tremaining: 4m 1s\n1600:\tlearn: 0.8377327\ttest: 
0.8049818\tbest: 0.8050423 (1467)\ttotal: 45.4s\tremaining: 3m 58s\n1700:\tlearn: 0.8393964\ttest: 0.8050640\tbest: 0.8050723 (1679)\ttotal: 48.1s\tremaining: 3m 54s\n1800:\tlearn: 0.8410930\ttest: 0.8051071\tbest: 0.8051154 (1796)\ttotal: 50.9s\tremaining: 3m 51s\n1900:\tlearn: 0.8427789\ttest: 0.8051029\tbest: 0.8051493 (1812)\ttotal: 53.7s\tremaining: 3m 48s\n2000:\tlearn: 0.8443660\ttest: 0.8050410\tbest: 0.8051493 (1812)\ttotal: 56.4s\tremaining: 3m 45s\nbestTest = 0.8051493466\nbestIteration = 1812\nShrink model to first 1813 iterations.\nFold: 2\n0:\tlearn: 0.7498209\ttest: 0.7480360\tbest: 0.7480360 (0)\ttotal: 27.1ms\tremaining: 4m 31s\n100:\tlearn: 0.7956678\ttest: 0.7907229\tbest: 0.7907229 (100)\ttotal: 3.32s\tremaining: 5m 25s\n200:\tlearn: 0.8024419\ttest: 0.7952886\tbest: 0.7952886 (200)\ttotal: 6.38s\tremaining: 5m 11s\n300:\tlearn: 0.8070555\ttest: 0.7976007\tbest: 0.7976007 (300)\ttotal: 9.05s\tremaining: 4m 51s\n400:\tlearn: 0.8107060\ttest: 0.7988920\tbest: 0.7988920 (400)\ttotal: 11.9s\tremaining: 4m 44s\n500:\tlearn: 0.8137488\ttest: 0.7996035\tbest: 0.7996172 (497)\ttotal: 14.6s\tremaining: 4m 37s\n600:\tlearn: 0.8163797\ttest: 0.8001350\tbest: 0.8001412 (592)\ttotal: 17.3s\tremaining: 4m 30s\n700:\tlearn: 0.8189297\ttest: 0.8005466\tbest: 0.8005478 (697)\ttotal: 20s\tremaining: 4m 25s\n800:\tlearn: 0.8212979\ttest: 0.8009501\tbest: 0.8009501 (800)\ttotal: 22.9s\tremaining: 4m 22s\n900:\tlearn: 0.8234956\ttest: 0.8011888\tbest: 0.8011950 (889)\ttotal: 25.6s\tremaining: 4m 18s\n1000:\tlearn: 0.8257377\ttest: 0.8014680\tbest: 0.8014780 (998)\ttotal: 28.3s\tremaining: 4m 14s\n1100:\tlearn: 0.8277685\ttest: 0.8016165\tbest: 0.8016255 (1080)\ttotal: 30.9s\tremaining: 4m 9s\n1200:\tlearn: 0.8296681\ttest: 0.8016744\tbest: 0.8016889 (1196)\ttotal: 33.8s\tremaining: 4m 7s\n1300:\tlearn: 0.8315968\ttest: 0.8017375\tbest: 0.8017586 (1240)\ttotal: 37.3s\tremaining: 4m 9s\n1400:\tlearn: 0.8334359\ttest: 0.8017283\tbest: 0.8018239 (1326)\ttotal: 
40s\tremaining: 4m 5s\n1500:\tlearn: 0.8351332\ttest: 0.8017324\tbest: 0.8018239 (1326)\ttotal: 42.6s\tremaining: 4m 1s\nbestTest = 0.8018239141\nbestIteration = 1326\nShrink model to first 1327 iterations.\nFold: 3\n0:\tlearn: 0.7482909\ttest: 0.7506087\tbest: 0.7506087 (0)\ttotal: 29.3ms\tremaining: 4m 52s\n100:\tlearn: 0.7959581\ttest: 0.7926021\tbest: 0.7926021 (100)\ttotal: 2.77s\tremaining: 4m 31s\n200:\tlearn: 0.8026282\ttest: 0.7963722\tbest: 0.7963778 (199)\ttotal: 5.45s\tremaining: 4m 25s\n300:\tlearn: 0.8073139\ttest: 0.7980776\tbest: 0.7980776 (300)\ttotal: 8.13s\tremaining: 4m 21s\n400:\tlearn: 0.8109858\ttest: 0.7990093\tbest: 0.7990100 (398)\ttotal: 11s\tremaining: 4m 23s\n500:\tlearn: 0.8141176\ttest: 0.7996457\tbest: 0.7996457 (500)\ttotal: 13.7s\tremaining: 4m 19s\n600:\tlearn: 0.8169320\ttest: 0.7999684\tbest: 0.7999781 (583)\ttotal: 16.4s\tremaining: 4m 16s\n700:\tlearn: 0.8193592\ttest: 0.7999837\tbest: 0.8000273 (691)\ttotal: 19.2s\tremaining: 4m 14s\n800:\tlearn: 0.8218538\ttest: 0.8001575\tbest: 0.8001578 (790)\ttotal: 22.7s\tremaining: 4m 20s\n900:\tlearn: 0.8242201\ttest: 0.8002918\tbest: 0.8002999 (888)\ttotal: 25.4s\tremaining: 4m 16s\n1000:\tlearn: 0.8264083\ttest: 0.8002907\tbest: 0.8003142 (968)\ttotal: 28.1s\tremaining: 4m 12s\n1100:\tlearn: 0.8285202\ttest: 0.8002641\tbest: 0.8003247 (1027)\ttotal: 31.1s\tremaining: 4m 10s\n1200:\tlearn: 0.8304914\ttest: 0.8001784\tbest: 0.8003247 (1027)\ttotal: 33.7s\tremaining: 4m 6s\nbestTest = 0.8003246784\nbestIteration = 1027\nShrink model to first 1028 iterations.\nFold: 4\n0:\tlearn: 0.7453113\ttest: 0.7417352\tbest: 0.7417352 (0)\ttotal: 30.6ms\tremaining: 5m 5s\n100:\tlearn: 0.7944876\ttest: 0.7939741\tbest: 0.7939741 (100)\ttotal: 2.86s\tremaining: 4m 40s\n200:\tlearn: 0.8015060\ttest: 0.7984001\tbest: 0.7984001 (200)\ttotal: 6.03s\tremaining: 4m 53s\n300:\tlearn: 0.8062183\ttest: 0.8004809\tbest: 0.8004863 (299)\ttotal: 8.71s\tremaining: 4m 40s\n400:\tlearn: 0.8098219\ttest: 
0.8017541\tbest: 0.8017596 (399)\ttotal: 11.4s\tremaining: 4m 32s\n500:\tlearn: 0.8129882\ttest: 0.8026219\tbest: 0.8026479 (497)\ttotal: 14.1s\tremaining: 4m 26s\n600:\tlearn: 0.8157722\ttest: 0.8031160\tbest: 0.8031215 (597)\ttotal: 17.9s\tremaining: 4m 40s\n700:\tlearn: 0.8183806\ttest: 0.8037194\tbest: 0.8037347 (696)\ttotal: 20.7s\tremaining: 4m 34s\n800:\tlearn: 0.8207813\ttest: 0.8042623\tbest: 0.8042623 (800)\ttotal: 23.4s\tremaining: 4m 29s\n900:\tlearn: 0.8230681\ttest: 0.8045021\tbest: 0.8045021 (900)\ttotal: 26.5s\tremaining: 4m 27s\n1000:\tlearn: 0.8252130\ttest: 0.8048085\tbest: 0.8048119 (998)\ttotal: 29.2s\tremaining: 4m 22s\n1100:\tlearn: 0.8272741\ttest: 0.8050154\tbest: 0.8050154 (1100)\ttotal: 31.9s\tremaining: 4m 18s\n1200:\tlearn: 0.8292813\ttest: 0.8051452\tbest: 0.8051452 (1200)\ttotal: 34.6s\tremaining: 4m 13s\n1300:\tlearn: 0.8313912\ttest: 0.8052602\tbest: 0.8052621 (1291)\ttotal: 37.6s\tremaining: 4m 11s\n1400:\tlearn: 0.8331903\ttest: 0.8054034\tbest: 0.8054113 (1398)\ttotal: 40.3s\tremaining: 4m 7s\n1500:\tlearn: 0.8350297\ttest: 0.8056519\tbest: 0.8056519 (1500)\ttotal: 43s\tremaining: 4m 3s\n1600:\tlearn: 0.8368011\ttest: 0.8057263\tbest: 0.8057814 (1575)\ttotal: 45.8s\tremaining: 4m\n1700:\tlearn: 0.8385022\ttest: 0.8058443\tbest: 0.8058556 (1699)\ttotal: 49.5s\tremaining: 4m 1s\n1800:\tlearn: 0.8401912\ttest: 0.8059524\tbest: 0.8059524 (1800)\ttotal: 52.2s\tremaining: 3m 57s\n1900:\tlearn: 0.8419314\ttest: 0.8058357\tbest: 0.8059524 (1800)\ttotal: 54.9s\tremaining: 3m 53s\n2000:\tlearn: 0.8435998\ttest: 0.8059264\tbest: 0.8059524 (1800)\ttotal: 57.6s\tremaining: 3m 50s\nbestTest = 0.8059524298\nbestIteration = 1800\nShrink model to first 1801 iterations.\nFold: 5\n0:\tlearn: 0.7476033\ttest: 0.7517107\tbest: 0.7517107 (0)\ttotal: 29.3ms\tremaining: 4m 52s\n100:\tlearn: 0.7946339\ttest: 0.7957892\tbest: 0.7957892 (100)\ttotal: 2.75s\tremaining: 4m 29s\n200:\tlearn: 0.8018562\ttest: 0.7999948\tbest: 0.7999948 (200)\ttotal: 
5.43s\tremaining: 4m 24s\n300:\tlearn: 0.8065792\ttest: 0.8014743\tbest: 0.8014760 (299)\ttotal: 8.31s\tremaining: 4m 27s\n400:\tlearn: 0.8103896\ttest: 0.8023871\tbest: 0.8023871 (400)\ttotal: 11.1s\tremaining: 4m 26s\n500:\tlearn: 0.8135478\ttest: 0.8030231\tbest: 0.8030231 (500)\ttotal: 13.8s\tremaining: 4m 22s\n600:\tlearn: 0.8162794\ttest: 0.8034980\tbest: 0.8034980 (600)\ttotal: 16.6s\tremaining: 4m 19s\n700:\tlearn: 0.8187928\ttest: 0.8038943\tbest: 0.8039035 (694)\ttotal: 20.4s\tremaining: 4m 30s\n800:\tlearn: 0.8211960\ttest: 0.8042510\tbest: 0.8042510 (800)\ttotal: 23.1s\tremaining: 4m 25s\n900:\tlearn: 0.8234879\ttest: 0.8045413\tbest: 0.8045415 (897)\ttotal: 25.8s\tremaining: 4m 20s\n1000:\tlearn: 0.8257281\ttest: 0.8046781\tbest: 0.8046896 (986)\ttotal: 28.5s\tremaining: 4m 16s\n1100:\tlearn: 0.8278850\ttest: 0.8048415\tbest: 0.8048508 (1098)\ttotal: 31.4s\tremaining: 4m 14s\n1200:\tlearn: 0.8298442\ttest: 0.8049973\tbest: 0.8050114 (1192)\ttotal: 34.1s\tremaining: 4m 9s\n1300:\tlearn: 0.8319359\ttest: 0.8050599\tbest: 0.8050684 (1285)\ttotal: 36.8s\tremaining: 4m 6s\n1400:\tlearn: 0.8338048\ttest: 0.8050826\tbest: 0.8050940 (1339)\ttotal: 39.5s\tremaining: 4m 2s\n","name":"stdout"},{"output_type":"stream","text":"1500:\tlearn: 0.8356773\ttest: 0.8051198\tbest: 0.8051291 (1498)\ttotal: 42.4s\tremaining: 4m\n1600:\tlearn: 0.8373988\ttest: 0.8051282\tbest: 0.8051570 (1535)\ttotal: 45.1s\tremaining: 3m 56s\n1700:\tlearn: 0.8392369\ttest: 0.8049975\tbest: 0.8051570 (1535)\ttotal: 47.8s\tremaining: 3m 53s\nbestTest = 0.8051570356\nbestIteration = 1535\nShrink model to first 1536 iterations.\nOOF score : 0.8036701848165237\n","name":"stdout"}]},{"metadata":{"id":"K4edUJPsDZ5I","trusted":true},"cell_type":"code","source":"catboost_preds = np.mean(cat_preds,axis=0)","execution_count":29,"outputs":[]},{"metadata":{"id":"IeAzXdIy3juU"},"cell_type":"markdown","source":"# 
3.2-LGBM"},{"metadata":{"trusted":true,"id":"huQajYgL2QJG"},"cell_type":"code","source":"########################### Models params and Features\n###########################################################\n\nlgb_params = {'boosting_type': 'gbdt','objective': 'binary','metric': 'auc',\n 'n_estimators': 500,'early_stopping_rounds': 100,'sub_sample' : 0.7,\n 'colsample_bytree' : 0.6,'seed': SEED,'silent':False\n }","execution_count":30,"outputs":[]},{"metadata":{"trusted":true,"id":"U_01IcGp2QJG","outputId":"d7e230dc-0e72-4125-ca09-75b36c2fe043"},"cell_type":"code","source":"score = 0\ntest['target'] = 0\nskf = StratifiedKFold(n_splits=5,shuffle=True, random_state=SEED)\noof_lgb = np.zeros((train.shape[0],))\nlgb_preds = []\nfor fold_, (trn_idx, val_idx) in enumerate(skf.split(X, train.country)):\n print('Fold:',fold_+1)\n \n # Creating lgb train/valid data\n tr_x, tr_y = X.iloc[trn_idx,:], y[trn_idx] \n vl_x, vl_y = X.iloc[val_idx,:], y[val_idx] \n \n train_data = lgb.Dataset(tr_x, label=tr_y,categorical_feature=categ_features)\n valid_data = lgb.Dataset(vl_x, label=vl_y,categorical_feature=categ_features)\n # Train Model\n \n estimator = lgb.train(\n lgb_params,\n train_data,\n valid_sets = [train_data,valid_data],\n verbose_eval = 100,\n )\n\n y_pred_val = estimator.predict(vl_x,num_iteration=estimator.best_iteration)\n oof_lgb[val_idx] = y_pred_val\n score = score + roc_auc_score(vl_y, y_pred_val)\n y_pred_test = estimator.predict(test[features_columns],num_iteration=estimator.best_iteration)\n lgb_preds.append(y_pred_test)\n\nprint('OOF score :',roc_auc_score(y, oof_lgb))","execution_count":31,"outputs":[{"output_type":"stream","text":"Fold: 1\n[LightGBM] [Warning] Unknown parameter: silent\n[LightGBM] [Warning] Unknown parameter: sub_sample\n[LightGBM] [Warning] Unknown parameter: silent\n[LightGBM] [Warning] Unknown parameter: sub_sample\n[LightGBM] [Info] Number of positive: 48828, number of negative: 35065\n[LightGBM] [Warning] Auto-choosing row-wise 
multi-threading, the overhead of testing was 0.014620 seconds.\nYou can set `force_row_wise=true` to remove the overhead.\nAnd if memory is not enough, you can set `force_col_wise=true`.\n[LightGBM] [Info] Total Bins 549\n[LightGBM] [Info] Number of data points in the train set: 83893, number of used features: 34\n[LightGBM] [Warning] Unknown parameter: silent\n[LightGBM] [Warning] Unknown parameter: sub_sample\n[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.582027 -> initscore=0.331100\n[LightGBM] [Info] Start training from score 0.331100\nTraining until validation scores don't improve for 100 rounds\n[100]\ttraining's auc: 0.825801\tvalid_1's auc: 0.803917\n[200]\ttraining's auc: 0.839919\tvalid_1's auc: 0.804129\nEarly stopping, best iteration is:\n[170]\ttraining's auc: 0.836514\tvalid_1's auc: 0.804521\nFold: 2\n[LightGBM] [Warning] Unknown parameter: silent\n[LightGBM] [Warning] Unknown parameter: sub_sample\n[LightGBM] [Warning] Unknown parameter: silent\n[LightGBM] [Warning] Unknown parameter: sub_sample\n[LightGBM] [Info] Number of positive: 48838, number of negative: 35055\n[LightGBM] [Warning] Auto-choosing row-wise multi-threading, the overhead of testing was 0.014680 seconds.\nYou can set `force_row_wise=true` to remove the overhead.\nAnd if memory is not enough, you can set `force_col_wise=true`.\n[LightGBM] [Info] Total Bins 549\n[LightGBM] [Info] Number of data points in the train set: 83893, number of used features: 34\n[LightGBM] [Warning] Unknown parameter: silent\n[LightGBM] [Warning] Unknown parameter: sub_sample\n[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.582146 -> initscore=0.331590\n[LightGBM] [Info] Start training from score 0.331590\nTraining until validation scores don't improve for 100 rounds\n[100]\ttraining's auc: 0.826566\tvalid_1's auc: 0.801671\n[200]\ttraining's auc: 0.840782\tvalid_1's auc: 0.801995\nEarly stopping, best iteration is:\n[154]\ttraining's auc: 0.834982\tvalid_1's auc: 0.802106\nFold: 3\n[LightGBM] 
[Warning] Unknown parameter: silent\n[LightGBM] [Warning] Unknown parameter: sub_sample\n[LightGBM] [Warning] Unknown parameter: silent\n[LightGBM] [Warning] Unknown parameter: sub_sample\n[LightGBM] [Info] Number of positive: 48717, number of negative: 35177\n[LightGBM] [Warning] Auto-choosing row-wise multi-threading, the overhead of testing was 0.014733 seconds.\nYou can set `force_row_wise=true` to remove the overhead.\nAnd if memory is not enough, you can set `force_col_wise=true`.\n[LightGBM] [Info] Total Bins 548\n[LightGBM] [Info] Number of data points in the train set: 83894, number of used features: 34\n[LightGBM] [Warning] Unknown parameter: silent\n[LightGBM] [Warning] Unknown parameter: sub_sample\n[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.580697 -> initscore=0.325636\n[LightGBM] [Info] Start training from score 0.325636\nTraining until validation scores don't improve for 100 rounds\n[100]\ttraining's auc: 0.827203\tvalid_1's auc: 0.799076\nEarly stopping, best iteration is:\n[94]\ttraining's auc: 0.826181\tvalid_1's auc: 0.799184\nFold: 4\n[LightGBM] [Warning] Unknown parameter: silent\n[LightGBM] [Warning] Unknown parameter: sub_sample\n[LightGBM] [Warning] Unknown parameter: silent\n[LightGBM] [Warning] Unknown parameter: sub_sample\n[LightGBM] [Info] Number of positive: 48894, number of negative: 35000\n[LightGBM] [Warning] Auto-choosing row-wise multi-threading, the overhead of testing was 0.014916 seconds.\nYou can set `force_row_wise=true` to remove the overhead.\nAnd if memory is not enough, you can set `force_col_wise=true`.\n[LightGBM] [Info] Total Bins 549\n[LightGBM] [Info] Number of data points in the train set: 83894, number of used features: 34\n[LightGBM] [Warning] Unknown parameter: silent\n[LightGBM] [Warning] Unknown parameter: sub_sample\n[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.582807 -> initscore=0.334307\n[LightGBM] [Info] Start training from score 0.334307\nTraining until validation scores don't improve for 100 
rounds\n[100]\ttraining's auc: 0.826256\tvalid_1's auc: 0.804794\n[200]\ttraining's auc: 0.83951\tvalid_1's auc: 0.805614\nEarly stopping, best iteration is:\n[165]\ttraining's auc: 0.835675\tvalid_1's auc: 0.805788\nFold: 5\n[LightGBM] [Warning] Unknown parameter: silent\n[LightGBM] [Warning] Unknown parameter: sub_sample\n[LightGBM] [Warning] Unknown parameter: silent\n[LightGBM] [Warning] Unknown parameter: sub_sample\n[LightGBM] [Info] Number of positive: 48767, number of negative: 35127\n[LightGBM] [Warning] Auto-choosing row-wise multi-threading, the overhead of testing was 0.014844 seconds.\nYou can set `force_row_wise=true` to remove the overhead.\nAnd if memory is not enough, you can set `force_col_wise=true`.\n[LightGBM] [Info] Total Bins 548\n[LightGBM] [Info] Number of data points in the train set: 83894, number of used features: 34\n[LightGBM] [Warning] Unknown parameter: silent\n[LightGBM] [Warning] Unknown parameter: sub_sample\n[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.581293 -> initscore=0.328084\n[LightGBM] [Info] Start training from score 0.328084\nTraining until validation scores don't improve for 100 rounds\n[100]\ttraining's auc: 0.82609\tvalid_1's auc: 0.804666\n[200]\ttraining's auc: 0.840797\tvalid_1's auc: 0.805351\n[300]\ttraining's auc: 0.850344\tvalid_1's auc: 0.804868\nEarly stopping, best iteration is:\n[203]\ttraining's auc: 0.841149\tvalid_1's auc: 0.805382\nOOF score : 0.8033790365333908\n","name":"stdout"}]},{"metadata":{"id":"dFkfWwaAH-c-","trusted":true},"cell_type":"code","source":"lightgbm_preds = np.mean(lgb_preds,axis=0)","execution_count":32,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":"# 3.3-Xgboost"},{"metadata":{"trusted":true},"cell_type":"code","source":"########################### Models params and Features\n###########################################################\n\nparam = {'objective': 'binary:logistic','eval_metric': 'auc','subsample': 0.8,'colsample_bytree' : 
0.9,'learning_rate':0.02,'random_state':SEED, 'seed':SEED,'gpu_id': 0, 'booster': 'gbtree','tree_method': 'gpu_hist', 'grow_policy':'lossguide'}","execution_count":33,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"score = 0\ntest['target'] = 0\nskf = StratifiedKFold(n_splits=5,shuffle=True, random_state=SEED)\noof_xgb = np.zeros((train.shape[0],))\nxgb_preds = []\nfor fold_, (trn_idx, val_idx) in enumerate(skf.split(X, train.country)):\n print('Fold:',fold_+1)\n \n # Creating lgb train/valid data\n tr_x, tr_y = X.iloc[trn_idx,:], y[trn_idx] \n vl_x, vl_y = X.iloc[val_idx,:], y[val_idx] \n \n train_data = xgb.DMatrix(tr_x, label=tr_y)\n valid_data = xgb.DMatrix(vl_x, label=vl_y)\n ###\n evals=[(train_data, 'train'), (valid_data, 'valid')]\n estimator_clf = xgb.train(param,train_data,evals=evals,num_boost_round=10000, \n \n verbose_eval = 100,early_stopping_rounds=100)\n \n\n y_pred_val = estimator_clf.predict(xgb.DMatrix(vl_x))\n oof_xgb[val_idx] = y_pred_val\n score = score + roc_auc_score(vl_y, y_pred_val)\n y_pred_test = estimator_clf.predict(xgb.DMatrix(test[features_columns]))\n xgb_preds.append(y_pred_test)\n\nprint('OOF score :',roc_auc_score(y, oof_xgb))","execution_count":34,"outputs":[{"output_type":"stream","text":"Fold: 1\n[0]\ttrain-auc:0.74145\tvalid-auc:0.73889\n[100]\ttrain-auc:0.78064\tvalid-auc:0.77392\n[200]\ttrain-auc:0.79850\tvalid-auc:0.78748\n[300]\ttrain-auc:0.80930\tvalid-auc:0.79452\n[400]\ttrain-auc:0.81595\tvalid-auc:0.79801\n[500]\ttrain-auc:0.82084\tvalid-auc:0.80011\n[600]\ttrain-auc:0.82438\tvalid-auc:0.80110\n[700]\ttrain-auc:0.82779\tvalid-auc:0.80196\n[800]\ttrain-auc:0.83074\tvalid-auc:0.80244\n[900]\ttrain-auc:0.83364\tvalid-auc:0.80274\n[1000]\ttrain-auc:0.83617\tvalid-auc:0.80306\n[1100]\ttrain-auc:0.83855\tvalid-auc:0.80327\n[1200]\ttrain-auc:0.84102\tvalid-auc:0.80334\n[1300]\ttrain-auc:0.84342\tvalid-auc:0.80347\n[1397]\ttrain-auc:0.84556\tvalid-auc:0.80342\nFold: 
2\n[0]\ttrain-auc:0.74206\tvalid-auc:0.73457\n[100]\ttrain-auc:0.78121\tvalid-auc:0.76926\n[200]\ttrain-auc:0.79868\tvalid-auc:0.78281\n[300]\ttrain-auc:0.80974\tvalid-auc:0.79092\n[400]\ttrain-auc:0.81657\tvalid-auc:0.79498\n[500]\ttrain-auc:0.82125\tvalid-auc:0.79691\n[600]\ttrain-auc:0.82531\tvalid-auc:0.79837\n[700]\ttrain-auc:0.82855\tvalid-auc:0.79916\n[800]\ttrain-auc:0.83146\tvalid-auc:0.79971\n[900]\ttrain-auc:0.83422\tvalid-auc:0.80002\n[1000]\ttrain-auc:0.83685\tvalid-auc:0.80026\n[1100]\ttrain-auc:0.83925\tvalid-auc:0.80045\n[1200]\ttrain-auc:0.84175\tvalid-auc:0.80052\n[1300]\ttrain-auc:0.84412\tvalid-auc:0.80069\n[1390]\ttrain-auc:0.84625\tvalid-auc:0.80058\nFold: 3\n[0]\ttrain-auc:0.74142\tvalid-auc:0.73806\n[100]\ttrain-auc:0.78138\tvalid-auc:0.77336\n[200]\ttrain-auc:0.79953\tvalid-auc:0.78600\n[300]\ttrain-auc:0.81032\tvalid-auc:0.79224\n[400]\ttrain-auc:0.81680\tvalid-auc:0.79520\n[500]\ttrain-auc:0.82157\tvalid-auc:0.79683\n[600]\ttrain-auc:0.82510\tvalid-auc:0.79757\n[700]\ttrain-auc:0.82850\tvalid-auc:0.79814\n[800]\ttrain-auc:0.83150\tvalid-auc:0.79855\n[900]\ttrain-auc:0.83432\tvalid-auc:0.79876\n[1000]\ttrain-auc:0.83707\tvalid-auc:0.79881\n[1100]\ttrain-auc:0.83951\tvalid-auc:0.79890\n[1187]\ttrain-auc:0.84172\tvalid-auc:0.79884\nFold: 
4\n[0]\ttrain-auc:0.74220\tvalid-auc:0.73824\n[100]\ttrain-auc:0.78137\tvalid-auc:0.77385\n[200]\ttrain-auc:0.79813\tvalid-auc:0.78707\n[300]\ttrain-auc:0.80948\tvalid-auc:0.79505\n[400]\ttrain-auc:0.81598\tvalid-auc:0.79867\n[500]\ttrain-auc:0.82092\tvalid-auc:0.80101\n[600]\ttrain-auc:0.82460\tvalid-auc:0.80220\n[700]\ttrain-auc:0.82787\tvalid-auc:0.80290\n[800]\ttrain-auc:0.83081\tvalid-auc:0.80341\n[900]\ttrain-auc:0.83362\tvalid-auc:0.80377\n[1000]\ttrain-auc:0.83633\tvalid-auc:0.80405\n[1100]\ttrain-auc:0.83878\tvalid-auc:0.80417\n[1200]\ttrain-auc:0.84130\tvalid-auc:0.80422\n[1300]\ttrain-auc:0.84359\tvalid-auc:0.80429\n[1400]\ttrain-auc:0.84595\tvalid-auc:0.80425\n[1451]\ttrain-auc:0.84712\tvalid-auc:0.80429\nFold: 5\n[0]\ttrain-auc:0.74109\tvalid-auc:0.73568\n[100]\ttrain-auc:0.78148\tvalid-auc:0.77249\n[200]\ttrain-auc:0.79892\tvalid-auc:0.78576\n[300]\ttrain-auc:0.80975\tvalid-auc:0.79338\n[400]\ttrain-auc:0.81626\tvalid-auc:0.79729\n[500]\ttrain-auc:0.82108\tvalid-auc:0.79939\n[600]\ttrain-auc:0.82481\tvalid-auc:0.80072\n[700]\ttrain-auc:0.82828\tvalid-auc:0.80163\n[800]\ttrain-auc:0.83121\tvalid-auc:0.80220\n[900]\ttrain-auc:0.83400\tvalid-auc:0.80255\n[1000]\ttrain-auc:0.83665\tvalid-auc:0.80272\n[1100]\ttrain-auc:0.83932\tvalid-auc:0.80298\n[1200]\ttrain-auc:0.84181\tvalid-auc:0.80310\n[1300]\ttrain-auc:0.84416\tvalid-auc:0.80306\n[1342]\ttrain-auc:0.84506\tvalid-auc:0.80304\nOOF score : 0.8020215700198703\n","name":"stdout"}]},{"metadata":{"trusted":true},"cell_type":"code","source":"xgboost_preds = np.mean(xgb_preds,axis=0)","execution_count":35,"outputs":[]},{"metadata":{"id":"EApbw1EmIziF"},"cell_type":"markdown","source":"# 4-Let's try Local Ensemble"},{"metadata":{"id":"zE4v4KWVAg-R","outputId":"a5975d65-1b07-46b3-f485-34b20e400603","trusted":true},"cell_type":"code","source":"#Validation Ensemble\nblend = oof_cat*0.5+ oof_lgb*0.4 + oof_xgb*0.1\nroc_auc_score(y,blend ) 
","execution_count":36,"outputs":[{"output_type":"execute_result","execution_count":36,"data":{"text/plain":"0.805004547455695"},"metadata":{}}]},{"metadata":{},"cell_type":"markdown","source":"# 5-Submission"},{"metadata":{"trusted":true,"id":"rfKGEbPo2QJH"},"cell_type":"code","source":"test['target'] =catboost_preds*0.5+ lightgbm_preds*0.4 +xgboost_preds*0.1\nsubmission = test[['ID', 'target']]\nsubmission.to_csv('UmojaHack-Challenge#3-Top1-Solution.csv',index = False)","execution_count":37,"outputs":[]}],"metadata":{"kernelspec":{"name":"python3","display_name":"Python 3","language":"python"},"language_info":{"name":"python","version":"3.7.9","mimetype":"text/x-python","codemirror_mode":{"name":"ipython","version":3},"pygments_lexer":"ipython3","nbconvert_exporter":"python","file_extension":".py"}},"nbformat":4,"nbformat_minor":4} --------------------------------------------------------------------------------