├── .all-contributorsrc ├── .gitignore ├── LICENSE ├── README.md ├── arguments.py ├── baselines ├── canonical_ensemble.py └── canonical_resampling.py ├── data ├── Mammo_0.1noised_test.csv ├── Mammo_0.1noised_train.csv ├── Mammo_0.1noised_valid.csv ├── Mammo_0.25noised_test.csv ├── Mammo_0.25noised_train.csv ├── Mammo_0.25noised_valid.csv ├── Mammo_0.4noised_test.csv ├── Mammo_0.4noised_train.csv ├── Mammo_0.4noised_valid.csv ├── Mammo_test.csv ├── Mammo_train.csv └── Mammo_valid.csv ├── environment.py ├── main.py ├── mesa-example.ipynb ├── mesa.py ├── requirements.txt ├── sac_src ├── model.py ├── replay_memory.py ├── sac.py └── utils.py └── utils.py /.all-contributorsrc: -------------------------------------------------------------------------------- 1 | { 2 | "files": [ 3 | "README.md" 4 | ], 5 | "imageSize": 100, 6 | "commit": false, 7 | "badgeTemplate": "-orange.svg\">", 8 | "contributors": [ 9 | { 10 | "login": "ZhiningLiu1998", 11 | "name": "Zhining Liu", 12 | "avatar_url": "https://avatars.githubusercontent.com/u/26108487?v=4", 13 | "profile": "http://zhiningliu.com", 14 | "contributions": [ 15 | "ideas", 16 | "code" 17 | ] 18 | } 19 | ], 20 | "contributorsPerLine": 7, 21 | "projectName": "mesa", 22 | "projectOwner": "ZhiningLiu1998", 23 | "repoType": "github", 24 | "repoHost": "https://github.com", 25 | "skipCi": true 26 | } 27 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Zhining Liu 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |

MESA: Meta-sampler for imbalanced learning




MESA: Boost Ensemble Imbalanced Learning with MEta-SAmpler (NeurIPS 2020) 33 |


36 | Links: 37 | Paper | 38 | PDF with Appendix | 39 | Video | 40 | arXiv | 41 | Zhihu/知乎 42 |

43 | 44 | **MESA is a ***meta-learning-based ensemble learning framework*** for solving class-imbalanced learning problems. It is a task-agnostic general-purpose solution that is able to boost most of the existing machine learning models' performance on imbalanced data.** 45 | 46 | 47 | 48 | # Cite Us 49 | 50 | **If you find this repository helpful in your work or research, we would greatly appreciate citations to the following paper:** 51 | 52 | ``` 53 | @inproceedings{liu2020mesa, 54 | title={MESA: Boost Ensemble Imbalanced Learning with MEta-SAmpler}, 55 | author={Liu, Zhining and Wei, Pengfei and Jiang, Jing and Cao, Wei and Bian, Jiang and Chang, Yi}, 56 | booktitle={Conference on Neural Information Processing Systems}, 57 | year={2020}, 58 | } 59 | ``` 60 | 61 | # Table of Contents 62 | 63 | - [Cite Us](#cite-us) 64 | - [Table of Contents](#table-of-contents) 65 | - [Background](#background) 66 | - [About MESA](#about-mesa) 67 | - [Pros and Cons of MESA](#pros-and-cons-of-mesa) 68 | - [Requirements](#requirements) 69 | - [Usage](#usage) 70 | - [Running main.py](#running-mainpy) 71 | - [Running mesa-example.ipynb](#running-mesa-exampleipynb) 72 | - [Visualization and Results](#visualization-and-results) 73 | - [From mesa-example.ipynb](#from-mesa-exampleipynb) 74 | - [Class distribution of Mammography dataset](#class-distribution-of-mammography-dataset) 75 | - [Visualize the meta-training process](#visualize-the-meta-training-process) 76 | - [Comparison with baseline methods](#comparison-with-baseline-methods) 77 | - [Other results](#other-results) 78 | - [Dataset description](#dataset-description) 79 | - [Comparisons of MESA with under-sampling-based EIL methods](#comparisons-of-mesa-with-under-sampling-based-eil-methods) 80 | - [Comparisons of MESA with over-sampling-based EIL methods](#comparisons-of-mesa-with-over-sampling-based-eil-methods) 81 | - [Comparisons of MESA with resampling-based EIL methods](#comparisons-of-mesa-with-resampling-based-eil-methods) 82 | - [Miscellaneous](#miscellaneous) 83 | - [References](#references) 84 | - [Contributors ✨](#contributors-) 85 | 86 | 87 | # Background 88 | 89 | ## About MESA 90 | 91 | We introduce a novel ensemble imbalanced learning (EIL) framework named MESA. It adaptively resamples the training set in iterations to get multiple classifiers and forms a cascade ensemble model. MESA directly learns a parameterized sampling strategy (i.e., meta-sampler) from data to optimize the final metric beyond following random heuristics. It consists of three parts: ***meta sampling*** as well as ***ensemble training*** to build ensemble classifiers, and ***meta-training*** to optimize the meta-sampler. 92 | 93 | The figure below gives an overview of the MESA framework. 94 | 95 | ![image](https://github.com/ZhiningLiu1998/figures/blob/master/mesa/framework.png) 96 | 97 | ## Pros and Cons of MESA 98 | 99 | Here are some personal thoughts on the advantages and disadvantages of MESA. More discussions are welcome! 100 | 101 | **Pros:** 102 | - 🍎 *Wide compatiblilty.* 103 | We decoupled the model-training and meta-training process in MESA, making it compatible with most of the existing machine learning models. 104 | - 🍎 *High data efficiency.* 105 | MESA performs strictly balanced under-sampling to train each base-learner in the ensemble. This makes it more data-efficient than other methods, especially on highly skewed data sets. 
106 | - 🍎 *Good performance.* 107 | The sampling strategy is optimized for better final generalization performance, we expect this can provide us with a better ensemble model. 108 | - 🍎 *Transferability.* 109 | We use only task-agnostic meta-information during meta-training, which means that a meta-sampler can be directly used in unseen new tasks, thereby greatly reducing the computational cost brought about by meta-training. 110 | 111 | **Cons:** 112 | - 🍏 *Meta-training cost.* 113 | Meta-training repeats the ensemble training process multiple times, which can be costly in practice (By shrinking the dataset used in meta-training, the computational cost can be reduced at the cost of minor performance loss). 114 | - 🍏 *Need to set aside a separate validation set for training.* 115 | The meta-state is formed by computing the error distribution on both the training and validation sets. 116 | - 🍏 *Possible unstable performance on small datasets.* 117 | Small datasets may cause the obtained error distribution statistics to be inaccurate/unstable, which will interfere with the meta-training process. 118 | 119 | # Requirements 120 | **Main dependencies:** 121 | - [Python](https://www.python.org/) (>=3.5) 122 | - [PyTorch](https://pytorch.org/) (=1.0.0) 123 | - [Gym](https://gym.openai.com/) (>=0.17.3) 124 | - [pandas](https://pandas.pydata.org/) (>=0.23.4) 125 | - [numpy](https://numpy.org/) (>=1.11) 126 | - [scikit-learn](https://scikit-learn.org/stable/) (>=0.20.1) 127 | - [imbalanced-learn](https://imbalanced-learn.readthedocs.io/en/stable/index.html) (=0.5.0, optional, for baseline methods) 128 | 129 | To install requirements, run: 130 | 131 | ```Shell 132 | pip install -r requirements.txt 133 | ``` 134 | 135 | > **NOTE**: this implementation requires an old version of PyTorch (v1.0.0). 136 | > You may want to start a new conda environment to run our code. The step-by-step guide is as follows (using torch-cpu for an example): 137 | > - `conda create --name mesa python=3.7.11` 138 | > - `conda activate mesa` 139 | > - `conda install pytorch-cpu==1.0.0 torchvision-cpu==0.2.1 cpuonly -c pytorch` 140 | > - `pip install -r requirements.txt` 141 | > 142 | > These commands should help you to get ready for running mesa. If you have any further questions, please feel free to open an issue or drop me an email. 
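
As a quick sanity check after installing, you can print the versions of the main dependencies and compare them with the list above (a minimal sketch for convenience, not a script shipped with this repository):

```python
# Print installed versions of MESA's main dependencies.
# Expected values are simply the ones listed in the Requirements section above.
import torch, gym, numpy, pandas, sklearn

print('torch        ', torch.__version__)    # 1.0.0
print('gym          ', gym.__version__)      # >= 0.17.3
print('numpy        ', numpy.__version__)    # >= 1.11
print('pandas       ', pandas.__version__)   # >= 0.23.4
print('scikit-learn ', sklearn.__version__)  # >= 0.20.1
```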
143 | 144 | # Usage 145 | 146 | A typical usage example: 147 | 148 | ```python 149 | # load dataset & prepare environment 150 | args = parser.parse_args() 151 | rater = Rater(args.metric) 152 | X_train, y_train, X_valid, y_valid, X_test, y_test = load_dataset(args.dataset) 153 | base_estimator = DecisionTreeClassifier() 154 | 155 | # meta-training 156 | mesa = Mesa( 157 | args=args, 158 | base_estimator=base_estimator, 159 | n_estimators=10) 160 | mesa.meta_fit(X_train, y_train, X_valid, y_valid, X_test, y_test) 161 | 162 | # ensemble training 163 | mesa.fit(X_train, y_train, X_valid, y_valid) 164 | 165 | # evaluate 166 | y_pred_test = mesa.predict_proba(X_test)[:, 1] 167 | score = rater.score(y_test, y_pred_test) 168 | ``` 169 | 170 | ## Running [main.py](https://github.com/ZhiningLiu1998/mesa/blob/master/main.py) 171 | 172 | Here is an example: 173 | 174 | ```powershell 175 | python main.py --dataset Mammo --meta_verbose 10 --update_steps 1000 176 | ``` 177 | 178 | You can get help with arguments by running: 179 | 180 | ```powershell 181 | python main.py --help 182 | ``` 183 | 184 | ``` 185 | optional arguments: 186 | # Soft Actor-critic Arguments 187 | -h, --help show this help message and exit 188 | --env-name ENV_NAME 189 | --policy POLICY Policy Type: Gaussian | Deterministic (default: 190 | Gaussian) 191 | --eval EVAL Evaluates a policy every 10 episode (default: 192 | True) 193 | --gamma G discount factor for reward (default: 0.99) 194 | --tau G target smoothing coefficient(τ) (default: 0.01) 195 | --lr G learning rate (default: 0.001) 196 | --lr_decay_steps N step_size of StepLR learning rate decay scheduler 197 | (default: 10) 198 | --lr_decay_gamma N gamma of StepLR learning rate decay scheduler 199 | (default: 0.99) 200 | --alpha G Temperature parameter α determines the relative 201 | importance of the entropy term against the reward 202 | (default: 0.1) 203 | --automatic_entropy_tuning G 204 | Automaically adjust α (default: False) 205 | --seed N random seed (default: None) 206 | --batch_size N batch size (default: 64) 207 | --hidden_size N hidden size (default: 50) 208 | --updates_per_step N model updates per simulator step (default: 1) 209 | --update_steps N maximum number of steps (default: 1000) 210 | --start_steps N Steps sampling random actions (default: 500) 211 | --target_update_interval N 212 | Value target update per no. of updates per step 213 | (default: 1) 214 | --replay_size N size of replay buffer (default: 1000) 215 | 216 | # Mesa Arguments 217 | --cuda run on CUDA (default: False) 218 | --dataset N the dataset used for meta-training (default: Mammo) 219 | --metric N the metric used for evaluate (default: aucprc) 220 | --reward_coefficient N 221 | --num_bins N number of bins (default: 5). state-size = 2 * 222 | num_bins. 223 | --sigma N sigma of the Gaussian function used in meta-sampling 224 | (default: 0.2) 225 | --max_estimators N maximum number of base estimators in each meta- 226 | training episode (default: 10) 227 | --meta_verbose N number of episodes between verbose outputs. If 'full' 228 | print log for each base estimator (default: 10) 229 | --meta_verbose_mean_episodes N 230 | number of episodes used for compute latest mean score 231 | in verbose outputs. 232 | --verbose N enable verbose when ensemble fit (default: False) 233 | --random_state N random_state (default: None) 234 | --train_ir N imbalance ratio of the training set after meta- 235 | sampling (default: 1) 236 | --train_ratio N the ratio of the data used in meta-training. 
set 237 | train_ratio<1 to use a random subset for meta-training 238 | (default: 1) 239 | ``` 240 | 241 | ## Running [mesa-example.ipynb](https://github.com/ZhiningLiu1998/mesa/blob/master/mesa-example.ipynb) 242 | 243 | We include a highly imbalanced dataset [Mammography](https://imbalanced-learn.readthedocs.io/en/stable/generated/imblearn.datasets.fetch_datasets.html#imblearn.datasets.fetch_datasets) (#majority class instances = 10,923, #minority class instances = 260, imbalance ratio = 42.012) and its variants with flip label noise for quick testing and visualization of MESA and other baselines. 244 | You can use [mesa-example.ipynb](https://github.com/ZhiningLiu1998/mesa/blob/master/mesa-example.ipynb) to quickly: 245 | - conduct a comparative experiment 246 | - visualize the meta-training process of MESA 247 | - visualize the experimental results of MESA and other baselines 248 | 249 | **Please check [mesa-example.ipynb](https://github.com/ZhiningLiu1998/mesa/blob/master/mesa-example.ipynb) for more details.** 250 | 251 | # Visualization and Results 252 | 253 | ## From [mesa-example.ipynb](https://github.com/ZhiningLiu1998/mesa/blob/master/mesa-example.ipynb) 254 | 255 | ### Class distribution of Mammography dataset 256 | ![image](https://github.com/ZhiningLiu1998/figures/blob/master/mesa/class-distribution.png) 257 | 258 | ### Visualize the meta-training process 259 | 260 |


263 | 264 | ### Comparison with baseline methods 265 | ![image](https://github.com/ZhiningLiu1998/figures/blob/master/mesa/result.png) 266 | 267 | ## Other results 268 | 269 | ### Dataset description 270 | 271 | ![image](https://github.com/ZhiningLiu1998/figures/blob/master/mesa/datasets.png) 272 | 273 | ### Comparisons of MESA with under-sampling-based EIL methods 274 | 275 | ![image](https://github.com/ZhiningLiu1998/figures/blob/master/mesa/comp-USEIL.png) 276 | 277 | ### Comparisons of MESA with over-sampling-based EIL methods 278 | 279 | ![image](https://github.com/ZhiningLiu1998/figures/blob/master/mesa/comp-OSEIL.png) 280 | 281 | ### Comparisons of MESA with resampling-based EIL methods 282 | 283 | ![image](https://github.com/ZhiningLiu1998/figures/blob/master/mesa/comp-resample.png) 284 | 285 | 286 | 287 | # Miscellaneous 288 | 289 | **Check out our previous work [Self-paced Ensemble](https://github.com/ZhiningLiu1998/self-paced-ensemble) (ICDE 2020). 290 | It is a simple heuristic-based method, but being very fast and works reasonably well.** 291 | 292 | **This repository contains:** 293 | - Implementation of MESA 294 | - Implementation of 7 ensemble imbalanced learning baselines 295 | - `SMOTEBoost` [1] 296 | - `SMOTEBagging` [2] 297 | - `RAMOBoost` [3] 298 | - `RUSBoost` [4] 299 | - `UnderBagging` [5] 300 | - `BalanceCascade` [6] 301 | - `SelfPacedEnsemble` [7] 302 | - Implementation of 11 resampling imbalanced learning baselines [8] 303 | 304 | > **NOTE:** The implementations of the above baseline methods are based on [imbalanced-algorithms](https://github.com/dialnd/imbalanced-algorithms) and [imbalanced-learn](https://github.com/scikit-learn-contrib/imbalanced-learn). 305 | 306 | # References 307 | 308 | | # | Reference | 309 | |-----|-------| 310 | | [1] | N. V. Chawla, A. Lazarevic, L. O. Hall, and K. W. Bowyer, Smoteboost: Improving prediction of the minority class in boosting. in European conference on principles of data mining and knowledge discovery. Springer, 2003, pp. 107–119| 311 | | [2] | S. Wang and X. Yao, Diversity analysis on imbalanced data sets by using ensemble models. in 2009 IEEE Symposium on Computational Intelligence and Data Mining. IEEE, 2009, pp. 324–331.| 312 | | [3] | Sheng Chen, Haibo He, and Edwardo A Garcia. 2010. RAMOBoost: ranked minority oversampling in boosting. IEEE Transactions on Neural Networks 21, 10 (2010), 1624–1642.| 313 | | [4] | C. Seiffert, T. M. Khoshgoftaar, J. Van Hulse, and A. Napolitano, Rusboost: A hybrid approach to alleviating class imbalance. IEEE Transactions on Systems, Man, and Cybernetics-Part A: Systems and Humans, vol. 40, no. 1, pp. 185–197, 2010.| 314 | | [5] | R. Barandela, R. M. Valdovinos, and J. S. Sanchez, New applications´ of ensembles of classifiers. Pattern Analysis & Applications, vol. 6, no. 3, pp. 245–256, 2003.| 315 | | [6] | X.-Y. Liu, J. Wu, and Z.-H. Zhou, Exploratory undersampling for class-imbalance learning. IEEE Transactions on Systems, Man, and Cybernetics, Part B (Cybernetics), vol. 39, no. 2, pp. 539–550, 2009. | 316 | | [7] | Zhining Liu, Wei Cao, Zhifeng Gao, Jiang Bian, Hechang Chen, Yi Chang, and Tie-Yan Liu. 2019. Self-paced Ensemble for Highly Imbalanced Massive Data Classification. 2020 IEEE 36th International Conference on Data Engineering (ICDE). IEEE, 2020, pp. 841-852. 317 | | [8] | Guillaume Lemaître, Fernando Nogueira, and Christos K. Aridas. Imbalanced-learn: A python toolbox to tackle the curse of imbalanced datasets in machine learning. 
Journal of Machine Learning Research, 18(17):1–5, 2017. | 318 | ## Contributors ✨ 319 | 320 | Thanks goes to these wonderful people ([emoji key](https://allcontributors.org/docs/en/emoji-key)): 321 | 322 | 323 | 324 | 325 | 326 | 327 | 328 | 329 |

- [Zhining Liu](http://zhiningliu.com) (🤔 ideas, 💻 code)
330 | 331 | 332 | 333 | 334 | 335 | 336 | This project follows the [all-contributors](https://github.com/all-contributors/all-contributors) specification. Contributions of any kind welcome! 337 | -------------------------------------------------------------------------------- /arguments.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | parser = argparse.ArgumentParser(description='Mesa Arguments') 4 | parser.add_argument('--env-name', default="MESA-SAC") 5 | 6 | # SAC arguments 7 | parser.add_argument('--policy', default="Gaussian", 8 | help='Policy Type: Gaussian | Deterministic (default: Gaussian)') 9 | parser.add_argument('--eval', type=bool, default=True, 10 | help='Evaluates a policy every 10 episode (default: True)') 11 | parser.add_argument('--gamma', type=float, default=0.99, metavar='G', 12 | help='discount factor for reward (default: 0.99)') 13 | parser.add_argument('--tau', type=float, default=0.01, metavar='G', 14 | help='target smoothing coefficient(τ) (default: 0.01)') 15 | parser.add_argument('--lr', type=float, default=0.001, metavar='G', 16 | help='learning rate (default: 0.001)') 17 | parser.add_argument('--lr_decay_steps', type=int, default=10, metavar='N', 18 | help='step_size of StepLR learning rate decay scheduler (default: 10)') 19 | parser.add_argument('--lr_decay_gamma', type=float, default=0.99, metavar='N', 20 | help='gamma of StepLR learning rate decay scheduler (default: 0.99)') 21 | parser.add_argument('--alpha', type=float, default=0.1, metavar='G', 22 | help='Temperature parameter α determines the relative importance of the entropy\ 23 | term against the reward (default: 0.1)') 24 | parser.add_argument('--automatic_entropy_tuning', type=bool, default=False, metavar='G', 25 | help='Automaically adjust α (default: False)') 26 | parser.add_argument('--seed', type=int, default=None, metavar='N', 27 | help='random seed (default: None)') 28 | parser.add_argument('--batch_size', type=int, default=64, metavar='N', 29 | help='batch size (default: 64)') 30 | parser.add_argument('--hidden_size', type=int, default=50, metavar='N', 31 | help='hidden size (default: 50)') 32 | parser.add_argument('--updates_per_step', type=int, default=1, metavar='N', 33 | help='model updates per simul|ator step (default: 1)') 34 | parser.add_argument('--update_steps', type=int, default=1000, metavar='N', 35 | help='maximum number of steps (default: 1000)') 36 | parser.add_argument('--start_steps', type=int, default=500, metavar='N', 37 | help='Steps sampling random actions (default: 500)') 38 | parser.add_argument('--target_update_interval', type=int, default=1, metavar='N', 39 | help='Value target update per no. of updates per step (default: 1)') 40 | parser.add_argument('--replay_size', type=int, default=1000, metavar='N', 41 | help='size of replay buffer (default: 1000)') 42 | parser.add_argument('--cuda', action="store_true", default=False, 43 | help='run on CUDA (default: False)') 44 | 45 | # MESA arguments 46 | parser.add_argument('--dataset', type=str, default='Mammo', metavar='N', 47 | help='the dataset used for meta-training (default: Mammo)') 48 | parser.add_argument('--metric', type=str, default='aucprc', metavar='N', 49 | help='the metric used for evaluate (default: aucprc)') 50 | parser.add_argument('--reward_coefficient', type=float, default=100, metavar='N') 51 | parser.add_argument('--num_bins', type=int, default=5, metavar='N', 52 | help='number of bins (default: 5). 
state-size = 2 * num_bins.') 53 | parser.add_argument('--sigma', type=float, default=0.2, metavar='N', 54 | help='sigma of the Gaussian function used in meta-sampling (default: 0.2)') 55 | parser.add_argument('--max_estimators', type=int, default=10, metavar='N', 56 | help='maximum number of base estimators in each meta-training episode (default: 10)') 57 | parser.add_argument('--meta_verbose', type=int, default=10, metavar='N', 58 | help='number of episodes between verbose outputs. \ 59 | If \'full\' print log for each base estimator (default: 10)') 60 | parser.add_argument('--meta_verbose_mean_episodes', type=int, default=25, metavar='N', 61 | help='number of episodes used for compute latest mean score in verbose outputs.') 62 | parser.add_argument('--verbose', type=bool, default=False, metavar='N', 63 | help='enable verbose when ensemble fit (default: False)') 64 | parser.add_argument('--random_state', type=int, default=None, metavar='N', 65 | help='random_state (default: None)') 66 | parser.add_argument('--train_ir', type=float, default=1, metavar='N', 67 | help='imbalance ratio of the training set after meta-sampling (default: 1)') 68 | parser.add_argument('--train_ratio', type=float, default=1, metavar='N', 69 | help='the ratio of the data used in meta-training. \ 70 | set train_ratio<1 to use a random subset for meta-training (default: 1)') -------------------------------------------------------------------------------- /baselines/canonical_ensemble.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Sun Jan 13 14:32:27 2020 4 | @author: ZhiningLiu1998 5 | mailto: zhining.liu@outlook.com 6 | 7 | NOTE: The implementation of SMOTEBoost/RUSBoost/RAMOBoost was obtained from 8 | imbalanced-algorithms: https://github.com/dialnd/imbalanced-algorithms 9 | """ 10 | 11 | import numpy as np 12 | import sklearn 13 | from sklearn.base import is_regressor 14 | from sklearn.ensemble import AdaBoostClassifier 15 | from sklearn.tree import DecisionTreeClassifier 16 | from sklearn.ensemble.forest import BaseForest 17 | from sklearn.neighbors import NearestNeighbors 18 | from sklearn.preprocessing import normalize 19 | from sklearn.tree.tree import BaseDecisionTree 20 | from sklearn.utils import check_random_state 21 | from sklearn.utils import check_X_y 22 | from sklearn.utils import check_array 23 | from sklearn.preprocessing import binarize 24 | from utils import * 25 | from collections import Counter 26 | import warnings 27 | warnings.filterwarnings("ignore") 28 | 29 | 30 | class SMOTE(object): 31 | """Implementation of Synthetic Minority Over-Sampling Technique (SMOTE). 32 | SMOTE performs oversampling of the minority class by picking target 33 | minority class samples and their nearest minority class neighbors and 34 | generating new samples that linearly combine features of each target 35 | sample with features of its selected minority class neighbors [1]. 36 | Parameters 37 | ---------- 38 | k_neighbors : int, optional (default=5) 39 | Number of nearest neighbors. 40 | random_state : int or None, optional (default=None) 41 | If int, random_state is the seed used by the random number generator. 42 | If None, the random number generator is the RandomState instance used 43 | by np.random. 44 | References 45 | ---------- 46 | .. [1] N. V. Chawla, K. W. Bowyer, L. O. Hall, and P. Kegelmeyer. "SMOTE: 47 | Synthetic Minority Over-Sampling Technique." Journal of Artificial 48 | Intelligence Research (JAIR), 2002. 
49 | """ 50 | 51 | def __init__(self, k_neighbors=5, random_state=None): 52 | self.k = k_neighbors 53 | self.random_state = random_state 54 | 55 | def sample(self, n_samples): 56 | """Generate samples. 57 | Parameters 58 | ---------- 59 | n_samples : int 60 | Number of new synthetic samples. 61 | Returns 62 | ------- 63 | S : array, shape = [n_samples, n_features] 64 | Returns synthetic samples. 65 | """ 66 | np.random.seed(seed=self.random_state) 67 | 68 | S = np.zeros(shape=(n_samples, self.n_features)) 69 | # Calculate synthetic samples. 70 | for i in range(n_samples): 71 | j = np.random.randint(0, self.X.shape[0]) 72 | 73 | # Find the NN for each sample. 74 | # Exclude the sample itself. 75 | nn = self.neigh.kneighbors(self.X[j].reshape(1, -1), 76 | return_distance=False)[:, 1:] 77 | nn_index = np.random.choice(nn[0]) 78 | 79 | dif = self.X[nn_index] - self.X[j] 80 | gap = np.random.random() 81 | 82 | S[i, :] = self.X[j, :] + gap * dif[:] 83 | 84 | return S 85 | 86 | def fit(self, X): 87 | """Train model based on input data. 88 | Parameters 89 | ---------- 90 | X : array-like, shape = [n_minority_samples, n_features] 91 | Holds the minority samples. 92 | """ 93 | self.X = X 94 | self.n_minority_samples, self.n_features = self.X.shape 95 | 96 | # Learn nearest neighbors. 97 | self.neigh = NearestNeighbors(n_neighbors=self.k + 1) 98 | self.neigh.fit(self.X) 99 | 100 | return self 101 | 102 | class SMOTEBoost(AdaBoostClassifier): 103 | """Implementation of SMOTEBoost. 104 | SMOTEBoost introduces data sampling into the AdaBoost algorithm by 105 | oversampling the minority class using SMOTE on each boosting iteration [1]. 106 | This implementation inherits methods from the scikit-learn 107 | AdaBoostClassifier class, only modifying the `fit` method. 108 | Parameters 109 | ---------- 110 | n_samples : int, optional (default=100) 111 | Number of new synthetic samples per boosting step. 112 | k_neighbors : int, optional (default=5) 113 | Number of nearest neighbors. 114 | base_estimator : object, optional (default=DecisionTreeClassifier) 115 | The base estimator from which the boosted ensemble is built. 116 | Support for sample weighting is required, as well as proper `classes_` 117 | and `n_classes_` attributes. 118 | n_estimators : int, optional (default=50) 119 | The maximum number of estimators at which boosting is terminated. 120 | In case of perfect fit, the learning procedure is stopped early. 121 | learning_rate : float, optional (default=1.) 122 | Learning rate shrinks the contribution of each classifier by 123 | ``learning_rate``. There is a trade-off between ``learning_rate`` and 124 | ``n_estimators``. 125 | algorithm : {'SAMME', 'SAMME.R'}, optional (default='SAMME.R') 126 | If 'SAMME.R' then use the SAMME.R real boosting algorithm. 127 | ``base_estimator`` must support calculation of class probabilities. 128 | If 'SAMME' then use the SAMME discrete boosting algorithm. 129 | The SAMME.R algorithm typically converges faster than SAMME, 130 | achieving a lower test error with fewer boosting iterations. 131 | random_state : int or None, optional (default=None) 132 | If int, random_state is the seed used by the random number generator. 133 | If None, the random number generator is the RandomState instance used 134 | by np.random. 135 | References 136 | ---------- 137 | .. [1] N. V. Chawla, A. Lazarevic, L. O. Hall, and K. W. Bowyer. 138 | "SMOTEBoost: Improving Prediction of the Minority Class in 139 | Boosting." 
European Conference on Principles of Data Mining and 140 | Knowledge Discovery (PKDD), 2003. 141 | """ 142 | 143 | def __init__(self, 144 | n_samples=100, 145 | k_neighbors=5, 146 | base_estimator=None, 147 | n_estimators=50, 148 | learning_rate=1., 149 | algorithm='SAMME.R', 150 | random_state=None): 151 | 152 | self.n_samples = n_samples 153 | self.algorithm = algorithm 154 | self.smote = SMOTE(k_neighbors=k_neighbors, 155 | random_state=random_state) 156 | 157 | super(SMOTEBoost, self).__init__( 158 | base_estimator=base_estimator, 159 | n_estimators=n_estimators, 160 | learning_rate=learning_rate, 161 | random_state=random_state) 162 | 163 | def fit(self, X, y, sample_weight=None, minority_target=None): 164 | """Build a boosted classifier/regressor from the training set (X, y), 165 | performing SMOTE during each boosting step. 166 | Parameters 167 | ---------- 168 | X : {array-like, sparse matrix} of shape = [n_samples, n_features] 169 | The training input samples. Sparse matrix can be CSC, CSR, COO, 170 | DOK, or LIL. COO, DOK, and LIL are converted to CSR. The dtype is 171 | forced to DTYPE from tree._tree if the base classifier of this 172 | ensemble weighted boosting classifier is a tree or forest. 173 | y : array-like of shape = [n_samples] 174 | The target values (class labels in classification, real numbers in 175 | regression). 176 | sample_weight : array-like of shape = [n_samples], optional 177 | Sample weights. If None, the sample weights are initialized to 178 | 1 / n_samples. 179 | minority_target : int 180 | Minority class label. 181 | Returns 182 | ------- 183 | self : object 184 | Returns self. 185 | Notes 186 | ----- 187 | Based on the scikit-learn v0.18 AdaBoostClassifier and 188 | BaseWeightBoosting `fit` methods. 189 | """ 190 | # Check that algorithm is supported. 191 | if self.algorithm not in ('SAMME', 'SAMME.R'): 192 | raise ValueError("algorithm %s is not supported" % self.algorithm) 193 | 194 | # Check parameters. 195 | if self.learning_rate <= 0: 196 | raise ValueError("learning_rate must be greater than zero") 197 | 198 | if (self.base_estimator is None or 199 | isinstance(self.base_estimator, (BaseDecisionTree, 200 | BaseForest))): 201 | DTYPE = np.float64 # from fast_dict.pxd 202 | dtype = DTYPE 203 | accept_sparse = 'csc' 204 | else: 205 | dtype = None 206 | accept_sparse = ['csr', 'csc'] 207 | 208 | X, y = check_X_y(X, y, accept_sparse=accept_sparse, dtype=dtype, 209 | y_numeric=is_regressor(self)) 210 | 211 | if sample_weight is None: 212 | # Initialize weights to 1 / n_samples. 213 | sample_weight = np.empty(X.shape[0], dtype=np.float64) 214 | sample_weight[:] = 1. / X.shape[0] 215 | else: 216 | sample_weight = check_array(sample_weight, ensure_2d=False) 217 | # Normalize existing weights. 218 | sample_weight = sample_weight / sample_weight.sum(dtype=np.float64) 219 | 220 | # Check that the sample weights sum is positive. 221 | if sample_weight.sum() <= 0: 222 | raise ValueError( 223 | "Attempting to fit with a non-positive " 224 | "weighted number of samples.") 225 | 226 | if minority_target is None: 227 | # Determine the minority class label. 228 | stats_c_ = Counter(y) 229 | maj_c_ = max(stats_c_, key=stats_c_.get) 230 | min_c_ = min(stats_c_, key=stats_c_.get) 231 | self.minority_target = min_c_ 232 | else: 233 | self.minority_target = minority_target 234 | 235 | # Check parameters. 236 | self._validate_estimator() 237 | 238 | # Clear any previous fit results. 
239 | self.estimators_ = [] 240 | self.estimator_weights_ = np.zeros(self.n_estimators, dtype=np.float64) 241 | self.estimator_errors_ = np.ones(self.n_estimators, dtype=np.float64) 242 | 243 | random_state = check_random_state(self.random_state) 244 | 245 | self.total_training_instances = 0 246 | self.total_training_instances_list = [] 247 | for iboost in range(self.n_estimators): 248 | # SMOTE step. 249 | X_min = X[np.where(y == self.minority_target)] 250 | self.smote.fit(X_min) 251 | X_syn = self.smote.sample(self.n_samples) 252 | y_syn = np.full(X_syn.shape[0], fill_value=self.minority_target, 253 | dtype=np.int64) 254 | 255 | # Normalize synthetic sample weights based on current training set. 256 | sample_weight_syn = np.empty(X_syn.shape[0], dtype=np.float64) 257 | sample_weight_syn[:] = 1. / X.shape[0] 258 | 259 | # print ('Boosting Iter: {} n_train: {} n_smote: {}'.format( 260 | # iboost, len(X_min), len(y_syn))) 261 | 262 | # Combine the original and synthetic samples. 263 | X = np.vstack((X, X_syn)) 264 | y = np.append(y, y_syn) 265 | 266 | self.total_training_instances = self.total_training_instances + len(y) 267 | self.total_training_instances_list.append(self.total_training_instances) 268 | print(f'SMOTEBoost total training size: {self.total_training_instances}') 269 | 270 | # Combine the weights. 271 | sample_weight = \ 272 | np.append(sample_weight, sample_weight_syn).reshape(-1, 1) 273 | sample_weight = \ 274 | np.squeeze(normalize(sample_weight, axis=0, norm='l1')) 275 | 276 | # X, y, sample_weight = shuffle(X, y, sample_weight, 277 | # random_state=random_state) 278 | 279 | # Boosting step. 280 | sample_weight, estimator_weight, estimator_error = self._boost( 281 | iboost, 282 | X, y, 283 | sample_weight, 284 | random_state) 285 | 286 | # Early termination. 287 | if sample_weight is None: 288 | print('sample_weight: {}'.format(sample_weight)) 289 | break 290 | 291 | self.estimator_weights_[iboost] = estimator_weight 292 | self.estimator_errors_[iboost] = estimator_error 293 | 294 | # Stop if error is zero. 295 | # if estimator_error == 0: 296 | # print('error: {}'.format(estimator_error)) 297 | # break 298 | 299 | sample_weight_sum = np.sum(sample_weight) 300 | 301 | # Stop if the sum of sample weights has become non-positive. 302 | if sample_weight_sum <= 0: 303 | print('sample_weight_sum: {}'.format(sample_weight_sum)) 304 | break 305 | 306 | if iboost < self.n_estimators - 1: 307 | # Normalize. 308 | sample_weight /= sample_weight_sum 309 | 310 | return self 311 | 312 | class RankedMinorityOversampler(object): 313 | """Implementation of Ranked Minority Oversampling (RAMO). 314 | Oversample the minority class by picking samples according to a specified 315 | sampling distribution. 316 | Parameters 317 | ---------- 318 | k_neighbors_1 : int, optional (default=5) 319 | Number of nearest neighbors used to adjust the sampling probability of 320 | the minority examples. 321 | k_neighbors_2 : int, optional (default=5) 322 | Number of nearest neighbors used to generate the synthetic data 323 | instances. 324 | alpha : float, optional (default=0.3) 325 | Scaling coefficient. 326 | random_state : int or None, optional (default=None) 327 | If int, random_state is the seed used by the random number generator. 328 | If None, the random number generator is the RandomState instance used 329 | by np.random. 
330 | """ 331 | 332 | def __init__(self, k_neighbors_1=5, k_neighbors_2=5, alpha=0.3, 333 | random_state=None): 334 | self.k_neighbors_1 = k_neighbors_1 335 | self.k_neighbors_2 = k_neighbors_2 336 | self.alpha = alpha 337 | self.random_state = random_state 338 | 339 | def sample(self, n_samples): 340 | """Generate samples. 341 | Parameters 342 | ---------- 343 | n_samples : int 344 | Number of new synthetic samples. 345 | Returns 346 | ------- 347 | S : array, shape = [n_samples, n_features] 348 | Returns synthetic samples. 349 | """ 350 | np.random.seed(seed=self.random_state) 351 | 352 | S = np.zeros(shape=(n_samples, self.n_features)) 353 | # Calculate synthetic samples. 354 | for i in range(n_samples): 355 | # Choose a sample according to the sampling distribution, r. 356 | j = np.random.choice(self.n_minority_samples, p=self.r) 357 | 358 | # Find the NN for each sample. 359 | # Exclude the sample itself. 360 | nn = self.neigh_2.kneighbors(self.X_min[j].reshape(1, -1), 361 | return_distance=False)[:, 1:] 362 | nn_index = np.random.choice(nn[0]) 363 | 364 | dif = self.X_min[nn_index] - self.X_min[j] 365 | gap = np.random.random() 366 | 367 | S[i, :] = self.X_min[j, :] + gap * dif[:] 368 | 369 | return S 370 | 371 | def fit(self, X, y, sample_weight=None, minority_target=None): 372 | """Train model based on input data. 373 | Parameters 374 | ---------- 375 | X : array-like, shape = [n_total_samples, n_features] 376 | Holds the majority and minority samples. 377 | y : array-like, shape = [n_total_samples] 378 | Holds the class targets for samples. 379 | sample_weight : array-like of shape = [n_samples], optional 380 | Sample weights multiplier. If None, the multiplier is 1. 381 | minority_target : int, optional (default=None) 382 | Minority class label. 383 | """ 384 | if minority_target is None: 385 | # Determine the minority class label. 386 | stats_c_ = Counter(y) 387 | maj_c_ = max(stats_c_, key=stats_c_.get) 388 | min_c_ = min(stats_c_, key=stats_c_.get) 389 | self.minority_target = min_c_ 390 | else: 391 | self.minority_target = minority_target 392 | 393 | self.X_min = X[y == self.minority_target] 394 | self.n_minority_samples, self.n_features = self.X_min.shape 395 | 396 | neigh_1 = NearestNeighbors(n_neighbors=self.k_neighbors_1 + 1) 397 | neigh_1.fit(X) 398 | nn = neigh_1.kneighbors(self.X_min, return_distance=False)[:, 1:] 399 | 400 | if sample_weight is None: 401 | sample_weight_min = np.ones(shape=(len(self.minority_target))) 402 | else: 403 | assert(len(y) == len(sample_weight)) 404 | sample_weight_min = sample_weight[y == self.minority_target] 405 | 406 | self.r = np.zeros(shape=(self.n_minority_samples)) 407 | for i in range(self.n_minority_samples): 408 | majority_neighbors = 0 409 | for n in nn[i]: 410 | if y[n] != self.minority_target: 411 | majority_neighbors += 1 412 | 413 | self.r[i] = 1. / (1 + np.exp(-self.alpha * majority_neighbors)) 414 | 415 | self.r = (self.r * sample_weight_min).reshape(1, -1) 416 | self.r = np.squeeze(normalize(self.r, axis=1, norm='l1')) 417 | 418 | # Learn nearest neighbors. 419 | self.neigh_2 = NearestNeighbors(n_neighbors=self.k_neighbors_2 + 1) 420 | self.neigh_2.fit(self.X_min) 421 | 422 | return self 423 | 424 | 425 | class RAMOBoost(AdaBoostClassifier): 426 | """Implementation of RAMOBoost. 427 | RAMOBoost introduces data sampling into the AdaBoost algorithm by 428 | oversampling the minority class according to a specified sampling 429 | distribution on each boosting iteration [1]. 
430 | This implementation inherits methods from the scikit-learn 431 | AdaBoostClassifier class, only modifying the `fit` method. 432 | Parameters 433 | ---------- 434 | n_samples : int, optional (default=100) 435 | Number of new synthetic samples per boosting step. 436 | k_neighbors_1 : int, optional (default=5) 437 | Number of nearest neighbors used to adjust the sampling probability of 438 | the minority examples. 439 | k_neighbors_2 : int, optional (default=5) 440 | Number of nearest neighbors used to generate the synthetic data 441 | instances. 442 | alpha : float, optional (default=0.3) 443 | Scaling coefficient. 444 | base_estimator : object, optional (default=DecisionTreeClassifier) 445 | The base estimator from which the boosted ensemble is built. 446 | Support for sample weighting is required, as well as proper `classes_` 447 | and `n_classes_` attributes. 448 | n_estimators : int, optional (default=50) 449 | The maximum number of estimators at which boosting is terminated. 450 | In case of perfect fit, the learning procedure is stopped early. 451 | learning_rate : float, optional (default=1.) 452 | Learning rate shrinks the contribution of each classifier by 453 | ``learning_rate``. There is a trade-off between ``learning_rate`` and 454 | ``n_estimators``. 455 | algorithm : {'SAMME', 'SAMME.R'}, optional (default='SAMME.R') 456 | If 'SAMME.R' then use the SAMME.R real boosting algorithm. 457 | ``base_estimator`` must support calculation of class probabilities. 458 | If 'SAMME' then use the SAMME discrete boosting algorithm. 459 | The SAMME.R algorithm typically converges faster than SAMME, 460 | achieving a lower test error with fewer boosting iterations. 461 | random_state : int or None, optional (default=None) 462 | If int, random_state is the seed used by the random number generator. 463 | If None, the random number generator is the RandomState instance used 464 | by np.random. 465 | References 466 | ---------- 467 | .. [1] S. Chen, H. He, and E. A. Garcia. "RAMOBoost: Ranked Minority 468 | Oversampling in Boosting". IEEE Transactions on Neural Networks, 469 | 2010. 470 | """ 471 | 472 | def __init__(self, 473 | n_samples=100, 474 | k_neighbors_1=5, 475 | k_neighbors_2=5, 476 | alpha=0.3, 477 | base_estimator=None, 478 | n_estimators=50, 479 | learning_rate=1., 480 | algorithm='SAMME.R', 481 | random_state=None): 482 | 483 | self.n_samples = n_samples 484 | self.algorithm = algorithm 485 | self.ramo = RankedMinorityOversampler(k_neighbors_1, k_neighbors_2, 486 | alpha, random_state=random_state) 487 | 488 | super(RAMOBoost, self).__init__( 489 | base_estimator=base_estimator, 490 | n_estimators=n_estimators, 491 | learning_rate=learning_rate, 492 | random_state=random_state) 493 | 494 | def fit(self, X, y, sample_weight=None, minority_target=None): 495 | """Build a boosted classifier/regressor from the training set (X, y), 496 | performing random undersampling during each boosting step. 497 | Parameters 498 | ---------- 499 | X : {array-like, sparse matrix} of shape = [n_samples, n_features] 500 | The training input samples. Sparse matrix can be CSC, CSR, COO, 501 | DOK, or LIL. COO, DOK, and LIL are converted to CSR. The dtype is 502 | forced to DTYPE from tree._tree if the base classifier of this 503 | ensemble weighted boosting classifier is a tree or forest. 504 | y : array-like of shape = [n_samples] 505 | The target values (class labels in classification, real numbers in 506 | regression). 
507 | sample_weight : array-like of shape = [n_samples], optional 508 | Sample weights. If None, the sample weights are initialized to 509 | 1 / n_samples. 510 | minority_target : int 511 | Minority class label. 512 | Returns 513 | ------- 514 | self : object 515 | Returns self. 516 | Notes 517 | ----- 518 | Based on the scikit-learn v0.18 AdaBoostClassifier and 519 | BaseWeightBoosting `fit` methods. 520 | """ 521 | # Check that algorithm is supported. 522 | if self.algorithm not in ('SAMME', 'SAMME.R'): 523 | raise ValueError("algorithm %s is not supported" % self.algorithm) 524 | 525 | # Check parameters. 526 | if self.learning_rate <= 0: 527 | raise ValueError("learning_rate must be greater than zero") 528 | 529 | if (self.base_estimator is None or 530 | isinstance(self.base_estimator, (BaseDecisionTree, 531 | BaseForest))): 532 | DTYPE = np.float64 # from fast_dict.pxd 533 | dtype = DTYPE 534 | accept_sparse = 'csc' 535 | else: 536 | dtype = None 537 | accept_sparse = ['csr', 'csc'] 538 | 539 | X, y = check_X_y(X, y, accept_sparse=accept_sparse, dtype=dtype, 540 | y_numeric=is_regressor(self)) 541 | 542 | if sample_weight is None: 543 | # Initialize weights to 1 / n_samples. 544 | sample_weight = np.empty(X.shape[0], dtype=np.float64) 545 | sample_weight[:] = 1. / X.shape[0] 546 | else: 547 | sample_weight = check_array(sample_weight, ensure_2d=False) 548 | # Normalize existing weights. 549 | sample_weight = sample_weight / sample_weight.sum(dtype=np.float64) 550 | 551 | # Check that the sample weights sum is positive. 552 | if sample_weight.sum() <= 0: 553 | raise ValueError( 554 | "Attempting to fit with a non-positive " 555 | "weighted number of samples.") 556 | 557 | if minority_target is None: 558 | # Determine the minority class label. 559 | stats_c_ = Counter(y) 560 | maj_c_ = max(stats_c_, key=stats_c_.get) 561 | min_c_ = min(stats_c_, key=stats_c_.get) 562 | self.minority_target = min_c_ 563 | else: 564 | self.minority_target = minority_target 565 | 566 | # Check parameters. 567 | self._validate_estimator() 568 | 569 | # Clear any previous fit results. 570 | self.estimators_ = [] 571 | self.estimator_weights_ = np.zeros(self.n_estimators, dtype=np.float64) 572 | self.estimator_errors_ = np.ones(self.n_estimators, dtype=np.float64) 573 | 574 | random_state = check_random_state(self.random_state) 575 | 576 | self.total_training_instances = 0 577 | self.total_training_instances_list = [] 578 | for iboost in range(self.n_estimators): 579 | # RAMO step. 580 | self.ramo.fit(X, y, sample_weight=sample_weight) 581 | X_syn = self.ramo.sample(self.n_samples) 582 | y_syn = np.full(X_syn.shape[0], fill_value=self.minority_target, 583 | dtype=np.int64) 584 | 585 | # Combine the minority and majority class samples. 586 | X = np.vstack((X, X_syn)) 587 | y = np.append(y, y_syn) 588 | 589 | self.total_training_instances = self.total_training_instances + len(y) 590 | self.total_training_instances_list.append(self.total_training_instances) 591 | print (f'RAMOBoost total training size: {self.total_training_instances}') 592 | 593 | # Normalize synthetic sample weights based on current training set. 594 | sample_weight_syn = np.empty(X_syn.shape[0], dtype=np.float64) 595 | sample_weight_syn[:] = 1. / X.shape[0] 596 | 597 | # Combine the weights. 
598 | sample_weight = \ 599 | np.append(sample_weight, sample_weight_syn).reshape(-1, 1) 600 | sample_weight = \ 601 | np.squeeze(normalize(sample_weight, axis=0, norm='l1')) 602 | 603 | # X, y, sample_weight = shuffle(X, y, sample_weight, 604 | # random_state=random_state) 605 | 606 | # Boosting step. 607 | sample_weight, estimator_weight, estimator_error = self._boost( 608 | iboost, 609 | X, y, 610 | sample_weight, 611 | random_state) 612 | 613 | # Early termination. 614 | # if sample_weight is None: 615 | # break 616 | 617 | self.estimator_weights_[iboost] = estimator_weight 618 | self.estimator_errors_[iboost] = estimator_error 619 | 620 | # Stop if error is zero. 621 | # if estimator_error == 0: 622 | # break 623 | 624 | sample_weight_sum = np.sum(sample_weight) 625 | 626 | # Stop if the sum of sample weights has become non-positive. 627 | # if sample_weight_sum <= 0: 628 | # break 629 | 630 | if iboost < self.n_estimators - 1: 631 | # Normalize. 632 | sample_weight /= sample_weight_sum 633 | 634 | return self 635 | 636 | class RandomUnderSampler(object): 637 | """Implementation of random undersampling (RUS). 638 | Undersample the majority class(es) by randomly picking samples with or 639 | without replacement. 640 | Parameters 641 | ---------- 642 | with_replacement : bool, optional (default=True) 643 | Undersample with replacement. 644 | return_indices : bool, optional (default=False) 645 | Whether or not to return the indices of the samples randomly selected 646 | from the majority class. 647 | random_state : int or None, optional (default=None) 648 | If int, random_state is the seed used by the random number generator. 649 | If None, the random number generator is the RandomState instance used 650 | by np.random. 651 | """ 652 | 653 | def __init__(self, with_replacement=True, return_indices=False, 654 | random_state=None): 655 | self.return_indices = return_indices 656 | self.with_replacement = with_replacement 657 | self.random_state = random_state 658 | 659 | def sample(self, n_samples): 660 | """Perform undersampling. 661 | Parameters 662 | ---------- 663 | n_samples : int 664 | Number of samples to remove. 665 | Returns 666 | ------- 667 | S : array, shape = [n_majority_samples - n_samples, n_features] 668 | Returns synthetic samples. 669 | """ 670 | np.random.seed(seed=self.random_state) 671 | 672 | if self.n_majority_samples <= n_samples: 673 | n_samples = self.n_majority_samples 674 | 675 | idx = np.random.choice(self.n_majority_samples, 676 | # size=self.n_majority_samples - n_samples, 677 | size=self.n_minority_samples, 678 | replace=self.with_replacement) 679 | 680 | if self.return_indices: 681 | return (self.X_maj[idx], idx) 682 | else: 683 | return self.X_maj[idx] 684 | 685 | def fit(self, X_maj, X_min): 686 | """Train model based on input data. 687 | Parameters 688 | ---------- 689 | X : array-like, shape = [n_majority_samples, n_features] 690 | Holds the majority samples. 691 | """ 692 | self.X_maj = X_maj 693 | self.X_min = X_min 694 | self.n_majority_samples, self.n_features = self.X_maj.shape 695 | self.n_minority_samples = self.X_min.shape[0] 696 | 697 | return self 698 | 699 | import pandas as pd 700 | 701 | class RUSBoost(AdaBoostClassifier): 702 | """Implementation of RUSBoost. 703 | RUSBoost introduces data sampling into the AdaBoost algorithm by 704 | undersampling the majority class using random undersampling (with or 705 | without replacement) on each boosting iteration [1]. 
706 | This implementation inherits methods from the scikit-learn 707 | AdaBoostClassifier class, only modifying the `fit` method. 708 | Parameters 709 | ---------- 710 | n_samples : int, optional (default=100) 711 | Number of new synthetic samples per boosting step. 712 | min_ratio : float (default=1.0) 713 | Minimum ratio of majority to minority class samples to generate. 714 | with_replacement : bool, optional (default=True) 715 | Undersample with replacement. 716 | base_estimator : object, optional (default=DecisionTreeClassifier) 717 | The base estimator from which the boosted ensemble is built. 718 | Support for sample weighting is required, as well as proper `classes_` 719 | and `n_classes_` attributes. 720 | n_estimators : int, optional (default=50) 721 | The maximum number of estimators at which boosting is terminated. 722 | In case of perfect fit, the learning procedure is stopped early. 723 | learning_rate : float, optional (default=1.) 724 | Learning rate shrinks the contribution of each classifier by 725 | ``learning_rate``. There is a trade-off between ``learning_rate`` and 726 | ``n_estimators``. 727 | algorithm : {'SAMME', 'SAMME.R'}, optional (default='SAMME.R') 728 | If 'SAMME.R' then use the SAMME.R real boosting algorithm. 729 | ``base_estimator`` must support calculation of class probabilities. 730 | If 'SAMME' then use the SAMME discrete boosting algorithm. 731 | The SAMME.R algorithm typically converges faster than SAMME, 732 | achieving a lower test error with fewer boosting iterations. 733 | random_state : int or None, optional (default=None) 734 | If int, random_state is the seed used by the random number generator. 735 | If None, the random number generator is the RandomState instance used 736 | by np.random. 737 | References 738 | ---------- 739 | .. [1] C. Seiffert, T. M. Khoshgoftaar, J. V. Hulse, and A. Napolitano. 740 | "RUSBoost: Improving Classification Performance when Training Data 741 | is Skewed". International Conference on Pattern Recognition 742 | (ICPR), 2008. 743 | """ 744 | 745 | def __init__(self, 746 | n_samples=100, 747 | min_ratio=1.0, 748 | with_replacement=True, 749 | base_estimator=None, 750 | n_estimators=10, 751 | learning_rate=1., 752 | algorithm='SAMME.R', 753 | random_state=None): 754 | 755 | self.n_samples = n_samples 756 | self.min_ratio = min_ratio 757 | self.algorithm = algorithm 758 | self.rus = RandomUnderSampler(with_replacement=with_replacement, 759 | return_indices=True, 760 | random_state=random_state) 761 | 762 | super(RUSBoost, self).__init__( 763 | base_estimator=base_estimator, 764 | n_estimators=n_estimators, 765 | learning_rate=learning_rate, 766 | random_state=random_state) 767 | 768 | def fit(self, X, y, sample_weight=None, minority_target=None, verbose=False): 769 | """Build a boosted classifier/regressor from the training set (X, y), 770 | performing random undersampling during each boosting step. 771 | Parameters 772 | ---------- 773 | X : {array-like, sparse matrix} of shape = [n_samples, n_features] 774 | The training input samples. Sparse matrix can be CSC, CSR, COO, 775 | DOK, or LIL. COO, DOK, and LIL are converted to CSR. The dtype is 776 | forced to DTYPE from tree._tree if the base classifier of this 777 | ensemble weighted boosting classifier is a tree or forest. 778 | y : array-like of shape = [n_samples] 779 | The target values (class labels in classification, real numbers in 780 | regression). 781 | sample_weight : array-like of shape = [n_samples], optional 782 | Sample weights. 
If None, the sample weights are initialized to 783 | 1 / n_samples. 784 | minority_target : int 785 | Minority class label. 786 | Returns 787 | ------- 788 | self : object 789 | Returns self. 790 | Notes 791 | ----- 792 | Based on the scikit-learn v0.18 AdaBoostClassifier and 793 | BaseWeightBoosting `fit` methods. 794 | """ 795 | # Check that algorithm is supported. 796 | if self.algorithm not in ('SAMME', 'SAMME.R'): 797 | raise ValueError("algorithm %s is not supported" % self.algorithm) 798 | 799 | # Check parameters. 800 | if self.learning_rate <= 0: 801 | raise ValueError("learning_rate must be greater than zero") 802 | 803 | if (self.base_estimator is None or 804 | isinstance(self.base_estimator, (BaseDecisionTree, 805 | BaseForest))): 806 | DTYPE = np.float64 # from fast_dict.pxd 807 | dtype = DTYPE 808 | accept_sparse = 'csc' 809 | else: 810 | dtype = None 811 | accept_sparse = ['csr', 'csc'] 812 | 813 | X, y = check_X_y(X, y, accept_sparse=accept_sparse, dtype=dtype, 814 | y_numeric=is_regressor(self)) 815 | 816 | if sample_weight is None: 817 | # Initialize weights to 1 / n_samples. 818 | sample_weight = np.empty(X.shape[0], dtype=np.float64) 819 | sample_weight[:] = 1. / X.shape[0] 820 | else: 821 | sample_weight = check_array(sample_weight, ensure_2d=False) 822 | # Normalize existing weights. 823 | sample_weight = sample_weight / sample_weight.sum(dtype=np.float64) 824 | 825 | # Check that the sample weights sum is positive. 826 | if sample_weight.sum() <= 0: 827 | raise ValueError( 828 | "Attempting to fit with a non-positive " 829 | "weighted number of samples.") 830 | 831 | if minority_target is None: 832 | # Determine the minority class label. 833 | stats_c_ = Counter(y) 834 | maj_c_ = max(stats_c_, key=stats_c_.get) 835 | min_c_ = min(stats_c_, key=stats_c_.get) 836 | self.minority_target = min_c_ 837 | else: 838 | self.minority_target = minority_target 839 | 840 | # Check parameters. 841 | self._validate_estimator() 842 | 843 | # Clear any previous fit results. 844 | self.estimators_ = [] 845 | self.estimator_weights_ = np.zeros(self.n_estimators, dtype=np.float64) 846 | self.estimator_errors_ = np.ones(self.n_estimators, dtype=np.float64) 847 | 848 | random_state = check_random_state(self.random_state) 849 | 850 | for iboost in range(self.n_estimators): 851 | # Random undersampling step. 852 | X_maj = X[np.where(y != self.minority_target)] 853 | X_min = X[np.where(y == self.minority_target)] 854 | self.rus.fit(X_maj, X_min) 855 | # self.rus.fit(X_maj) 856 | 857 | n_maj = X_maj.shape[0] 858 | n_min = X_min.shape[0] 859 | if n_maj - self.n_samples < int(n_min * self.min_ratio): 860 | self.n_samples = n_maj - int(n_min * self.min_ratio) 861 | X_rus, X_idx = self.rus.sample(self.n_samples) 862 | 863 | if verbose: 864 | print ('{:<12s} | Iter: {} X_maj: {} X_rus: {} X_min: {}'.format( 865 | 'RUSBoost', iboost, len(X_maj), len(X_rus), len(X_min))) 866 | 867 | y_rus = y[np.where(y != self.minority_target)][X_idx] 868 | y_min = y[np.where(y == self.minority_target)] 869 | 870 | sample_weight_rus = \ 871 | sample_weight[np.where(y != self.minority_target)][X_idx] 872 | sample_weight_min = \ 873 | sample_weight[np.where(y == self.minority_target)] 874 | 875 | # Combine the minority and majority class samples. 876 | X_train = np.vstack((X_rus, X_min)) 877 | y_train = np.append(y_rus, y_min) 878 | 879 | # Combine the weights. 
880 | sample_weight_train = \ 881 | np.append(sample_weight_rus, sample_weight_min).reshape(-1, 1) 882 | sample_weight_train = \ 883 | np.squeeze(normalize(sample_weight_train, axis=0, norm='l1')) 884 | 885 | # Boosting step. 886 | _, estimator_weight_train, estimator_error = self._boost( 887 | iboost, 888 | X_train, y_train, 889 | sample_weight_train, 890 | random_state) 891 | 892 | y_predict_proba = self.estimators_[-1].predict_proba(X) 893 | y_predict = self.classes_.take(np.argmax(y_predict_proba, axis=1), 894 | axis=0) 895 | # Instances incorrectly classified 896 | incorrect = y_predict != y 897 | # Error fraction 898 | estimator_error = np.mean( 899 | np.average(incorrect, weights=sample_weight, axis=0)) 900 | n_classes = self.n_classes_ 901 | classes = self.classes_ 902 | y_codes = np.array([-1. / (n_classes - 1), 1.]) 903 | y_coding = y_codes.take(classes == y[:, np.newaxis]) 904 | estimator_weight = (-1. * self.learning_rate 905 | * ((n_classes - 1.) / n_classes) 906 | * (y_coding * (y_predict_proba)).sum(axis=1)) 907 | 908 | if not iboost == self.n_estimators - 1: 909 | # Only boost positive weights 910 | sample_weight *= np.exp(estimator_weight * ((sample_weight > 0) | (estimator_weight < 0))) 911 | 912 | # Early termination. 913 | if sample_weight is None: 914 | break 915 | 916 | self.estimator_weights_[iboost] = estimator_weight_train 917 | self.estimator_errors_[iboost] = estimator_error 918 | 919 | # Stop if error is zero. 920 | # if estimator_error == 0: 921 | # print('error: {}'.format(estimator_error)) 922 | # break 923 | 924 | sample_weight_sum = np.sum(sample_weight) 925 | 926 | # Stop if the sum of sample weights has become non-positive. 927 | if sample_weight_sum <= 0: 928 | break 929 | 930 | if iboost < self.n_estimators - 1: 931 | # Normalize. 
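                # Dividing by the total keeps `sample_weight` a valid
                # distribution over the full training set for the next
                # boosting round.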
932 | sample_weight /= sample_weight_sum 933 | 934 | return self 935 | 936 | import pandas as pd 937 | from imblearn.over_sampling import SMOTE as SMOTE_IMB 938 | from sklearn.tree import DecisionTreeClassifier as DT 939 | 940 | class SMOTEBagging(): 941 | def __init__(self, 942 | n_samples=100, 943 | min_ratio=1.0, 944 | with_replacement=True, 945 | base_estimator=None, 946 | n_estimators=10, 947 | learning_rate=1., 948 | algorithm='SAMME.R', 949 | random_state=None): 950 | 951 | self.base_estimator = base_estimator 952 | self.n_estimators = n_estimators 953 | self.random_state = random_state 954 | self.estimators_ = [] 955 | 956 | def fit(self, X, y, verbose=False): 957 | 958 | self.total_training_instances = 0 959 | self.total_training_instances_list = [] 960 | self.estimators_ = [] 961 | df = pd.DataFrame(X); df['label'] = y 962 | df_maj = df[df['label']==0]; n_maj = len(df_maj) 963 | df_min = df[df['label']==1]; n_min = len(df_min) 964 | cols = df.columns.tolist(); cols.remove('label') 965 | 966 | for ibagging in range(self.n_estimators): 967 | b = min(0.1*((ibagging%10)+1), 1) 968 | train_maj = df_maj.sample(frac=1, replace=True) 969 | train_min = df_min.sample(frac=(n_maj/n_min)*b, replace=True) 970 | n_min_train = train_min.shape[0] 971 | N = int((n_maj/n_min_train)*(1-b)*100) 972 | ratio = min((n_min_train + N) / n_maj, 1) 973 | df_k = train_maj.append(train_min) 974 | 975 | if N > 0: 976 | X_train, y_train = SMOTE_IMB( 977 | k_neighbors=min(5, len(train_min)-1), 978 | ratio=ratio, 979 | random_state=self.random_state, 980 | ).fit_resample( 981 | df_k[cols], df_k['label'] 982 | ) 983 | else: 984 | X_train, y_train = df_k[cols], df_k['label'] 985 | 986 | self.total_training_instances = self.total_training_instances + len(y_train) 987 | self.total_training_instances_list.append(self.total_training_instances) 988 | if verbose: 989 | print ('{:<12s} | Iter: {} |b: {:.1f}|n_train: {}|n_smote: {}|n_total_train: {}'.format( 990 | 'SMOTEBagging', ibagging, b, len(y_train), len(y_train)-len(df_k), self.total_training_instances)) 991 | model = clone(self.base_estimator).fit(X_train, y_train) 992 | self.estimators_.append(model) 993 | 994 | return self 995 | 996 | def predict_proba(self, X): 997 | 998 | y_pred = np.array([model.predict_proba(X)[:, 1] for model in self.estimators_]).mean(axis=0) 999 | if y_pred.ndim == 1: 1000 | y_pred = y_pred[:, np.newaxis] 1001 | if y_pred.shape[1] == 1: 1002 | y_pred = np.append(1-y_pred, y_pred, axis=1) 1003 | return y_pred 1004 | 1005 | def predict(self, X): 1006 | 1007 | y_pred_binarazed = binarize(self.predict_proba(X)[:,1].reshape(1,-1), threshold=0.5)[0] 1008 | return y_pred_binarazed 1009 | 1010 | 1011 | import pandas as pd 1012 | from sklearn.tree import DecisionTreeClassifier as DT 1013 | 1014 | class UnderBagging(): 1015 | def __init__(self, 1016 | n_samples=100, 1017 | min_ratio=1.0, 1018 | with_replacement=True, 1019 | base_estimator=None, 1020 | n_estimators=10, 1021 | learning_rate=1., 1022 | algorithm='SAMME.R', 1023 | random_state=None): 1024 | 1025 | self.base_estimator = base_estimator 1026 | self.n_estimators = n_estimators 1027 | self.random_state = random_state 1028 | self.estimators_ = [] 1029 | 1030 | def fit(self, X, y, verbose=False): 1031 | 1032 | self.estimators_ = [] 1033 | df = pd.DataFrame(X); df['label'] = y 1034 | df_maj = df[df['label']==0]; n_maj = len(df_maj) 1035 | df_min = df[df['label']==1]; n_min = len(df_min) 1036 | cols = df.columns.tolist(); cols.remove('label') 1037 | 1038 | for ibagging in 
range(self.n_estimators): 1039 | train_maj = df_maj.sample(n=int(n_min), random_state=self.random_state) 1040 | train_min = df_min 1041 | if verbose: 1042 | print ('{:<12s} | Iter: {} X_maj: {} X_rus: {} X_min: {}'.format( 1043 | 'UnderBagging', ibagging, len(df_maj), len(train_maj), len(train_min))) 1044 | df_k = train_maj.append(train_min) 1045 | X_train, y_train = df_k[cols], df_k['label'] 1046 | model = clone(self.base_estimator).fit(X_train, y_train) 1047 | self.estimators_.append(model) 1048 | 1049 | return self 1050 | 1051 | def predict_proba(self, X): 1052 | 1053 | y_pred = np.array([model.predict_proba(X)[:, 1] for model in self.estimators_]).mean(axis=0) 1054 | if y_pred.ndim == 1: 1055 | y_pred = y_pred[:, np.newaxis] 1056 | if y_pred.shape[1] == 1: 1057 | y_pred = np.append(1-y_pred, y_pred, axis=1) 1058 | return y_pred 1059 | 1060 | def predict(self, X): 1061 | 1062 | y_pred_binarazed = binarize(self.predict_proba(X)[:,1].reshape(1,-1), threshold=0.5)[0] 1063 | return y_pred_binarazed 1064 | 1065 | 1066 | from sklearn.base import clone 1067 | class BalanceCascade(): 1068 | """ 1069 | The implementation of BalanceCascade. 1070 | Hyper-parameters: 1071 | base_estimator : scikit-learn classifier object 1072 | optional (default=DecisionTreeClassifier) 1073 | The base estimator from which the ensemble is built. 1074 | n_estimators: Number of iterations / estimators 1075 | k_bins: Number of hardness bins 1076 | """ 1077 | def __init__(self, base_estimator=DT(), n_estimators=10, random_state=None): 1078 | 1079 | self.base_estimator = base_estimator 1080 | self.n_estimators = n_estimators 1081 | self.random_state = random_state 1082 | self.estimators_ = [] 1083 | # Will be set in the fit function 1084 | self.feature_cols = None 1085 | 1086 | def _fit_baselearner(self, df_train): 1087 | 1088 | model = clone(self.base_estimator) 1089 | return model.fit(df_train[self.feature_cols], df_train['label']) 1090 | 1091 | def fit(self, X, y, verbose=False, visualize=False): 1092 | 1093 | self.estimators_ = [] 1094 | # Initialize majority & minority set 1095 | df = pd.DataFrame(X); df['label'] = y 1096 | df_maj = df[y==0]; n_maj = df_maj.shape[0] 1097 | df_min = df[y==1]; n_min = df_min.shape[0] 1098 | self.feature_cols = df.columns.tolist() 1099 | self.feature_cols.remove('label') 1100 | 1101 | ir = n_min / n_maj 1102 | keep_fp_rate = np.power(ir, 1/(self.n_estimators-1)) 1103 | 1104 | # Algorithm start 1105 | for ibagging in range(1, self.n_estimators): 1106 | df_train = df_maj.sample(n=n_min).append(df_min) 1107 | if visualize: 1108 | df_train.plot.scatter(x=0, y=1, s=3, c='label', colormap='coolwarm', title='Iter {} training set'.format(ibagging)) 1109 | if verbose: 1110 | print ('{:<12s} | Iter: {} X_maj: {} X_rus: {} X_min: {}'.format( 1111 | 'Cascade', ibagging, len(df_maj), len(df_min), len(df_min))) 1112 | self.estimators_.append(self._fit_baselearner(df_train)) 1113 | # drop "easy" majority samples 1114 | df_maj['pred_proba'] = self.predict(df_maj[self.feature_cols]) 1115 | df_maj = df_maj.sort_values(by='pred_proba', ascending=False)[:int(keep_fp_rate*len(df_maj)+1)] 1116 | 1117 | return self 1118 | 1119 | def predict_proba(self, X): 1120 | 1121 | y_pred = np.array([model.predict_proba(X)[:, 1] for model in self.estimators_]).mean(axis=0) 1122 | if y_pred.ndim == 1: 1123 | y_pred = y_pred[:, np.newaxis] 1124 | if y_pred.shape[1] == 1: 1125 | y_pred = np.append(1-y_pred, y_pred, axis=1) 1126 | return y_pred 1127 | 1128 | def predict(self, X): 1129 | 1130 | y_pred_binarazed = 
binarize(self.predict_proba(X)[:,1].reshape(1,-1), threshold=0.5)[0] 1131 | return y_pred_binarazed 1132 | 1133 | class SelfPacedEnsemble(): 1134 | """ Self-paced Ensemble (SPE) 1135 | 1136 | Parameters 1137 | ---------- 1138 | base_estimator : object, optional (default=sklearn.Tree.DecisionTreeClassifier()) 1139 | | The base estimator to fit on self-paced under-sampled subsets of the dataset. 1140 | | NO need to support sample weighting. 1141 | | Built-in `fit()`, `predict()`, `predict_proba()` methods are required. 1142 | 1143 | hardness_func : function, optional 1144 | | (default=`lambda y_true, y_pred: np.absolute(y_true-y_pred)`) 1145 | | User-specified classification hardness function 1146 | | | Parameters: 1147 | | | | y_true: 1-d array-like, shape = [n_samples] 1148 | | | | y_pred: 1-d array-like, shape = [n_samples] 1149 | | | Returns: 1150 | | | | hardness: 1-d array-like, shape = [n_samples] 1151 | 1152 | n_estimators : integer, optional (default=10) 1153 | | The number of base estimators in the ensemble. 1154 | 1155 | k_bins : integer, optional (default=10) 1156 | | The number of hardness bins that were used to approximate hardness distribution. 1157 | 1158 | random_state : integer / RandomState instance / None, optional (default=None) 1159 | | If integer, random_state is the seed used by the random number generator; 1160 | | If RandomState instance, random_state is the random number generator; 1161 | | If None, the random number generator is the RandomState instance used by 1162 | | `numpy.random`. 1163 | 1164 | Attributes 1165 | ---------- 1166 | base_estimator_ : estimator 1167 | | The base estimator from which the ensemble is grown. 1168 | 1169 | estimators_ : list of estimator 1170 | | The collection of fitted base estimators. 1171 | 1172 | 1173 | Example: 1174 | ``` 1175 | import numpy as np 1176 | from sklearn import datasets 1177 | from sklearn.tree import DecisionTreeClassifier 1178 | from src.self_paced_ensemble import SelfPacedEnsemble 1179 | from src.utils import ( 1180 | make_binary_classification_target, imbalance_train_test_split) 1181 | 1182 | X, y = datasets.fetch_covtype(return_X_y=True) 1183 | y = make_binary_classification_target(y, 7, True) 1184 | X_train, X_test, y_train, y_test = imbalance_train_test_split( 1185 | X, y, test_size=0.2, random_state=42) 1186 | 1187 | def absolute_error(y_true, y_pred): 1188 | # Self-defined classification hardness function 1189 | return np.absolute(y_true - y_pred) 1190 | 1191 | spe = SelfPacedEnsemble( 1192 | base_estimator=DecisionTreeClassifier(), 1193 | hardness_func=absolute_error, 1194 | n_estimators=10, 1195 | k_bins=10, 1196 | random_state=42, 1197 | ).fit( 1198 | X=X_train, 1199 | y=y_train, 1200 | ) 1201 | print('auc_prc_score: {}'.format(spe.score(X_test, y_test))) 1202 | ``` 1203 | 1204 | """ 1205 | def __init__(self, 1206 | base_estimator=DecisionTreeClassifier(), 1207 | hardness_func=cross_entropy, 1208 | n_estimators=10, 1209 | k_bins=10, 1210 | random_state=None): 1211 | self.base_estimator = base_estimator 1212 | self.estimators_ = [] 1213 | self._hardness_func = hardness_func 1214 | self._n_estimators = n_estimators 1215 | self._k_bins = k_bins 1216 | self._random_state = random_state 1217 | 1218 | def _fit_base_estimator(self, X, y): 1219 | """Private function used to train a single base estimator.""" 1220 | return sklearn.base.clone(self.base_estimator).fit(X, y) 1221 | 1222 | def _random_under_sampling(self, X_maj, y_maj, X_min, y_min): 1223 | """Private function used to perform random 
under-sampling.""" 1224 | np.random.seed(self._random_state) 1225 | idx = np.random.choice(len(X_maj), len(X_min), replace=False) 1226 | X_train = np.concatenate([X_maj[idx], X_min]) 1227 | y_train = np.concatenate([y_maj[idx], y_min]) 1228 | return X_train, y_train 1229 | 1230 | def _self_paced_under_sampling(self, 1231 | X_maj, y_maj, X_min, y_min, i_estimator): 1232 | """Private function used to perform self-paced under-sampling.""" 1233 | # Update hardness value estimation 1234 | y_pred_maj = self.predict_proba(X_maj)[:, 1] 1235 | hardness = self._hardness_func(y_maj, y_pred_maj) 1236 | 1237 | # If hardness values are not distinguishable, perform random smapling 1238 | if hardness.max() == hardness.min(): 1239 | X_train, y_train = self._random_under_sampling(X_maj, y_maj, X_min, y_min) 1240 | # Else allocate majority samples into k hardness bins 1241 | else: 1242 | step = (hardness.max()-hardness.min()) / self._k_bins 1243 | bins = []; ave_contributions = [] 1244 | for i_bins in range(self._k_bins): 1245 | idx = ( 1246 | (hardness >= i_bins*step + hardness.min()) & 1247 | (hardness < (i_bins+1)*step + hardness.min()) 1248 | ) 1249 | # Marginal samples with highest hardness value -> kth bin 1250 | if i_bins == (self._k_bins-1): 1251 | idx = idx | (hardness==hardness.max()) 1252 | bins.append(X_maj[idx]) 1253 | ave_contributions.append(hardness[idx].mean()) 1254 | 1255 | # Update self-paced factor alpha 1256 | alpha = np.tan(np.pi*0.5*(i_estimator/(self._n_estimators-1))) 1257 | # Caculate sampling weight 1258 | weights = 1 / (ave_contributions + alpha) 1259 | weights[np.isnan(weights)] = 0 1260 | # Caculate sample number from each bin 1261 | n_sample_bins = len(X_min) * weights / weights.sum() 1262 | n_sample_bins = n_sample_bins.astype(int)+1 1263 | 1264 | # Perform self-paced under-sampling 1265 | sampled_bins = [] 1266 | for i_bins in range(self._k_bins): 1267 | if min(len(bins[i_bins]), n_sample_bins[i_bins]) > 0: 1268 | np.random.seed(self._random_state) 1269 | idx = np.random.choice( 1270 | len(bins[i_bins]), 1271 | min(len(bins[i_bins]), n_sample_bins[i_bins]), 1272 | replace=False) 1273 | sampled_bins.append(bins[i_bins][idx]) 1274 | X_train_maj = np.concatenate(sampled_bins, axis=0) 1275 | y_train_maj = np.full(X_train_maj.shape[0], y_maj[0]) 1276 | X_train = np.concatenate([X_train_maj, X_min]) 1277 | y_train = np.concatenate([y_train_maj, y_min]) 1278 | 1279 | return X_train, y_train 1280 | 1281 | def fit(self, X, y, label_maj=0, label_min=1, verbose=False): 1282 | """Build a self-paced ensemble of estimators from the training set (X, y). 1283 | 1284 | Parameters 1285 | ---------- 1286 | X : {array-like, sparse matrix} of shape = [n_samples, n_features] 1287 | The training input samples. Sparse matrices are accepted only if 1288 | they are supported by the base estimator. 1289 | 1290 | y : array-like, shape = [n_samples] 1291 | The target values (class labels). 1292 | 1293 | label_maj : int, bool or float, optional (default=0) 1294 | The majority class label, default to be negative class. 1295 | 1296 | label_min : int, bool or float, optional (default=1) 1297 | The minority class label, default to be positive class. 
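        Notes
        -----
        A rough sketch of each iteration's sampling (see
        `_self_paced_under_sampling`): the self-paced factor is
        alpha = tan(pi/2 * i / (n_estimators - 1)), every hardness bin gets
        weight 1 / (mean bin hardness + alpha), and majority samples are drawn
        from each bin roughly in proportion to its weight. With k_bins=2, mean
        hardnesses [0.1, 0.9] and alpha=0.1, for instance, the weights are
        [5.0, 1.0], so about 5/6 of the sampled majority instances come from
        the easier bin; as alpha grows in later iterations the weights flatten
        and more of the hardest samples are kept.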
1298 | 1299 | Returns 1300 | ------ 1301 | self : object 1302 | """ 1303 | self.estimators_ = [] 1304 | # Initialize by spliting majority / minority set 1305 | X_maj = X[y==label_maj]; y_maj = y[y==label_maj] 1306 | X_min = X[y==label_min]; y_min = y[y==label_min] 1307 | 1308 | # Random under-sampling in the 1st round (cold start) 1309 | X_train, y_train = self._random_under_sampling( 1310 | X_maj, y_maj, X_min, y_min) 1311 | self.estimators_.append( 1312 | self._fit_base_estimator( 1313 | X_train, y_train)) 1314 | 1315 | # Loop start 1316 | for i_estimator in range(1, self._n_estimators): 1317 | X_train, y_train = self._self_paced_under_sampling( 1318 | X_maj, y_maj, X_min, y_min, i_estimator,) 1319 | if verbose: 1320 | print ('{:<12s} | Iter: {} X_maj: {} X_min: {} alpha: {:.3f}'.format( 1321 | 'SPEnsemble', i_estimator, len(X_maj), len(X_min), np.tan(np.pi*0.5*(i_estimator/(self._n_estimators-1))))) 1322 | self.estimators_.append( 1323 | self._fit_base_estimator( 1324 | X_train, y_train)) 1325 | 1326 | return self 1327 | 1328 | def predict_proba(self, X): 1329 | """Predict class probabilities for X. 1330 | 1331 | The predicted class probabilities of an input sample is computed as 1332 | the mean predicted class probabilities of the base estimators in the 1333 | ensemble. If base estimators do not implement a ``predict_proba`` 1334 | method, then it resorts to voting and the predicted class probabilities 1335 | of an input sample represents the proportion of estimators predicting 1336 | each class. 1337 | 1338 | Parameters 1339 | ---------- 1340 | X : {array-like, sparse matrix} of shape = [n_samples, n_features] 1341 | The training input samples. Sparse matrices are accepted only if 1342 | they are supported by the base estimator. 1343 | 1344 | Returns 1345 | ------- 1346 | p : array of shape = [n_samples, n_classes] 1347 | The class probabilities of the input samples. 1348 | """ 1349 | y_pred = np.array( 1350 | [model.predict_proba(X)[:, 1] for model in self.estimators_] 1351 | ).mean(axis=0) 1352 | if y_pred.ndim == 1: 1353 | y_pred = y_pred[:, np.newaxis] 1354 | if y_pred.shape[1] == 1: 1355 | y_pred = np.append(1-y_pred, y_pred, axis=1) 1356 | return y_pred 1357 | 1358 | def predict(self, X): 1359 | """Predict class for X. 1360 | 1361 | The predicted class of an input sample is computed as the class with 1362 | the highest mean predicted probability. If base estimators do not 1363 | implement a ``predict_proba`` method, then it resorts to voting. 1364 | 1365 | Parameters 1366 | ---------- 1367 | X : {array-like, sparse matrix} of shape = [n_samples, n_features] 1368 | The training input samples. Sparse matrices are accepted only if 1369 | they are supported by the base estimator. 1370 | 1371 | Returns 1372 | ------- 1373 | y : array of shape = [n_samples] 1374 | The predicted classes. 1375 | """ 1376 | y_pred_binarized = sklearn.preprocessing.binarize( 1377 | self.predict_proba(X)[:,1].reshape(1,-1), threshold=0.5)[0] 1378 | return y_pred_binarized 1379 | 1380 | def score(self, X, y): 1381 | """Returns the average precision score (equivalent to the area under 1382 | the precision-recall curve) on the given test data and labels. 1383 | 1384 | Parameters 1385 | ---------- 1386 | X : array-like, shape = (n_samples, n_features) 1387 | Test samples. 1388 | 1389 | y : array-like, shape = (n_samples) or (n_samples, n_outputs) 1390 | True labels for X. 1391 | 1392 | Returns 1393 | ------- 1394 | score : float 1395 | Average precision of self.predict_proba(X)[:, 1] wrt. y. 
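            This summarizes the precision-recall curve of the averaged
            positive-class probabilities; it is the same value as calling,
            for instance, sklearn.metrics.average_precision_score(y,
            spe.predict_proba(X)[:, 1]) on a fitted ensemble `spe`.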
1396 | """ 1397 | return sklearn.metrics.average_precision_score( 1398 | y, self.predict_proba(X)[:, 1]) -------------------------------------------------------------------------------- /baselines/canonical_resampling.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Sun Jan 13 14:32:27 2019 4 | @author: v-zhinli 5 | mailto: znliu19@mails.jlu.edu.cn / zhining.liu@outlook.com 6 | """ 7 | 8 | from imblearn.under_sampling import ( 9 | ClusterCentroids, 10 | NearMiss, 11 | RandomUnderSampler, 12 | EditedNearestNeighbours, 13 | AllKNN, 14 | TomekLinks, 15 | OneSidedSelection, 16 | RepeatedEditedNearestNeighbours, 17 | CondensedNearestNeighbour, 18 | NeighbourhoodCleaningRule, 19 | ) 20 | from imblearn.over_sampling import ( 21 | RandomOverSampler, SMOTE, ADASYN, BorderlineSMOTE, 22 | ) 23 | from imblearn.combine import ( 24 | SMOTEENN, SMOTETomek, 25 | ) 26 | 27 | from sklearn.tree import DecisionTreeClassifier as DT 28 | from collections import Counter 29 | from time import clock 30 | import pandas as pd 31 | 32 | class Error(Exception): 33 | pass 34 | 35 | class Resample_classifier(object): 36 | ''' 37 | Re-sampling methods for imbalance classification, based on imblearn python package. 38 | imblearn url: https://github.com/scikit-learn-contrib/imbalanced-learn 39 | Hyper-parameters: 40 | base_estimator : scikit-learn classifier object 41 | optional (default=DecisionTreeClassifier) 42 | The base estimator used for training after re-sampling 43 | ''' 44 | def __init__(self, base_estimator=DT(), resample_by='ORG'): 45 | self.base_estimator = base_estimator 46 | self.resample_by = resample_by 47 | 48 | def fit(self, X_train, y_train, verbose=False): 49 | start_time = clock() 50 | X_train_resampled, y_train_resampled = self.resample(X_train, y_train, by=self.resample_by) 51 | end_time = clock() 52 | self._last_resample_info = 'Resampling method: {}, class distribution from {} to {}, time used {}s'.format( 53 | self.resample_by, dict(Counter(y_train)), dict(Counter(y_train_resampled)), end_time - start_time, 54 | ) 55 | if verbose: 56 | print (self._last_resample_info) 57 | self.base_estimator.fit(X_train_resampled, y_train_resampled) 58 | 59 | def predict(self, X): 60 | return self.base_estimator.predict(X) 61 | 62 | def predict_proba(self, X): 63 | return self.base_estimator.predict_proba(X) 64 | 65 | def resample(self, X, y, by, random_state=None): 66 | ''' 67 | by: String 68 | The method used to perform re-sampling 69 | currently support: ['RUS', 'CNN', 'ENN', 'NCR', 'Tomek', 'ALLKNN', 'OSS', 70 | 'NM', 'CC', 'SMOTE', 'ADASYN', 'BorderSMOTE', 'SMOTEENN', 'SMOTETomek', 71 | 'ORG'] 72 | ''' 73 | if by == 'RUS': 74 | sampler = RandomUnderSampler(random_state=random_state) 75 | elif by == 'CNN': 76 | sampler = CondensedNearestNeighbour(random_state=random_state) 77 | elif by == 'ENN': 78 | sampler = EditedNearestNeighbours(random_state=random_state) 79 | elif by == 'NCR': 80 | sampler = NeighbourhoodCleaningRule(random_state=random_state) 81 | elif by == 'Tomek': 82 | sampler = TomekLinks(random_state=random_state) 83 | elif by == 'ALLKNN': 84 | sampler = AllKNN(random_state=random_state) 85 | elif by == 'OSS': 86 | sampler = OneSidedSelection(random_state=random_state) 87 | elif by == 'NM': 88 | sampler = NearMiss(random_state=random_state) 89 | elif by == 'CC': 90 | sampler = ClusterCentroids(random_state=random_state) 91 | elif by == 'ROS': 92 | sampler = RandomOverSampler(random_state=random_state) 93 | elif 
by == 'SMOTE': 94 | sampler = SMOTE(random_state=random_state) 95 | elif by == 'ADASYN': 96 | sampler = ADASYN(random_state=random_state) 97 | elif by == 'BorderSMOTE': 98 | sampler = BorderlineSMOTE(random_state=random_state) 99 | elif by == 'SMOTEENN': 100 | sampler = SMOTEENN(random_state=random_state) 101 | elif by == 'SMOTETomek': 102 | sampler = SMOTETomek(random_state=random_state) 103 | elif by == 'ORG': 104 | sampler = None 105 | else: 106 | raise Error('Unexpected \'by\' type {}'.format(by)) 107 | 108 | if by != 'ORG': 109 | X_train, y_train = sampler.fit_resample(X, y) 110 | else: 111 | X_train, y_train = X, y 112 | 113 | return X_train, y_train -------------------------------------------------------------------------------- /environment.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Sat Feb 8 02:27:20 2020 4 | @author: ZhiningLiu1998 5 | mailto: zhining.liu@outlook.com / v-zhinli@microsoft.com 6 | """ 7 | 8 | import pandas as pd 9 | import numpy as np 10 | import sklearn 11 | import warnings 12 | warnings.filterwarnings("ignore") 13 | 14 | from utils import ( 15 | Rater, meta_sampling, histogram_error_distribution, imbalance_train_test_split, 16 | ) 17 | 18 | class Ensemble(): 19 | """A basic ensemble learning framework. 20 | 21 | Parameters 22 | ---------- 23 | base_estimator : object (scikit-learn classifier) 24 | The base estimator used to build ensemble classifiers. 25 | NO need to support sample weighting. 26 | Built-in `fit()`, `predict()`, `predict_proba()` methods are required. 27 | 28 | Attributes 29 | ---------- 30 | base_estimator_ : estimator 31 | The base estimator from which the ensemble is grown. 32 | 33 | estimators_ : list of estimators 34 | The collection of fitted sub-estimators. 35 | """ 36 | def __init__(self, base_estimator): 37 | self.estimators_ = [] 38 | if not sklearn.base.is_classifier(base_estimator): 39 | raise TypeError(f'Base estimator {base_estimator} is not a sklearn classifier.') 40 | self.base_estimator_ = base_estimator 41 | 42 | def fit_step(self, X, y): 43 | """Bulid a new base classifier from the training set (X, y). 44 | 45 | Parameters 46 | ---------- 47 | y : array-like of shape = [n_samples] 48 | The training labels. 49 | 50 | X : array-like of shape = [n_samples, n_features] 51 | The training instances. 52 | 53 | Returns 54 | ---------- 55 | self : object (Ensemble) 56 | """ 57 | self.estimators_.append( 58 | sklearn.base.clone(self.base_estimator_).fit(X, y) 59 | ) 60 | return self 61 | 62 | def predict_proba(self, X): 63 | """Predict class probabilities for X. 64 | 65 | The predicted class probabilities of an input sample is computed as the 66 | mean predicted class probabilities of the classifiers in the ensemble. 67 | 68 | Parameters 69 | ---------- 70 | X : array-like of shape = [n_samples, n_features] 71 | The input data instances. 72 | 73 | Returns 74 | ---------- 75 | p : array-like of shape [n_samples, n_classes] 76 | The class probabilities of the input samples. 77 | """ 78 | y_pred = np.array( 79 | [model.predict_proba(X)[:, 1] for model in self.estimators_] 80 | ).mean(axis=0) 81 | if y_pred.ndim == 1: 82 | y_pred = y_pred[:, np.newaxis] 83 | if y_pred.shape[1] == 1: 84 | y_pred = np.append(1-y_pred, y_pred, axis=1) 85 | return y_pred 86 | 87 | def predict(self, X): 88 | """Predict classes for X. 89 | 90 | The predicted class of an input sample is computed as the mean 91 | prediction of the classifiers in the ensemble. 
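        For example, if three fitted base classifiers assign positive-class
        probabilities 0.3, 0.6 and 0.9 to a sample, the averaged probability is
        0.6, which is above the 0.5 threshold and is therefore binarized to the
        positive class.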
92 | 93 | Parameters 94 | ---------- 95 | X : array-like of shape = [n_samples, n_features] 96 | The input data instances. 97 | 98 | Returns 99 | ---------- 100 | y : array-like of shape = [n_samples] 101 | The predicted classes. 102 | """ 103 | y_pred_binarized = sklearn.preprocessing.binarize( 104 | self.predict_proba(X)[:,1].reshape(1,-1), threshold=0.5)[0] 105 | return y_pred_binarized 106 | 107 | def score(self, X, y): 108 | """Return area under precision recall curve (AUCPRC) scores for X, y. 109 | 110 | Parameters 111 | ---------- 112 | X : array-like of shape = [n_samples, n_features] 113 | The input data instances. 114 | 115 | y : array-like of shape = [n_samples] 116 | Labels for X. 117 | 118 | Yields 119 | ---------- 120 | z : float 121 | """ 122 | yield sklearn.metrics.average_precision_score( 123 | y, self.predict_proba(X)[:, 1]) 124 | 125 | class EnsembleTrainingEnv(Ensemble): 126 | """The ensemble training environment in MESA. 127 | 128 | Parameters 129 | ---------- 130 | args : arguments 131 | See arguments.py for more information. 132 | 133 | base_estimator : object (scikit-learn classifier) 134 | The base estimator used to build ensemble classifiers. 135 | NO need to support sample weighting. 136 | Built-in `fit()`, `predict()`, `predict_proba()` methods are required. 137 | 138 | Attributes 139 | ---------- 140 | args : arguments 141 | 142 | rater : object (Rater) 143 | Rater for evaluate classifiers performance on class imabalanced data. 144 | See arguments.py for more information. 145 | 146 | base_estimator_ : object (scikit-learn classifier) 147 | The base estimator from which the ensemble is grown. 148 | 149 | estimators_ : list of classifiers 150 | The collection of fitted sub-estimators. 151 | """ 152 | def __init__(self, args, base_estimator): 153 | 154 | super(EnsembleTrainingEnv, self).__init__( 155 | base_estimator=base_estimator) 156 | 157 | self.base_estimator_ = base_estimator 158 | self.args = args 159 | self.rater = Rater(metric=args.metric) 160 | 161 | def load_data(self, X_train, y_train, X_valid, y_valid, X_test=None, y_test=None, train_ratio=1): 162 | """Load and preprocess the train/valid/test data into the environment.""" 163 | self.flag_use_test_set = False if X_test is None or y_test is None else True 164 | if train_ratio < 1: 165 | print ('Using {:.2%} random subset for meta-training.'.format(train_ratio)) 166 | _, X_train, _, y_train = imbalance_train_test_split(X_train, y_train, test_size=train_ratio) 167 | self.X_train, self.y_train = pd.DataFrame(X_train), pd.Series(y_train) 168 | self.X_valid, self.y_valid = pd.DataFrame(X_valid), pd.Series(y_valid) 169 | self.X_test, self.y_test = pd.DataFrame(X_test), pd.Series(y_test) 170 | self.mask_maj_train, self.mask_min_train = (y_train==0), (y_train==1) 171 | self.mask_maj_valid, self.mask_min_valid = (y_valid==0), (y_valid==1) 172 | self.n_min_samples = self.mask_min_train.sum() 173 | n_samples = int(self.n_min_samples*self.args.train_ir) 174 | if n_samples > self.mask_maj_train.sum(): 175 | raise ValueError(f"\ 176 | Argument 'train_ir' should be smaller than imbalance ratio,\n \ 177 | Please set this parameter to < {self.mask_maj_train.sum()/self.mask_min_train.sum()}.\ 178 | ") 179 | self.n_samples = n_samples 180 | 181 | def init(self): 182 | """Reset the environment.""" 183 | self.estimators_ = [] 184 | # buffer the predict probabilities for better efficiency 185 | # initialize 186 | self.y_pred_train_buffer = np.zeros_like(self.y_train) 187 | self.y_pred_valid_buffer = 
np.zeros_like(self.y_valid) 188 | if self.flag_use_test_set: 189 | self.y_pred_test_buffer = np.zeros_like(self.y_test) 190 | self._warm_up() 191 | 192 | def get_state(self): 193 | """Fetch the current state of the environment.""" 194 | hist_train = histogram_error_distribution( 195 | self.y_train[self.mask_maj_train], 196 | self.y_pred_train_buffer[self.mask_maj_train], 197 | self.args.num_bins) 198 | hist_valid = histogram_error_distribution( 199 | self.y_valid[self.mask_maj_valid], 200 | self.y_pred_valid_buffer[self.mask_maj_valid], 201 | self.args.num_bins) 202 | hist_train = hist_train / hist_train.sum() * self.args.num_bins 203 | hist_valid = hist_valid / hist_valid.sum() * self.args.num_bins 204 | state = np.concatenate([hist_train, hist_valid]) 205 | return state 206 | 207 | def step(self, action, verbose=False): 208 | """Perform an environment step. 209 | 210 | Parameters 211 | ---------- 212 | action: float, in [0, 1] 213 | The action (mu) to execute in the environment. 214 | 215 | verbose: bool, optional (default=False) 216 | Whether to compute and return the information about the current ensemble. 217 | 218 | Returns 219 | ---------- 220 | next_state : array-like of shape [state_size] 221 | The state of the environment after executing the action. 222 | 223 | reward : float 224 | The reward of taking the action. 225 | 226 | done : bool 227 | Indicates the end of an episode. 228 | True if the ensemble reaches the maximum number of base estimators. 229 | 230 | info : string 231 | Information about the current ensemble. 232 | Empty string if verbose == False. 233 | """ 234 | # check action value 235 | if action < 0 or action > 1: 236 | raise ValueError("Action must be a float in [0, 1].") 237 | 238 | # perform meta-sampling 239 | X_maj_subset = meta_sampling( 240 | y_pred = self.y_pred_train_buffer[self.mask_maj_train], 241 | y_true = self.y_train[self.mask_maj_train], 242 | n_under_samples = self.n_samples, 243 | X = self.X_train[self.mask_maj_train], 244 | mu = action, 245 | sigma = self.args.sigma, 246 | random_state = self.args.random_state,) 247 | # build training subset (X_train_iter, y_train_iter) 248 | X_train_iter = pd.concat([X_maj_subset, self.X_train[self.mask_min_train]]).values 249 | y_train_iter = np.concatenate([np.zeros(X_maj_subset.shape[0]), np.ones(self.n_min_samples)]) 250 | 251 | score_valid_before = self.rater.score(self.y_valid, self.y_pred_valid_buffer) 252 | 253 | # build a new base classifier from (X_train_iter, y_train_iter) 254 | self.fit_step(X_train_iter, y_train_iter) 255 | self.update_all_pred_buffer() 256 | 257 | score_valid = self.rater.score(self.y_valid, self.y_pred_valid_buffer) 258 | 259 | # obtain return values 260 | next_state = self.get_state() 261 | reward = score_valid - score_valid_before 262 | done = True if len(self.estimators_) >= self.args.max_estimators else False 263 | info = '' 264 | 265 | # fetch environment information if verbose==True 266 | if self.args.meta_verbose is 'full' or verbose: 267 | score_train = self.rater.score(self.y_train, self.y_pred_train_buffer) 268 | score_test = self.rater.score(self.y_test, self.y_pred_test_buffer) if self.flag_use_test_set else 'NULL' 269 | info = 'k={:<3d}|{}| train {:.3f} | valid {:.3f} | '.format( 270 | len(self.estimators_)-1, self.args.metric, score_train, score_valid) 271 | info += 'test {:.3f}'.format(score_test) if self.flag_use_test_set else 'test NULL' 272 | 273 | return next_state, reward, done, info 274 | 275 | def update_all_pred_buffer(self): 276 | """Update all buffered 
predict probabilities.""" 277 | n_clf = len(self.estimators_) 278 | self.y_pred_train_buffer = self._update_pred_buffer(n_clf, self.X_train, self.y_pred_train_buffer) 279 | self.y_pred_valid_buffer = self._update_pred_buffer(n_clf, self.X_valid, self.y_pred_valid_buffer) 280 | if self.flag_use_test_set: 281 | self.y_pred_test_buffer = self._update_pred_buffer(n_clf, self.X_test, self.y_pred_test_buffer) 282 | return 283 | 284 | def _update_pred_buffer(self, n_clf, X, y_pred_buffer): 285 | """Update buffered predict probabilities. 286 | 287 | Parameters 288 | ---------- 289 | n_clf : int 290 | Current ensemble size. 291 | 292 | X : array-like of shape = [n_samples, n_features] 293 | The input data instances. 294 | 295 | y_pred_buffer : array-like of shape [n_samples] 296 | The buffered predict probabilities of X. 297 | 298 | Returns 299 | ---------- 300 | y_pred_updated : array-like of shape [n_samples] 301 | """ 302 | y_pred_last_clf = self.estimators_[-1].predict_proba(X)[:, 1] 303 | y_pred_buffer_updated = (y_pred_buffer * (n_clf-1) + y_pred_last_clf) / n_clf 304 | return y_pred_buffer_updated 305 | 306 | def _warm_up(self): 307 | """Train the first base classifier with random under-sampling.""" 308 | X_maj = self.X_train[self.mask_maj_train] 309 | X_min = self.X_train[self.mask_min_train] 310 | X_maj_rus = X_maj.sample(n=self.n_samples, random_state=self.args.random_state) 311 | # X_maj_rus = X_maj 312 | X_train_rus = pd.concat([X_maj_rus, X_min]).values 313 | y_train_rus = np.concatenate([np.zeros(X_maj_rus.shape[0]), np.ones(X_min.shape[0])]) 314 | self.fit_step(X_train_rus, y_train_rus) 315 | self.update_all_pred_buffer() 316 | return -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | from tqdm import tqdm 2 | import pandas as pd 3 | import numpy as np 4 | import time 5 | from mesa import Mesa 6 | from arguments import parser 7 | from utils import Rater, load_dataset 8 | from sklearn.tree import DecisionTreeClassifier 9 | 10 | if __name__ == '__main__': 11 | 12 | # load dataset & prepare environment 13 | args = parser.parse_args() 14 | rater = Rater(args.metric) 15 | X_train, y_train, X_valid, y_valid, X_test, y_test = load_dataset(args.dataset) 16 | base_estimator = DecisionTreeClassifier(max_depth=None) 17 | 18 | # meta-training 19 | print ('\nStart meta-training of MESA ... ...\n') 20 | mesa = Mesa( 21 | args=args, 22 | base_estimator=base_estimator, 23 | n_estimators=args.max_estimators) 24 | mesa.meta_fit(X_train, y_train, X_valid, y_valid, X_test, y_test) 25 | 26 | # test 27 | print ('\nStart ensemble training of MESA ... 
...\n') 28 | runs = 50 29 | scores_list, time_list = [], [] 30 | for i_run in tqdm(range(runs)): 31 | start_time = time.clock() 32 | mesa.fit(X_train, y_train, X_valid, y_valid, verbose=False) 33 | end_time = time.clock() 34 | time_list.append(end_time - start_time) 35 | score_train = rater.score(y_train, mesa.predict_proba(X_train)[:,1]) 36 | score_valid = rater.score(y_valid, mesa.predict_proba(X_valid)[:,1]) 37 | score_test = rater.score(y_test, mesa.predict_proba(X_test)[:,1]) 38 | scores_list.append([score_train, score_valid, score_test]) 39 | 40 | # print results to stdout 41 | df_scores = pd.DataFrame(scores_list, columns=['train', 'valid', 'test']) 42 | info = f'Dataset: {args.dataset}\nMESA {args.metric}|' 43 | for column in df_scores.columns: 44 | info += ' {} {:.3f}-{:.3f} |'.format(column, df_scores.mean()[column], df_scores.std()[column]) 45 | info += ' {} runs (mean-std) |'.format(runs) 46 | info += ' ave run time: {:.2f}s'.format(np.mean(time_list)) 47 | print (info) -------------------------------------------------------------------------------- /mesa.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Sat Feb 8 02:27:20 2020 4 | @author: ZhiningLiu1998 5 | mailto: zhining.liu@outlook.com / v-zhinli@microsoft.com 6 | """ 7 | 8 | import os 9 | import torch 10 | import pandas as pd 11 | import numpy as np 12 | from gym import spaces 13 | from sac_src.sac import SAC 14 | from sac_src.replay_memory import ReplayMemory 15 | from environment import EnsembleTrainingEnv 16 | from utils import * 17 | 18 | class Mesa(EnsembleTrainingEnv): 19 | """The ensemble imbalanced learning framework MESA. 20 | 21 | Parameters 22 | ---------- 23 | args : arguments 24 | See arguments.py for more information. 25 | 26 | base_estimator : scikit-learn classifier object 27 | The base estimator used to build ensemble classifiers. 28 | NO need to support sample weighting. 29 | Built-in `fit()`, `predict()`, `predict_proba()` methods are required. 30 | 31 | n_estimators : int, optional (default=10) 32 | The number of base estimators used to form an MESA ensemble. 33 | 34 | Attributes 35 | ---------- 36 | args : arguments 37 | 38 | rater : object (Rater) 39 | Rater for evaluate classifiers performance on class imabalanced data. 40 | See arguments.py for more information. 41 | 42 | base_estimator_ : object (scikit-learn classifier) 43 | The base estimator from which the ensemble is grown. 44 | 45 | estimators_ : list of classifiers 46 | The collection of fitted sub-estimators. 47 | 48 | n_estimators : int 49 | The number of base estimators used to form an MESA ensemble. 50 | 51 | meta_sampler : object (SAC) 52 | The meta-sampler in MESA. 53 | 54 | env : object (EnsembleTrainingEnv) 55 | The ensemble training environment in MESA. 56 | 57 | memory : object (ReplayMemory) 58 | The replay memory for Soft Actor-Critic training. 
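    Example
    -------
    A minimal sketch of the intended workflow (mirroring main.py); the
    'Mammo' dataset name assumes the bundled CSV files under data/:
    ```
    from sklearn.tree import DecisionTreeClassifier
    from arguments import parser
    from utils import load_dataset
    from mesa import Mesa

    args = parser.parse_args()
    X_train, y_train, X_valid, y_valid, X_test, y_test = load_dataset('Mammo')
    mesa = Mesa(args=args,
                base_estimator=DecisionTreeClassifier(),
                n_estimators=args.max_estimators)
    # meta-train the SAC meta-sampler, then build the final ensemble
    mesa.meta_fit(X_train, y_train, X_valid, y_valid, X_test, y_test)
    mesa.fit(X_train, y_train, X_valid, y_valid)
    y_score = mesa.predict_proba(X_test)[:, 1]
    ```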
59 | """ 60 | def __init__(self, args, base_estimator, n_estimators=10): 61 | 62 | super(Mesa, self).__init__(args, base_estimator) 63 | 64 | # state-size = 2 x num_bins 65 | state_size = int(args.num_bins*2) 66 | action_space = spaces.Box(low=0.0, high=1.0, shape=[1], dtype=np.float32) 67 | 68 | self.args = args 69 | self.n_estimators = n_estimators 70 | self.base_estimator_ = base_estimator 71 | self.meta_sampler = SAC(state_size, action_space, self.args) 72 | self.env = EnsembleTrainingEnv(args, base_estimator) 73 | self.memory = ReplayMemory(self.args.replay_size) 74 | 75 | def meta_fit(self, X_train, y_train, X_valid, y_valid, X_test=None, y_test=None): 76 | """Meta-training process of MESA. 77 | 78 | Parameters 79 | ---------- 80 | X_train : array-like of shape = [n_training_samples, n_features] 81 | The training data instances. 82 | 83 | y_train : array-like of shape = [n_training_samples] 84 | Labels for X_train. 85 | 86 | X_valid : array-like of shape = [n_validation_samples, n_features] 87 | The validation data instances. 88 | 89 | y_valid : array-like of shape = [n_validation_samples] 90 | Labels for X_valid. 91 | 92 | X_test : array-like of shape = [n_training_samples, n_features], optional (default=None) 93 | The test data instances. 94 | 95 | y_train : array-like of shape = [n_training_samples], optional (default=None) 96 | Labels for X_test. 97 | 98 | Returns 99 | ---------- 100 | self : object (Mesa) 101 | """ 102 | # initialize replay memory and environment 103 | self.env.load_data(X_train, y_train, X_valid, y_valid, X_test, y_test, train_ratio=self.args.train_ratio) 104 | self.memory = memory_init_fulfill(self.args, ReplayMemory(self.args.replay_size)) 105 | 106 | self.scores = [] 107 | total_steps = self.args.update_steps + self.args.start_steps 108 | num_steps, num_updates, num_episodes = 0, 0, 0 109 | 110 | # start meta-training 111 | while num_steps < total_steps: 112 | self.env.init() 113 | state = self.env.get_state() 114 | done = False 115 | 116 | # for each episode 117 | while not done: 118 | num_steps += 1 119 | 120 | # take an action 121 | if num_steps >= self.args.start_steps: 122 | action, by = self.meta_sampler.select_action(state), 'mesa' 123 | else: 124 | action, by = self.meta_sampler.action_space.sample(), 'rand' 125 | 126 | # store transition 127 | next_state, reward, done, info = self.env.step(action[0]) 128 | reward = reward * self.args.reward_coefficient 129 | self.memory.push(state, action, reward, next_state, float(done)) 130 | 131 | # update meta-sampler parameters 132 | if num_steps > self.args.start_steps: 133 | for i in range(self.args.updates_per_step): 134 | _, _, _, _, _ = self.meta_sampler.update_parameters( 135 | self.memory, self.args.batch_size, num_updates) 136 | num_updates += self.args.updates_per_step 137 | 138 | # print log to stdout 139 | if self.args.meta_verbose is 'full': 140 | print ('Epi.{:<4d} updates{:<4d}| {} | {} by {}'.format(num_episodes, num_updates, info, action[0], by)) 141 | 142 | if done: 143 | num_episodes += 1 144 | self.record_scores() 145 | # record print mean score of latest args.meta_verbose_mean_episodes to stdout 146 | self.verbose_mean_scores(num_episodes, num_updates, by) 147 | 148 | return self 149 | 150 | def record_scores(self): 151 | """Record the training/validation/test performance scores.""" 152 | train_score = self.env.rater.score(self.env.y_train, self.env.y_pred_train_buffer) 153 | valid_score = self.env.rater.score(self.env.y_valid, self.env.y_pred_valid_buffer) 154 | test_score = 
self.env.rater.score(self.env.y_test, self.env.y_pred_test_buffer) if self.env.flag_use_test_set else 'NULL' 155 | self.scores.append([train_score, valid_score, test_score] if self.env.flag_use_test_set else [train_score, valid_score]) 156 | return 157 | 158 | def verbose_mean_scores(self, num_episodes, num_updates, by): 159 | """Print mean score of latest n episodes to stdout. 160 | 161 | n = args.meta_verbose_mean_episodes 162 | 163 | Parameters 164 | ---------- 165 | num_episodes : int 166 | The number of finished meta-training episodes. 167 | 168 | num_updates : int 169 | The number of finished meta-sampler updates. 170 | 171 | by : {'rand', 'mesa'}, string 172 | The way of selecting actions in the current episode. 173 | """ 174 | if self.args.meta_verbose is 'full' or (self.args.meta_verbose != 0 and num_episodes % self.args.meta_verbose == 0): 175 | view_bound = max(-self.args.meta_verbose_mean_episodes, -len(self.scores)) 176 | recent_scores_mean = np.array(self.scores)[view_bound:].mean(axis=0) 177 | print ('Epi.{:<4d} updates {:<4d} |last-{}-mean-{}| train {:.3f} | valid {:.3f} | test {:.3f} | by {}'.format( 178 | num_episodes, num_updates, self.args.meta_verbose_mean_episodes, self.args.metric, 179 | recent_scores_mean[0], recent_scores_mean[1], recent_scores_mean[2], by)) 180 | return 181 | 182 | def fit(self, X, y, X_valid, y_valid, n_estimators=None, verbose=False): 183 | """Build a MESA ensemble from training set (X, y) and validation set (X_valid, y_valid). 184 | 185 | Parameters 186 | ---------- 187 | X : array-like of shape = [n_training_samples, n_features] 188 | The training data instances. 189 | 190 | y : array-like of shape = [n_training_samples] 191 | Labels for X. 192 | 193 | X_valid : array-like of shape = [n_validation_samples, n_features] 194 | The validation data instances. 195 | 196 | y_valid : array-like of shape = [n_validation_samples] 197 | Labels for X_valid. 198 | 199 | n_estimators : int, optional (default=self.n_estimators) 200 | The number of base estimators used to form an MESA ensemble. 201 | 202 | verbose: bool, optional (default=False) 203 | Whether to print progress messages to stdout. 204 | 205 | Returns 206 | ---------- 207 | self : object (Mesa) 208 | """ 209 | n_estimators = self.n_estimators if n_estimators is None else n_estimators 210 | self.load_data(X, y, X_valid, y_valid) 211 | self.init() 212 | self.actions_record = [] 213 | for i in range(n_estimators-1): 214 | state = self.get_state() 215 | action = self.meta_sampler.select_action(state) 216 | self.actions_record.append(action[0]) 217 | _, _, _, info = self.step(action[0], verbose) 218 | if verbose: 219 | print ('{:<12s} | action: {} {}'.format('Mesa', action, info)) 220 | return self 221 | 222 | def save_meta_sampler(self, directory='save_model', suffix='meta_sampler'): 223 | """Save trained meta-sampler to files. 224 | 225 | Parameters 226 | ---------- 227 | directory : string, optional (default='save_model') 228 | The directory to save files. 229 | Create the directory if it does not exist. 230 | 231 | suffix : string, optional (default='meta_sampler') 232 | The actor network will be saved in {directory}/actor_{suffix}. 233 | The critic network will be saved in {directory}/critic_{suffix}. 
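        For example, `mesa.save_meta_sampler('save_model', 'mammo')` writes
        `save_model/actor_mammo` and `save_model/critic_mammo`; the suffix
        'mammo' here is only illustrative. The files can later be restored
        with `mesa.load_meta_sampler('save_model', 'mammo')`.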
234 | """ 235 | directory_path = f'{directory}/' 236 | if not os.path.exists(directory_path): 237 | os.makedirs(directory_path) 238 | actor_path = f'{directory_path}actor_{suffix}' 239 | critic_path = f'{directory_path}critic_{suffix}' 240 | self.meta_sampler.save_model(actor_path, critic_path) 241 | return 242 | 243 | def load_meta_sampler(self, directory='save_model', suffix='meta_sampler'): 244 | """Load trained meta-sampler from files. 245 | 246 | Parameters 247 | ---------- 248 | directory : string, optional (default='save_model') 249 | The directory to load files. 250 | 251 | suffix : string, optional (default='meta_sampler') 252 | The actor network will be loaded from {directory}/actor_{suffix}. 253 | The critic network will be loaded from {directory}/critic_{suffix}. 254 | """ 255 | directory_path = f'{directory}/' 256 | actor_path = f'{directory_path}actor_{suffix}' 257 | critic_path = f'{directory_path}critic_{suffix}' 258 | self.meta_sampler.load_model(actor_path, critic_path) 259 | return self -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # torch==1.0.0 2 | gym==0.17.3 3 | tqdm==4.28.1 4 | pandas==0.23.4 5 | numpy==1.15.4 6 | seaborn==0.9.0 7 | imbalanced-learn==0.5.0 8 | scikit-learn==0.21 9 | jupyter==1.0.0 -------------------------------------------------------------------------------- /sac_src/model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from torch.distributions import Normal 5 | 6 | LOG_SIG_MAX = 2 7 | LOG_SIG_MIN = -20 8 | epsilon = 1e-6 9 | 10 | # Initialize Policy weights 11 | def weights_init_(m): 12 | if isinstance(m, nn.Linear): 13 | torch.nn.init.xavier_uniform_(m.weight, gain=1) 14 | torch.nn.init.constant_(m.bias, 0) 15 | 16 | 17 | class ValueNetwork(nn.Module): 18 | def __init__(self, num_inputs, hidden_dim): 19 | super(ValueNetwork, self).__init__() 20 | 21 | self.linear1 = nn.Linear(num_inputs, hidden_dim) 22 | self.linear2 = nn.Linear(hidden_dim, hidden_dim) 23 | self.linear3 = nn.Linear(hidden_dim, 1) 24 | 25 | self.apply(weights_init_) 26 | 27 | def forward(self, state): 28 | x = F.relu(self.linear1(state)) 29 | x = F.relu(self.linear2(x)) 30 | x = self.linear3(x) 31 | return x 32 | 33 | 34 | class QNetwork(nn.Module): 35 | def __init__(self, num_inputs, num_actions, hidden_dim): 36 | super(QNetwork, self).__init__() 37 | 38 | # Q1 architecture 39 | self.linear1 = nn.Linear(num_inputs + num_actions, hidden_dim) 40 | self.linear2 = nn.Linear(hidden_dim, hidden_dim) 41 | self.linear3 = nn.Linear(hidden_dim, 1) 42 | 43 | # Q2 architecture 44 | self.linear4 = nn.Linear(num_inputs + num_actions, hidden_dim) 45 | self.linear5 = nn.Linear(hidden_dim, hidden_dim) 46 | self.linear6 = nn.Linear(hidden_dim, 1) 47 | 48 | self.apply(weights_init_) 49 | 50 | def forward(self, state, action): 51 | xu = torch.cat([state, action], 1) 52 | 53 | x1 = F.relu(self.linear1(xu)) 54 | x1 = F.relu(self.linear2(x1)) 55 | x1 = self.linear3(x1) 56 | 57 | x2 = F.relu(self.linear4(xu)) 58 | x2 = F.relu(self.linear5(x2)) 59 | x2 = self.linear6(x2) 60 | 61 | return x1, x2 62 | 63 | 64 | class GaussianPolicy(nn.Module): 65 | def __init__(self, num_inputs, num_actions, hidden_dim, action_space=None): 66 | super(GaussianPolicy, self).__init__() 67 | 68 | self.linear1 = nn.Linear(num_inputs, hidden_dim) 69 | # self.linear2 = 
nn.Linear(hidden_dim, hidden_dim) 70 | # self.linear3 = nn.Linear(hidden_dim, hidden_dim) 71 | # self.linear4 = nn.Linear(hidden_dim, hidden_dim) 72 | 73 | self.mean_linear = nn.Linear(hidden_dim, num_actions) 74 | self.log_std_linear = nn.Linear(hidden_dim, num_actions) 75 | 76 | self.apply(weights_init_) 77 | 78 | # action rescaling 79 | if action_space is None: 80 | self.action_scale = torch.tensor(1.) 81 | self.action_bias = torch.tensor(0.) 82 | else: 83 | self.action_scale = torch.FloatTensor( 84 | (action_space.high - action_space.low) / 2.) 85 | self.action_bias = torch.FloatTensor( 86 | (action_space.high + action_space.low) / 2.) 87 | 88 | def forward(self, state): 89 | x = F.relu(self.linear1(state)) 90 | # x = F.relu(self.linear2(x)) 91 | # x = F.relu(self.linear3(x)) 92 | # x = F.relu(self.linear4(x)) 93 | mean = self.mean_linear(x) 94 | log_std = self.log_std_linear(x) 95 | log_std = torch.clamp(log_std, min=LOG_SIG_MIN, max=LOG_SIG_MAX) 96 | return mean, log_std 97 | 98 | def sample(self, state): 99 | mean, log_std = self.forward(state) 100 | std = log_std.exp() 101 | normal = Normal(mean, std) 102 | x_t = normal.rsample() # for reparameterization trick (mean + std * N(0,1)) 103 | y_t = torch.tanh(x_t) 104 | action = y_t * self.action_scale + self.action_bias 105 | log_prob = normal.log_prob(x_t) 106 | # Enforcing Action Bound 107 | log_prob -= torch.log(self.action_scale * (1 - y_t.pow(2)) + epsilon) 108 | log_prob = log_prob.sum(1, keepdim=True) 109 | mean = torch.tanh(mean) * self.action_scale + self.action_bias 110 | return action, log_prob, mean 111 | 112 | def to(self, device): 113 | self.action_scale = self.action_scale.to(device) 114 | self.action_bias = self.action_bias.to(device) 115 | return super(GaussianPolicy, self).to(device) 116 | 117 | 118 | class DeterministicPolicy(nn.Module): 119 | def __init__(self, num_inputs, num_actions, hidden_dim, action_space=None): 120 | super(DeterministicPolicy, self).__init__() 121 | self.linear1 = nn.Linear(num_inputs, hidden_dim) 122 | self.linear2 = nn.Linear(hidden_dim, hidden_dim) 123 | 124 | self.mean = nn.Linear(hidden_dim, num_actions) 125 | self.noise = torch.Tensor(num_actions) 126 | 127 | self.apply(weights_init_) 128 | 129 | # action rescaling 130 | if action_space is None: 131 | self.action_scale = 1. 132 | self.action_bias = 0. 133 | else: 134 | self.action_scale = torch.FloatTensor( 135 | (action_space.high - action_space.low) / 2.) 136 | self.action_bias = torch.FloatTensor( 137 | (action_space.high + action_space.low) / 2.) 
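        # The tanh-squashed policy output lies in [-1, 1]; multiplying by
        # (high - low) / 2 and adding (high + low) / 2 maps it onto the
        # environment's action range. For MESA's Box(low=0, high=1) action
        # space this gives action_scale = 0.5 and action_bias = 0.5, i.e.
        # mu = 0.5 * tanh(x) + 0.5 in [0, 1].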
138 | 139 | def forward(self, state): 140 | x = F.relu(self.linear1(state)) 141 | x = F.relu(self.linear2(x)) 142 | mean = torch.tanh(self.mean(x)) * self.action_scale + self.action_bias 143 | return mean 144 | 145 | def sample(self, state): 146 | mean = self.forward(state) 147 | noise = self.noise.normal_(0., std=0.1) 148 | noise = noise.clamp(-0.25, 0.25) 149 | action = mean + noise 150 | return action, torch.tensor(0.), mean 151 | 152 | def to(self, device): 153 | self.action_scale = self.action_scale.to(device) 154 | self.action_bias = self.action_bias.to(device) 155 | self.noise = self.noise.to(device) 156 | return super(DeterministicPolicy, self).to(device) 157 | -------------------------------------------------------------------------------- /sac_src/replay_memory.py: -------------------------------------------------------------------------------- 1 | import random 2 | import numpy as np 3 | 4 | class ReplayMemory: 5 | def __init__(self, capacity): 6 | self.capacity = capacity 7 | self.buffer = [] 8 | self.position = 0 9 | 10 | def push(self, state, action, reward, next_state, done): 11 | if len(self.buffer) < self.capacity: 12 | self.buffer.append(None) 13 | self.buffer[self.position] = (state, action, reward, next_state, done) 14 | self.position = (self.position + 1) % self.capacity 15 | 16 | def sample(self, batch_size): 17 | batch = random.sample(self.buffer, batch_size) 18 | state, action, reward, next_state, done = map(np.stack, zip(*batch)) 19 | return state, action, reward, next_state, done 20 | 21 | def __len__(self): 22 | return len(self.buffer) 23 | -------------------------------------------------------------------------------- /sac_src/sac.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | import torch.nn.functional as F 4 | from torch.optim import Adam, lr_scheduler 5 | from sac_src.utils import soft_update, hard_update 6 | from sac_src.model import GaussianPolicy, QNetwork, DeterministicPolicy 7 | 8 | 9 | class SAC(object): 10 | def __init__(self, num_inputs, action_space, args): 11 | 12 | self.gamma = args.gamma 13 | self.tau = args.tau 14 | self.alpha = args.alpha 15 | self.action_space = action_space 16 | self.learning_rate = args.lr 17 | 18 | self.policy_type = args.policy 19 | self.target_update_interval = args.target_update_interval 20 | self.automatic_entropy_tuning = args.automatic_entropy_tuning 21 | 22 | self.device = torch.device("cuda" if args.cuda else "cpu") 23 | 24 | self.critic = QNetwork(num_inputs, action_space.shape[0], args.hidden_size).to(device=self.device) 25 | self.critic_optim = Adam(self.critic.parameters(), lr=args.lr) 26 | 27 | self.critic_target = QNetwork(num_inputs, action_space.shape[0], args.hidden_size).to(self.device) 28 | hard_update(self.critic_target, self.critic) 29 | 30 | if self.policy_type == "Gaussian": 31 | # Target Entropy = −dim(A) (e.g. 
, -6 for HalfCheetah-v2) as given in the paper 32 | if self.automatic_entropy_tuning == True: 33 | self.target_entropy = -torch.prod(torch.Tensor(action_space.shape).to(self.device)).item() 34 | self.log_alpha = torch.zeros(1, requires_grad=True, device=self.device) 35 | self.alpha_optim = Adam([self.log_alpha], lr=args.lr) 36 | 37 | self.policy = GaussianPolicy(num_inputs, action_space.shape[0], args.hidden_size, action_space).to(self.device) 38 | self.policy_optim = Adam(self.policy.parameters(), lr=args.lr) 39 | 40 | else: 41 | self.alpha = 0 42 | self.automatic_entropy_tuning = False 43 | self.policy = DeterministicPolicy(num_inputs, action_space.shape[0], args.hidden_size, action_space).to(self.device) 44 | self.policy_optim = Adam(self.policy.parameters(), lr=args.lr) 45 | 46 | self.policy_scheduler = lr_scheduler.StepLR(self.critic_optim, step_size=args.lr_decay_steps, gamma=args.lr_decay_gamma) 47 | 48 | def learning_rate_decay(self, decay_ratio=0.5): 49 | self.learning_rate = self.learning_rate * decay_ratio 50 | self.critic_optim = Adam(self.critic.parameters(), lr=self.learning_rate) 51 | self.policy_optim = Adam(self.policy.parameters(), lr=self.learning_rate) 52 | 53 | def select_action(self, state, eval=False): 54 | state = torch.FloatTensor(state).to(self.device).unsqueeze(0) 55 | if eval == False: 56 | action, _, _ = self.policy.sample(state) 57 | else: 58 | _, _, action = self.policy.sample(state) 59 | return action.detach().cpu().numpy()[0] 60 | 61 | def update_parameters(self, memory, batch_size, updates): 62 | # Sample a batch from memory 63 | state_batch, action_batch, reward_batch, next_state_batch, mask_batch = memory.sample(batch_size=batch_size) 64 | 65 | state_batch = torch.FloatTensor(state_batch).to(self.device) 66 | next_state_batch = torch.FloatTensor(next_state_batch).to(self.device) 67 | action_batch = torch.FloatTensor(action_batch).to(self.device) 68 | reward_batch = torch.FloatTensor(reward_batch).to(self.device).unsqueeze(1) 69 | mask_batch = torch.FloatTensor(mask_batch).to(self.device).unsqueeze(1) 70 | 71 | with torch.no_grad(): 72 | next_state_action, next_state_log_pi, _ = self.policy.sample(next_state_batch) 73 | qf1_next_target, qf2_next_target = self.critic_target(next_state_batch, next_state_action) 74 | min_qf_next_target = torch.min(qf1_next_target, qf2_next_target) - self.alpha * next_state_log_pi 75 | next_q_value = reward_batch + mask_batch * self.gamma * (min_qf_next_target) 76 | 77 | qf1, qf2 = self.critic(state_batch, action_batch) # Two Q-functions to mitigate positive bias in the policy improvement step 78 | qf1_loss = F.mse_loss(qf1, next_q_value) # JQ = 𝔼(st,at)~D[0.5(Q1(st,at) - r(st,at) - γ(𝔼st+1~p[V(st+1)]))^2] 79 | qf2_loss = F.mse_loss(qf2, next_q_value) # JQ = 𝔼(st,at)~D[0.5(Q1(st,at) - r(st,at) - γ(𝔼st+1~p[V(st+1)]))^2] 80 | 81 | pi, log_pi, _ = self.policy.sample(state_batch) 82 | 83 | qf1_pi, qf2_pi = self.critic(state_batch, pi) 84 | min_qf_pi = torch.min(qf1_pi, qf2_pi) 85 | 86 | policy_loss = ((self.alpha * log_pi) - min_qf_pi).mean() # Jπ = 𝔼st∼D,εt∼N[α * logπ(f(εt;st)|st) − Q(st,f(εt;st))] 87 | 88 | self.critic_optim.zero_grad() 89 | qf1_loss.backward() 90 | self.critic_optim.step() 91 | 92 | self.critic_optim.zero_grad() 93 | qf2_loss.backward() 94 | self.critic_optim.step() 95 | 96 | self.policy_optim.zero_grad() 97 | policy_loss.backward() 98 | self.policy_optim.step() 99 | self.policy_scheduler.step() 100 | 101 | if self.automatic_entropy_tuning: 102 | alpha_loss = -(self.log_alpha * (log_pi + 
103 | 
104 |             self.alpha_optim.zero_grad()
105 |             alpha_loss.backward()
106 |             self.alpha_optim.step()
107 | 
108 |             self.alpha = self.log_alpha.exp()
109 |             alpha_tlogs = self.alpha.clone()  # For TensorboardX logs
110 |         else:
111 |             alpha_loss = torch.tensor(0.).to(self.device)
112 |             alpha_tlogs = torch.tensor(self.alpha)  # For TensorboardX logs
113 | 
114 | 
115 |         if updates % self.target_update_interval == 0:
116 |             soft_update(self.critic_target, self.critic, self.tau)
117 | 
118 |         return qf1_loss.item(), qf2_loss.item(), policy_loss.item(), alpha_loss.item(), alpha_tlogs.item()
119 | 
120 |     # Save model parameters
121 |     def save_model(self, actor_path, critic_path):
122 |         torch.save(self.policy.state_dict(), actor_path)
123 |         torch.save(self.critic.state_dict(), critic_path)
124 | 
125 |     # Load model parameters
126 |     def load_model(self, actor_path, critic_path):
127 |         print('Loading models from {} and {}'.format(actor_path, critic_path))
128 |         if actor_path is not None:
129 |             self.policy.load_state_dict(torch.load(actor_path))
130 |         if critic_path is not None:
131 |             self.critic.load_state_dict(torch.load(critic_path))
132 | 
133 | 
--------------------------------------------------------------------------------
/sac_src/utils.py:
--------------------------------------------------------------------------------
1 | import math
2 | import torch
3 | 
4 | def create_log_gaussian(mean, log_std, t):
5 |     quadratic = -((0.5 * (t - mean) / (log_std.exp())).pow(2))
6 |     l = mean.shape
7 |     log_z = log_std
8 |     z = l[-1] * math.log(2 * math.pi)
9 |     log_p = quadratic.sum(dim=-1) - log_z.sum(dim=-1) - 0.5 * z
10 |     return log_p
11 | 
12 | def logsumexp(inputs, dim=None, keepdim=False):
13 |     if dim is None:
14 |         inputs = inputs.view(-1)
15 |         dim = 0
16 |     s, _ = torch.max(inputs, dim=dim, keepdim=True)
17 |     outputs = s + (inputs - s).exp().sum(dim=dim, keepdim=True).log()
18 |     if not keepdim:
19 |         outputs = outputs.squeeze(dim)
20 |     return outputs
21 | 
22 | def soft_update(target, source, tau):
23 |     for target_param, param in zip(target.parameters(), source.parameters()):
24 |         target_param.data.copy_(target_param.data * (1.0 - tau) + param.data * tau)
25 | 
26 | def hard_update(target, source):
27 |     for target_param, param in zip(target.parameters(), source.parameters()):
28 |         target_param.data.copy_(param.data)
29 | 
--------------------------------------------------------------------------------
/utils.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Sat Feb 8 02:27:20 2020
4 | @author: ZhiningLiu1998
5 | mailto: zhining.liu@outlook.com / v-zhinli@microsoft.com
6 | """
7 | 
8 | import pandas as pd
9 | import numpy as np
10 | from sklearn.metrics import (
11 |     f1_score,
12 |     average_precision_score,
13 |     matthews_corrcoef,
14 | )
15 | from sklearn.model_selection import train_test_split
16 | 
17 | class Rater():
18 |     """Rater for evaluating classifier performance on class-imbalanced data.
19 | 
20 |     Parameters
21 |     ----------
22 |     metric : {'aucprc', 'mcc', 'fscore'}, optional (default='aucprc')
23 |         Specify the performance metric used for evaluation.
24 |         If 'aucprc' then use Area Under Precision-Recall Curve.
25 |         If 'mcc' then use Matthews Correlation Coefficient.
26 |         If 'fscore' then use F1-score, also known as balanced F-score or F-measure.
27 |         Passing other values raises an exception.
28 | 
29 |     threshold : float, optional (default=0.5)
30 |         The threshold used for binarizing the predicted probability.
31 |         It does not affect the AUCPRC score.
32 | 
33 |     Attributes
34 |     ----------
35 |     metric_ : string
36 |         The performance metric used for evaluation.
37 | 
38 |     threshold_ : float
39 |         The prediction threshold.
40 |     """
41 |     def __init__(self, metric='aucprc', threshold=0.5):
42 | 
43 |         if metric not in ['aucprc', 'mcc', 'fscore']:
44 |             raise ValueError(f'Metric {metric} is not supported.\
45 |                 \nSupported metrics: [aucprc, mcc, fscore].')
46 | 
47 |         self.metric_ = metric
48 |         self.threshold_ = threshold
49 | 
50 |     def score(self, y_true, y_pred):
51 |         """Score function.
52 | 
53 |         Parameters
54 |         ----------
55 |         y_true : array-like of shape = [n_samples]
56 |             The ground truth labels.
57 | 
58 |         y_pred : array-like of shape = [n_samples]
59 |             The predicted probabilities.
60 | 
61 |         Returns
62 |         ----------
63 |         score: float
64 |         """
65 |         if self.metric_ == 'aucprc':
66 |             return average_precision_score(y_true, y_pred)
67 |         elif self.metric_ == 'mcc':
68 |             y_pred_b = y_pred.copy()
69 |             y_pred_b[y_pred_b < self.threshold_] = 0
70 |             y_pred_b[y_pred_b >= self.threshold_] = 1
71 |             return matthews_corrcoef(y_true, y_pred_b)
72 |         elif self.metric_ == 'fscore':
73 |             y_pred_b = y_pred.copy()
74 |             y_pred_b[y_pred_b < self.threshold_] = 0
75 |             y_pred_b[y_pred_b >= self.threshold_] = 1
76 |             return f1_score(y_true, y_pred_b)
77 | 
78 | def load_dataset(dataset_name):
79 |     """Utility function that loads training/validation/test data from the /data folder.
80 | 
81 |     Parameters
82 |     ----------
83 |     dataset_name : string
84 |         Name of the target dataset.
85 |         Train/validation/test data are expected to be saved in .csv files with
86 |         the suffix _{train/valid/test}.csv. Labels should be in the last column,
87 |         named 'label'.
88 | 
89 |     Returns
90 |     ----------
91 |     X_train, y_train, X_valid, y_valid, X_test, y_test
92 |         numpy arrays
93 |     """
94 |     df_train = pd.read_csv(f'data/{dataset_name}_train.csv')
95 |     X_train = df_train[df_train.columns.tolist()[:-1]]
96 |     y_train = df_train['label']
97 |     df_valid = pd.read_csv(f'data/{dataset_name}_valid.csv')
98 |     X_valid = df_valid[df_valid.columns.tolist()[:-1]]
99 |     y_valid = df_valid['label']
100 |     df_test = pd.read_csv(f'data/{dataset_name}_test.csv')
101 |     X_test = df_test[df_test.columns.tolist()[:-1]]
102 |     y_test = df_test['label']
103 |     return X_train.values, y_train.values, \
104 |            X_valid.values, y_valid.values, \
105 |            X_test.values, y_test.values
106 | 
107 | def histogram_error_distribution(y_true, y_pred, bins):
108 |     """Utility function that computes the error histogram.
109 | 
110 |     Parameters
111 |     ----------
112 |     y_true : array-like of shape = [n_samples]
113 |         The ground truth labels.
114 | 
115 |     y_pred : array-like of shape = [n_samples]
116 |         The predicted probabilities.
117 | 
118 |     bins : int, number of bins in the histogram
119 | 
120 |     Returns
121 |     ----------
122 |     hist : array-like of shape = [bins]
123 |     """
124 |     error = np.absolute(y_true - y_pred)
125 |     hist, _ = np.histogram(error, bins=bins)
126 |     return hist
127 | 
128 | def gaussian_prob(x, mu, sigma):
129 |     """The Gaussian probability density function.
130 | 
131 |     Parameters
132 |     ----------
133 |     x : float
134 |         Input number.
135 | 
136 |     mu : float
137 |         Parameter mu of the Gaussian function.
138 | 
139 |     sigma : float
140 |         Parameter sigma of the Gaussian function.
141 | 
142 |     Returns
143 |     ----------
144 |     output : float
145 |     """
146 |     return (1 / (sigma * np.sqrt(2*np.pi))) * np.exp(-0.5*np.power((x-mu)/sigma, 2))
147 | 
148 | def meta_sampling(y_pred, y_true, X, n_under_samples, mu, sigma, random_state=None):
149 |     """The meta-sampling process in MESA.
150 | 
151 |     Parameters
152 |     ----------
153 |     y_pred : array-like of shape = [n_samples]
154 |         The predicted probabilities.
155 | 
156 |     y_true : array-like of shape = [n_samples]
157 |         The ground truth labels.
158 | 
159 |     X : array-like of shape = [n_samples, n_features]
160 |         The original data to be meta-sampled.
161 | 
162 |     n_under_samples : int, <= n_samples
163 |         The expected number of instances in the subset after meta-sampling.
164 | 
165 |     mu : float
166 |         Parameter mu of the Gaussian function.
167 | 
168 |     sigma : float
169 |         Parameter sigma of the Gaussian function.
170 | 
171 |     random_state : int or None, optional (default=None)
172 |         If int, random_state is the seed used by the random number generator.
173 |         If None, the random number generator is the RandomState instance used
174 |         by np.random.
175 | 
176 |     Returns
177 |     ----------
178 |     X_subset : array-like of shape = [n_under_samples, n_features]
179 |         The subset after meta-sampling.
180 |     """
181 |     sample_weights = gaussian_prob(np.absolute(y_true - y_pred), mu, sigma)
182 |     X_subset = pd.DataFrame(X).sample(n_under_samples, weights=sample_weights, random_state=random_state)
183 |     return X_subset
184 | 
185 | def imbalance_train_test_split(X, y, test_size, random_state=None):
186 |     '''Train/test split that guarantees the same class distribution in both splits.'''
187 |     classes = np.unique(y)
188 |     X_trains, y_trains, X_tests, y_tests = [], [], [], []
189 |     for label in classes:
190 |         inds = (y==label)
191 |         X_label, y_label = X[inds], y[inds]
192 |         X_train, X_test, y_train, y_test = train_test_split(
193 |             X_label, y_label, test_size=test_size, random_state=random_state)
194 |         X_trains.append(X_train)
195 |         X_tests.append(X_test)
196 |         y_trains.append(y_train)
197 |         y_tests.append(y_test)
198 |     X_train = np.concatenate(X_trains)
199 |     X_test = np.concatenate(X_tests)
200 |     y_train = np.concatenate(y_trains)
201 |     y_test = np.concatenate(y_tests)
202 |     return X_train, X_test, y_train, y_test
203 | 
204 | def state_scale(state, scale):
205 |     '''Scale up the meta-states.'''
206 |     return state / state.sum() * 2 * scale
207 | 
208 | def memory_init_fulfill(args, memory):
209 |     '''Initialize (pre-fill) the replay memory with synthetic transitions.'''
210 |     num_bins = args.num_bins
211 |     memory_size = args.replay_size
212 |     error_in_bins = np.linspace(0, 1, num_bins)
213 |     mu = 0.3
214 |     unfitted, midfitted, fitted = \
215 |         gaussian_prob(error_in_bins, 1, mu), \
216 |         gaussian_prob(error_in_bins, 0.5, mu), \
217 |         gaussian_prob(error_in_bins, 0, mu)
218 |     underfitting_state = state_scale(np.concatenate([unfitted, unfitted]), num_bins)
219 |     learning_state = state_scale(np.concatenate([midfitted, midfitted]), num_bins)
220 |     overfitting_state = state_scale(np.concatenate([fitted, midfitted]), num_bins)
221 |     noise_scale = 0.5
222 |     num_per_transitions = int(memory_size/3)
223 |     for i in range(num_per_transitions):
224 |         state = underfitting_state + np.random.rand(num_bins*2) * noise_scale
225 |         next_state = underfitting_state + np.random.rand(num_bins*2) * noise_scale
226 |         memory.push(state, [0.9], args.reward_coefficient * 0.05, next_state, 0)
227 |     for i in range(num_per_transitions):
228 |         state = learning_state + np.random.rand(num_bins*2) * noise_scale
229 |         next_state = learning_state + np.random.rand(num_bins*2) * noise_scale
230 |         memory.push(state, [0.5], args.reward_coefficient * 0.05, next_state, 0)
231 |     for i in range(num_per_transitions):
232 |         state = overfitting_state + np.random.rand(num_bins*2) * noise_scale
233 |         next_state = overfitting_state + np.random.rand(num_bins*2) * noise_scale
234 |         memory.push(state, [0.1], args.reward_coefficient * 0.05, next_state, 0)
235 |     return memory
236 | 
237 | def transform(y):
238 |     if y.ndim == 1:
239 |         y = y[:, np.newaxis]
240 |     if y.shape[1] == 1:
241 |         y = np.append(1-y, y, axis=1)
242 |     return y
243 | 
244 | def cross_entropy(y_pred, y_true, epsilon=1e-4):
245 |     '''Cross-entropy error function.'''
246 |     y_pred = np.clip(y_pred, epsilon, 1 - epsilon)
247 |     y_pred = transform(y_pred)
248 |     y_true = transform(y_true)
249 |     return (-y_true*np.log(y_pred)).sum(axis=1)
250 | 
251 | def slide_mean(data, window_half):
252 |     '''Sliding mean for smoother visualization.'''
253 |     result = []
254 |     for i in range(len(data)):
255 |         lower_bound = max(i-window_half, 0)
256 |         upper_bound = min(i+window_half+1, len(data))
257 |         result.append(np.mean(data[lower_bound:upper_bound]))
258 |     return result
--------------------------------------------------------------------------------
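
As a quick orientation to the helpers defined in utils.py above, the snippet below is a minimal usage sketch rather than code from the repository: the dataset name 'my_dataset' and the DecisionTreeClassifier baseline are placeholder assumptions. It loads a pre-split dataset with load_dataset, scores validation probabilities with Rater, and draws an error-weighted subset of the training data with meta_sampling.

# Usage sketch (assumptions: files data/my_dataset_{train,valid,test}.csv exist
# with labels in a final column named 'label'; the classifier choice is arbitrary).
from sklearn.tree import DecisionTreeClassifier

from utils import Rater, load_dataset, meta_sampling

# Load the pre-split dataset as numpy arrays.
X_train, y_train, X_valid, y_valid, X_test, y_test = load_dataset('my_dataset')

# Fit any probabilistic classifier on the (imbalanced) training data.
clf = DecisionTreeClassifier(max_depth=3, random_state=0)
clf.fit(X_train, y_train)
y_valid_prob = clf.predict_proba(X_valid)[:, 1]

# Evaluate with the Rater ('aucprc' by default; 'mcc' and 'fscore' also supported).
rater = Rater(metric='aucprc')
print('validation AUCPRC:', rater.score(y_valid, y_valid_prob))

# Draw an error-weighted subset of the training set: instances whose absolute
# prediction error is close to mu get the highest sampling weight
# (mu=0.2, sigma=0.2 are illustrative values, not tuned).
y_train_prob = clf.predict_proba(X_train)[:, 1]
subset = meta_sampling(y_train_prob, y_train, X_train,
                       n_under_samples=100, mu=0.2, sigma=0.2, random_state=0)
print('meta-sampled subset shape:', subset.shape)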