├── .gitignore ├── LICENSE ├── README.md ├── config ├── dataset │ ├── d_188.yaml │ ├── mimic.yaml │ ├── yeast_downstream.yaml │ └── yeast_upstream.yaml ├── hydra │ └── job_logging │ │ └── custom.yaml ├── hyp │ ├── _.yaml │ ├── hyp_for_single_net.yaml │ └── hyp_for_transfer.yaml ├── model │ ├── ft_transformer.yaml │ ├── ft_transformer_downstream.yaml │ ├── ft_transformer_pretrain.yaml │ ├── mlp.yaml │ └── resnet.yaml ├── optune_config.yaml ├── train_net_config.yaml └── transfer_learn_net_config.yaml ├── data ├── yeast_downstream │ ├── N.csv │ └── y.csv └── yeast_upstream │ ├── N.csv │ ├── normalizer.pkl │ └── y.csv ├── deep_tabular ├── __init__.py ├── adjectives.py ├── models │ ├── __init__.py │ ├── ft_transformer.py │ ├── mlp.py │ └── resnet.py ├── names.py └── utils │ ├── __init__.py │ ├── data_tools.py │ ├── get_demo_dataset.py │ ├── mimic_tools.py │ ├── testing.py │ ├── tools.py │ ├── training.py │ └── warmup.py ├── optune_from_scratch.py ├── requirements.txt ├── train_net_from_scratch.py └── transfer_learn_net.py /.gitignore: -------------------------------------------------------------------------------- 1 | */output*/ 2 | outputs/ 3 | plots/output.pdf/ 4 | .cml_*temp.sh 5 | .idea 6 | .idea/* 7 | pretrained_models 8 | logs 9 | cmllogs 10 | .DS_Store 11 | *.pdf 12 | */*.pdf 13 | junk* 14 | maze_data/train_* 15 | maze_data/test_* 16 | clean_performance.csv 17 | scripts/launch*.sh 18 | scripts/*/launch*.sh 19 | results 20 | runs 21 | plots 22 | classification_training 23 | check_default 24 | checkpoints 25 | *.png 26 | helpers/data/ 27 | plots/ 28 | launch/my_launch/ 29 | output_default 30 | 31 | # Byte-compiled / optimized / DLL files 32 | __pycache__/ 33 | *.py[cod] 34 | *$py.class 35 | 36 | # C extensions 37 | *.so 38 | 39 | # Distribution / packaging 40 | .Python 41 | build/ 42 | develop-eggs/ 43 | dist/ 44 | downloads/ 45 | eggs/ 46 | .eggs/ 47 | lib/ 48 | lib64/ 49 | parts/ 50 | sdist/ 51 | var/ 52 | wheels/ 53 | *.egg-info/ 54 | .installed.cfg 55 | *.egg 56 | MANIFEST 57 | 58 | # PyInstaller 59 | # Usually these files are written by a python script from a template 60 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
61 | *.manifest 62 | *.spec 63 | 64 | # Installer logs 65 | pip-log.txt 66 | pip-delete-this-directory.txt 67 | 68 | # Unit test / coverage reports 69 | htmlcov/ 70 | .tox/ 71 | .coverage 72 | .coverage.* 73 | .cache 74 | nosetests.xml 75 | coverage.xml 76 | *.cover 77 | .hypothesis/ 78 | .pytest_cache/ 79 | 80 | # Translations 81 | *.mo 82 | *.pot 83 | 84 | # Django stuff: 85 | *.log 86 | local_settings.py 87 | db.sqlite3 88 | 89 | # Flask stuff: 90 | instance/ 91 | .webassets-cache 92 | 93 | # Scrapy stuff: 94 | .scrapy 95 | 96 | # Sphinx documentation 97 | docs/_build/ 98 | 99 | # PyBuilder 100 | target/ 101 | 102 | # Jupyter Notebook 103 | .ipynb_checkpoints 104 | 105 | # pyenv 106 | .python-version 107 | 108 | # celery beat schedule file 109 | celerybeat-schedule 110 | 111 | # SageMath parsed files 112 | *.sage.py 113 | 114 | # Environments 115 | .env 116 | .venv 117 | env/ 118 | venv/ 119 | ENV/ 120 | env.bak/ 121 | venv.bak/ 122 | 123 | # Spyder project settings 124 | .spyderproject 125 | .spyproject 126 | 127 | # Rope project settings 128 | .ropeproject 129 | 130 | # mkdocs documentation 131 | /site 132 | 133 | # mypy 134 | .mypy_cache/ 135 | *.zip 136 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Valeriia Cherepanova, Roman Levin, and Avi Schwarzschild 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Tabular Transfer Learning 2 | 3 | This repository contains the official implementation of the paper 4 | 5 | [Transfer Learning with Deep Tabular Models](https://arxiv.org/abs/2206.15306), now accepted to ICLR 2023! 6 | 7 | Roman Levin, Valeriia Cherepanova, Avi Schwarzschild, Arpit Bansal, C. Bayan Bruss, Tom Goldstein, Andrew Gordon Wilson, Micah Goldblum. 
8 | 9 | ## Citation 10 | 11 | If you find our work useful, please cite: 12 | ``` 13 | @article{levin2022transfer, 14 | title={Transfer Learning with Deep Tabular Models}, 15 | author={Levin, Roman and Cherepanova, Valeriia and Schwarzschild, Avi and Bansal, Arpit and Bruss, C Bayan and Goldstein, Tom and Wilson, Andrew Gordon and Goldblum, Micah}, 16 | journal={arXiv preprint arXiv:2206.15306}, 17 | year={2022} 18 | } 19 | ``` 20 | 21 | ## Getting Started 22 | 23 | ### Requirements 24 | This code was developed and tested with Python 3.8.2. 25 | 26 | To install requirements: 27 | 28 | ```$ pip install -r requirements.txt``` 29 | 30 | ## Demo Transfer Learning Experiment 31 | While in the paper we used the MetaMIMIC test bed for our transfer learning experiments (please see the instructions below for obtaining it), we provide a demo experiment with a readily downloadable [Yeast](http://mulan.sourceforge.net/datasets-mlc.html) dataset -- a multi-label dataset with 14 targets. 32 | 33 | We created a basic transfer learning setup by splitting the Yeast data into a multi-label [yeast_upstream](data/yeast_upstream) dataset with 13 targets for pretraining and [yeast_downstream](data/yeast_downstream) with the remaining 14th target as the downstream target. 34 | 35 | Now, we first pretrain FT-Transformer on the upstream data (for details, please see the config files, implemented using [Hydra](https://hydra.cc/docs/intro/)): 36 | 37 | ```$ python transfer_learn_net.py model=ft_transformer_pretrain dataset=yeast_upstream``` 38 | 39 | Then, we fine-tune the pretrained model on the downstream data: 40 | 41 | ```$ python transfer_learn_net.py model=ft_transformer_downstream dataset=yeast_downstream``` 42 | 43 | Finally, we compare the results to the model trained from scratch on the downstream data: 44 | 45 | ```$ python train_net_from_scratch.py model=ft_transformer dataset=yeast_downstream``` 46 | 47 | On the upstream 13-target multi-label pretraining task with 1400 samples we get an AUC of approximately 0.7. The model with transfer learning scores 0.63 AUC on the downstream binary task with 300 samples, while the model trained from scratch achieves 0.58 AUC. 48 | ## MetaMIMIC 49 | In our paper, we used the MetaMIMIC test bed, which is based on the [MIMIC-IV clinical database](https://physionet.org/content/mimiciv/1.0/) of ICU admissions, for our transfer learning experiments. Please see the [MetaMIMIC GitHub](https://github.com/ModelOriented/metaMIMIC) for instructions on constructing the MetaMIMIC dataset. Once constructed, please put it in `data/mimic/MetaMIMIC.csv` and use the provided `config/dataset/mimic.yaml` config. 50 | 51 | ## Saving Protocol 52 | 53 | Each time one of the main scripts is executed, a hash-like adjective-Name combination is created and saved as the `run_id` for that execution. The `run_id` is used when saving checkpoints and results so that previous runs with similar hyperparameters cannot be accidentally overwritten. The folder used for saving both checkpoints and results can be chosen with the following command-line argument: 54 | 55 | ```$ python train_net_from_scratch.py name=``` 56 | 57 | During training, the best-performing model (on the held-out validation set) is saved to `outputs//training-/model_best.pth`, and the corresponding arguments for that run are saved in `outputs//training-/.hydra/`. 58 | 59 | The results are saved in `outputs//training-/stats.json`, and the TensorBoard data is saved in `outputs//training-/tensorboard`.
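For example, once the two downstream runs above have finished, you can compare them by reading each run's `stats.json`. The sketch below is illustrative only: the run directories assume the default `name` values from `config/transfer_learn_net_config.yaml` and `config/train_net_config.yaml`, and the keys inside `stats.json` depend on what the training loop writes.

```python
import json
from pathlib import Path

# Assumed run directories for the demo commands above (derived from the default
# `name`, `model.name`, and `dataset.name` values in the provided Hydra configs).
runs = {
    "transfer": Path("outputs/transfer-learning-experiment/ft_transformer-yeast_downstream"),
    "from_scratch": Path("outputs/from_scratch_default/training-ft_transformer-yeast_downstream"),
}

for label, run_dir in runs.items():
    with open(run_dir / "stats.json") as f:
        stats = json.load(f)
    # Print whatever metrics the training loop stored for this run.
    print(label, stats)
```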
60 | 61 | ## Additional Functionality 62 | In addition to transfer learning with deep tabular models, this repo allows you to train networks from scratch using `train_net_from_scratch.py` and to optimize their hyperparameters with [Optuna](https://optuna.org) using `optune_from_scratch.py`. 63 | 64 | ## Contributing 65 | 66 | We believe in open-source, community-driven software development. Please open issues and pull requests with any questions or improvements you have. 67 | 68 | ## References 69 | * We borrow network implementations from the [RTDL repo](https://github.com/Yura52/rtdl) and extensively leverage the RTDL repo in general. 70 | * [Yeast demo data source](http://mulan.sourceforge.net/datasets-mlc.html) 71 | * [MetaMIMIC](https://github.com/ModelOriented/metaMIMIC) 72 | * [MIMIC-IV clinical database](https://physionet.org/content/mimiciv/1.0/) 73 | -------------------------------------------------------------------------------- /config/dataset/d_188.yaml: -------------------------------------------------------------------------------- 1 | name: 188 2 | source: openml 3 | task: multiclass 4 | normalization: quantile 5 | y_policy: 6 | -------------------------------------------------------------------------------- /config/dataset/mimic.yaml: -------------------------------------------------------------------------------- 1 | name: mimic 2 | stage: downstream 3 | task: binclass 4 | normalization: quantile 5 | downstream_target: 0 6 | downstream_sample_num: 200 7 | y_policy: -------------------------------------------------------------------------------- /config/dataset/yeast_downstream.yaml: -------------------------------------------------------------------------------- 1 | name: yeast_downstream 2 | source: local 3 | task: binclass 4 | normalization: quantile 5 | normalizer_path: '../../../data/yeast_upstream/normalizer.pkl' 6 | stage: 'downstream' 7 | y_policy: 8 | -------------------------------------------------------------------------------- /config/dataset/yeast_upstream.yaml: -------------------------------------------------------------------------------- 1 | name: yeast_upstream 2 | source: local 3 | task: multilabel 4 | normalization: quantile 5 | normalizer_path: '../../../data/yeast_upstream/normalizer.pkl' 6 | stage: 'pretrain' 7 | y_policy: 8 | -------------------------------------------------------------------------------- /config/hydra/job_logging/custom.yaml: -------------------------------------------------------------------------------- 1 | version: 1 2 | formatters: 3 | simple: 4 | format: "[%(asctime)s %(levelname)s]: %(message)s" 5 | datefmt: "%m/%d/%Y %H:%M:%S" 6 | handlers: 7 | console: 8 | class: logging.StreamHandler 9 | formatter: simple 10 | stream: ext://sys.stdout 11 | file: 12 | class: logging.handlers.RotatingFileHandler 13 | formatter: simple 14 | filename: log.log 15 | root: 16 | handlers: [console, file] 17 | 18 | disable_existing_loggers: false 19 | -------------------------------------------------------------------------------- /config/hyp/_.yaml: -------------------------------------------------------------------------------- 1 | epochs: 200 2 | lr: 0.0001 3 | lr_decay: step 4 | lr_factor: 0.1 5 | lr_schedule: 6 | - 40 7 | - 80 8 | optimizer: adam 9 | patience: 30 10 | save_period: -1 11 | seed: 0 12 | test_batch_size: 256 13 | train_batch_size: 256 14 | use_patience: true 15 | val_period: 10 16 | warmup_period: 5 17 | weight_decay: 2e-4 18 | momentum: 0.9 19 | warmup_type: linear 20 |
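Taken together, these defaults describe 200 epochs of Adam with a linear warmup over the first 5 epochs and a step decay of the learning rate by a factor of 0.1 at epochs 40 and 80, together with patience-based early stopping. The authoritative behaviour is defined in `deep_tabular/utils/warmup.py` and the training loop; the snippet below is only a rough, illustrative reading of how these fields combine.

```python
def lr_at_epoch(epoch, base_lr=1e-4, warmup_period=5, lr_schedule=(40, 80), lr_factor=0.1):
    """Illustrative reading of the hyp config: linear warmup for `warmup_period`
    epochs, then the base LR decayed by `lr_factor` at each milestone."""
    if epoch < warmup_period:
        return base_lr * (epoch + 1) / warmup_period
    n_decays = sum(epoch >= milestone for milestone in lr_schedule)
    return base_lr * lr_factor ** n_decays

# lr_at_epoch(0) -> 2e-05, lr_at_epoch(10) -> 1e-04, lr_at_epoch(90) -> 1e-06
```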
-------------------------------------------------------------------------------- /config/hyp/hyp_for_single_net.yaml: -------------------------------------------------------------------------------- 1 | epochs: 200 2 | lr: 0.0001 3 | lr_decay: step 4 | lr_factor: 0.1 5 | lr_schedule: 6 | - 40 7 | - 80 8 | optimizer: adam 9 | patience: 30 10 | save_period: -1 11 | seed: 0 12 | test_batch_size: 256 13 | train_batch_size: 256 14 | use_patience: true 15 | val_period: 10 16 | warmup_period: 5 17 | weight_decay: 2e-4 18 | momentum: 0.9 19 | warmup_type: linear 20 | head_warmup_period: 10 21 | head_lr: 0.001 22 | -------------------------------------------------------------------------------- /config/hyp/hyp_for_transfer.yaml: -------------------------------------------------------------------------------- 1 | epochs: 200 2 | lr: 0.0001 3 | lr_decay: step 4 | lr_factor: 0.1 5 | lr_schedule: 6 | - 40 7 | - 80 8 | optimizer: adam 9 | patience: 30 10 | save_period: -1 11 | seed: 0 12 | test_batch_size: 256 13 | train_batch_size: 256 14 | use_patience: true 15 | val_period: 10 16 | warmup_period: 5 17 | weight_decay: 2e-4 18 | momentum: 0.9 19 | warmup_type: linear 20 | head_warmup_period: 10 21 | head_lr: 0.001 22 | -------------------------------------------------------------------------------- /config/model/ft_transformer.yaml: -------------------------------------------------------------------------------- 1 | name: ft_transformer 2 | d_embedding: 192 3 | model_path: 4 | use_mlp_head: false 5 | freeze_feature_extractor: false 6 | token_bias: true 7 | n_layers: 3 8 | n_heads: 8 9 | d_ffn_factor: 1.3333333333 10 | attention_dropout: 0.2 11 | ffn_dropout: 0.1 12 | residual_dropout: 0.0 13 | activation: reglu 14 | prenormalization: true 15 | initialization: kaiming 16 | kv_compression: 17 | kv_compression_sharing: -------------------------------------------------------------------------------- /config/model/ft_transformer_downstream.yaml: -------------------------------------------------------------------------------- 1 | name: ft_transformer 2 | d_embedding: 192 3 | model_path: '../../../outputs/transfer-learning-experiment/ft_transformer-yeast_upstream/model_best.pth' 4 | use_mlp_head: false 5 | freeze_feature_extractor: false 6 | token_bias: true 7 | n_layers: 3 8 | n_heads: 8 9 | d_ffn_factor: 1.3333333333 10 | attention_dropout: 0.2 11 | ffn_dropout: 0.1 12 | residual_dropout: 0.0 13 | activation: reglu 14 | prenormalization: true 15 | initialization: kaiming 16 | kv_compression: 17 | kv_compression_sharing: 18 | -------------------------------------------------------------------------------- /config/model/ft_transformer_pretrain.yaml: -------------------------------------------------------------------------------- 1 | name: ft_transformer 2 | d_embedding: 192 3 | model_path: 4 | use_mlp_head: false 5 | freeze_feature_extractor: false 6 | token_bias: true 7 | n_layers: 3 8 | n_heads: 8 9 | d_ffn_factor: 1.3333333333 10 | attention_dropout: 0.2 11 | ffn_dropout: 0.1 12 | residual_dropout: 0.0 13 | activation: reglu 14 | prenormalization: true 15 | initialization: kaiming 16 | kv_compression: 17 | kv_compression_sharing: -------------------------------------------------------------------------------- /config/model/mlp.yaml: -------------------------------------------------------------------------------- 1 | dropout: 0.1 2 | name: mlp 3 | d_embedding: 100 4 | model_path: 5 | d_layers: # cfg.model.d_layers = [100, 100] 6 | - 100 7 | - 100 8 | 
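For orientation, `mlp.yaml` describes a plain feed-forward network: two hidden layers of width 100 with dropout 0.1 (the inline comment shows that `d_layers` is read as a list), while `d_embedding` is typically used for embedding categorical features. The actual model lives in `deep_tabular/models/mlp.py` and follows the RTDL implementation; the snippet below is only a rough PyTorch sketch of such a network for purely numerical inputs.

```python
import torch.nn as nn

def build_mlp(d_in, d_layers=(100, 100), dropout=0.1, d_out=1):
    """Rough sketch of an MLP with the widths and dropout from mlp.yaml
    (illustrative only; not the repo's RTDL-based implementation)."""
    layers, prev = [], d_in
    for width in d_layers:
        layers += [nn.Linear(prev, width), nn.ReLU(), nn.Dropout(dropout)]
        prev = width
    layers.append(nn.Linear(prev, d_out))
    return nn.Sequential(*layers)
```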
-------------------------------------------------------------------------------- /config/model/resnet.yaml: -------------------------------------------------------------------------------- 1 | name: resnet 2 | d_embedding: 128 3 | model_path: 4 | d: 200 5 | d_hidden_factor: 3 6 | n_layers: 5 7 | activation: relu 8 | normalization: batchnorm 9 | hidden_dropout: 0.2 10 | residual_dropout: 0.2 -------------------------------------------------------------------------------- /config/optune_config.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - model: mlp 3 | - dataset: d_188 4 | - hyp: hyp_for_single_net 5 | - override hydra/job_logging: custom 6 | - _self_ 7 | 8 | 9 | hydra: 10 | run: 11 | dir: ./outputs/${name}/optuning-${model.name}-${dataset.name} 12 | job_logging: 13 | handlers: 14 | file: 15 | filename: train.log 16 | 17 | train_log: train_log 18 | name: from_scratch_optuna 19 | -------------------------------------------------------------------------------- /config/train_net_config.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - model: mlp 3 | - dataset: d_188 4 | - hyp: hyp_for_single_net 5 | - override hydra/job_logging: custom 6 | - _self_ 7 | 8 | 9 | hydra: 10 | run: 11 | dir: ./outputs/${name}/training-${model.name}-${dataset.name} 12 | job_logging: 13 | handlers: 14 | file: 15 | filename: train.log 16 | 17 | train_log: train_log 18 | name: from_scratch_default 19 | -------------------------------------------------------------------------------- /config/transfer_learn_net_config.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - model: ft_transformer_pretrain 3 | - dataset: yeast_upstream 4 | - hyp: hyp_for_transfer 5 | - override hydra/job_logging: custom 6 | - _self_ 7 | 8 | 9 | hydra: 10 | run: 11 | dir: ./outputs/${name}/${model.name}-${dataset.name} 12 | job_logging: 13 | handlers: 14 | file: 15 | filename: train.log 16 | 17 | train_log: train_log 18 | name: transfer-learning-experiment 19 | -------------------------------------------------------------------------------- /data/yeast_downstream/y.csv: -------------------------------------------------------------------------------- 1 | Class6 2 | 1 3 | 1 4 | 1 5 | 0 6 | 0 7 | 0 8 | 0 9 | 1 10 | 0 11 | 0 12 | 0 13 | 0 14 | 0 15 | 0 16 | 0 17 | 0 18 | 0 19 | 0 20 | 1 21 | 0 22 | 0 23 | 0 24 | 0 25 | 0 26 | 0 27 | 0 28 | 0 29 | 0 30 | 0 31 | 0 32 | 0 33 | 1 34 | 0 35 | 1 36 | 0 37 | 1 38 | 0 39 | 0 40 | 0 41 | 1 42 | 1 43 | 1 44 | 0 45 | 1 46 | 0 47 | 0 48 | 0 49 | 0 50 | 1 51 | 0 52 | 0 53 | 1 54 | 0 55 | 0 56 | 0 57 | 1 58 | 0 59 | 0 60 | 0 61 | 0 62 | 1 63 | 0 64 | 1 65 | 0 66 | 0 67 | 0 68 | 0 69 | 1 70 | 0 71 | 1 72 | 0 73 | 0 74 | 1 75 | 1 76 | 1 77 | 0 78 | 0 79 | 0 80 | 0 81 | 0 82 | 1 83 | 0 84 | 0 85 | 0 86 | 0 87 | 0 88 | 0 89 | 0 90 | 0 91 | 0 92 | 0 93 | 1 94 | 0 95 | 0 96 | 0 97 | 1 98 | 1 99 | 0 100 | 1 101 | 0 102 | 1 103 | 0 104 | 0 105 | 0 106 | 1 107 | 1 108 | 0 109 | 0 110 | 0 111 | 1 112 | 1 113 | 0 114 | 0 115 | 0 116 | 0 117 | 0 118 | 1 119 | 1 120 | 0 121 | 0 122 | 1 123 | 0 124 | 0 125 | 0 126 | 0 127 | 0 128 | 0 129 | 0 130 | 0 131 | 0 132 | 0 133 | 0 134 | 1 135 | 1 136 | 0 137 | 1 138 | 0 139 | 0 140 | 0 141 | 0 142 | 0 143 | 1 144 | 1 145 | 0 146 | 0 147 | 1 148 | 0 149 | 0 150 | 0 151 | 0 152 | 0 153 | 0 154 | 1 155 | 0 156 | 1 157 | 0 158 | 1 159 | 0 160 | 1 161 | 0 162 | 1 163 | 0 164 | 0 165 | 0 166 | 1 167 | 0 168 | 0 169 | 0 
170 | 1 171 | 1 172 | 0 173 | 0 174 | 1 175 | 0 176 | 0 177 | 0 178 | 0 179 | 0 180 | 1 181 | 0 182 | 1 183 | 0 184 | 0 185 | 1 186 | 0 187 | 1 188 | 1 189 | 1 190 | 1 191 | 1 192 | 0 193 | 0 194 | 1 195 | 0 196 | 0 197 | 1 198 | 0 199 | 0 200 | 0 201 | 0 202 | 0 203 | 0 204 | 0 205 | 0 206 | 0 207 | 0 208 | 0 209 | 0 210 | 1 211 | 1 212 | 0 213 | 0 214 | 0 215 | 0 216 | 0 217 | 0 218 | 1 219 | 1 220 | 0 221 | 1 222 | 0 223 | 1 224 | 0 225 | 0 226 | 0 227 | 0 228 | 1 229 | 0 230 | 1 231 | 0 232 | 0 233 | 0 234 | 1 235 | 0 236 | 1 237 | 0 238 | 0 239 | 0 240 | 0 241 | 1 242 | 0 243 | 1 244 | 0 245 | 0 246 | 1 247 | 0 248 | 0 249 | 0 250 | 0 251 | 0 252 | 1 253 | 0 254 | 0 255 | 0 256 | 1 257 | 0 258 | 0 259 | 0 260 | 1 261 | 0 262 | 1 263 | 0 264 | 0 265 | 0 266 | 0 267 | 0 268 | 0 269 | 0 270 | 0 271 | 0 272 | 1 273 | 1 274 | 0 275 | 0 276 | 0 277 | 1 278 | 0 279 | 0 280 | 0 281 | 0 282 | 0 283 | 1 284 | 0 285 | 0 286 | 1 287 | 0 288 | 0 289 | 1 290 | 1 291 | 0 292 | 0 293 | 0 294 | 0 295 | 0 296 | 0 297 | 0 298 | 0 299 | 0 300 | 0 301 | 0 302 | 0 303 | 1 304 | 0 305 | 0 306 | 1 307 | 0 308 | 1 309 | 0 310 | 1 311 | 0 312 | 1 313 | 1 314 | 0 315 | 1 316 | 0 317 | 0 318 | 0 319 | 0 320 | 1 321 | 0 322 | 0 323 | 0 324 | 1 325 | 0 326 | 0 327 | 1 328 | 0 329 | 0 330 | 0 331 | 0 332 | 0 333 | 0 334 | 0 335 | 0 336 | 0 337 | 0 338 | 0 339 | 0 340 | 0 341 | 0 342 | 0 343 | 0 344 | 0 345 | 0 346 | 0 347 | 0 348 | 0 349 | 0 350 | 1 351 | 0 352 | 1 353 | 1 354 | 0 355 | 0 356 | 1 357 | 0 358 | 0 359 | 1 360 | 1 361 | 0 362 | 0 363 | 0 364 | 0 365 | 0 366 | 0 367 | 1 368 | 0 369 | 1 370 | 0 371 | 0 372 | 0 373 | 0 374 | 0 375 | 0 376 | 0 377 | 0 378 | 1 379 | 0 380 | 1 381 | 0 382 | 1 383 | 0 384 | 1 385 | 0 386 | 0 387 | 0 388 | 1 389 | 0 390 | 0 391 | 1 392 | 0 393 | 0 394 | 0 395 | 0 396 | 0 397 | 0 398 | 0 399 | 0 400 | 1 401 | 0 402 | 0 403 | 0 404 | 0 405 | 1 406 | 0 407 | 0 408 | 0 409 | 0 410 | 0 411 | 0 412 | 1 413 | 0 414 | 0 415 | 0 416 | 0 417 | 0 418 | 1 419 | 0 420 | 1 421 | 1 422 | 0 423 | 1 424 | 0 425 | 0 426 | 0 427 | 0 428 | 0 429 | 0 430 | 0 431 | 0 432 | 1 433 | 1 434 | 0 435 | 0 436 | 1 437 | 0 438 | 0 439 | 0 440 | 0 441 | 0 442 | 0 443 | 0 444 | 1 445 | 0 446 | 0 447 | 1 448 | 0 449 | 0 450 | 0 451 | 0 452 | 0 453 | 0 454 | 1 455 | 0 456 | 0 457 | 0 458 | 0 459 | 0 460 | 1 461 | 1 462 | 0 463 | 0 464 | 0 465 | 1 466 | 0 467 | 0 468 | 1 469 | 0 470 | 0 471 | 1 472 | 0 473 | 0 474 | 0 475 | 1 476 | 0 477 | 0 478 | 0 479 | 0 480 | 0 481 | 0 482 | 0 483 | 0 484 | 0 485 | 1 486 | -------------------------------------------------------------------------------- /data/yeast_upstream/normalizer.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LevinRoman/tabular-transfer-learning/5a4e4cf4c7cebdf16c58157504ab6a639623c90d/data/yeast_upstream/normalizer.pkl -------------------------------------------------------------------------------- /deep_tabular/__init__.py: -------------------------------------------------------------------------------- 1 | from deep_tabular import models 2 | from deep_tabular import utils 3 | from deep_tabular.adjectives import adjectives 4 | from deep_tabular.names import names 5 | from deep_tabular.utils.testing import evaluate_model, evaluate_backbone, evaluate_backbone_one_dataset 6 | from deep_tabular.utils.training import TrainingSetup, default_training_loop 7 | 8 | __all__ = ["evaluate_model", 9 | "default_training_loop", 10 | "evaluate_backbone", 11 | "evaluate_backbone_one_dataset", 12 
| "models", 13 | "TrainingSetup", 14 | "utils"] 15 | -------------------------------------------------------------------------------- /deep_tabular/adjectives.py: -------------------------------------------------------------------------------- 1 | adjectives = ['abased', 'abject', 'ablaze', 'abler', 'ablest', 'abloom', 'ablush', 'about', 'abreast', 'abridged', 'abroach', 'abroad', 'abrupt', 'abscessed', 'absolved', 'absorbed', 'abstruse', 'absurd', 'abused', 'abuzz', 'accrete', 'accrued', 'accurst', 'acerb', 'aching', 'acock', 'acold', 'acorned', 'acred', 'acrid', 'across', 'acting', 'added', 'addle', 'addorsed', 'adept', 'adjunct', 'admired', 'adnate', 'adored', 'adrift', 'adroit', 'adscript', 'adult', 'adunc', 'adust', 'advised', 'aery', 'afeard', 'afeared', 'affine', 'affined', 'afire', 'aflame', 'afloat', 'afoot', 'afoul', 'afraid', 'after', 'aftmost', 'agape', 'agaze', 'aged', 'ageing', 'ageless', 'agelong', 'aggrieved', 'aghast', 'agile', 'aging', 'agleam', 'agley', 'aglow', 'agnate', 'ago', 'agog', 'agone', 'agreed', 'aground', 'ahead', 'ahorse', 'ahull', 'aidful', 'aidless', 'ailing', 'aimless', 'ain', 'air', 'airborne', 'airless', 'airsick', 'airtight', 'ajar', 'akin', 'alar', 'alate', 'alert', 'algal', 'algid', 'algoid', 'alien', 'alight', 'alike', 'alined', 'alive', 'alleged', 'allowed', 'alloyed', 'alone', 'aloof', 'alright', 'altered', 'altern', 'alvine', 'amazed', 'amber', 'amiss', 'amok', 'amort', 'ample', 'amuck', 'amused', 'android', 'angled', 'anguine', 'anguished', 'anile', 'announced', 'ansate', 'anti', 'antic', 'antique', 'antlered', 'antlike', 'antrorse', 'anxious', 'apart', 'apeak', 'apish', 'appalled', 'applied', 'appressed', 'arcane', 'arching', 'argent', 'arid', 'armchair', 'armless', 'armored', 'aroid', 'aroused', 'arranged', 'arrant', 'arrased', 'arrhythmic', 'artful', 'artless', 'arty', 'ashake', 'ashamed', 'ashen', 'ashy', 'askance', 'askant', 'askew', 'asking', 'aslant', 'asleep', 'aslope', 'asphalt', 'asprawl', 'asquint', 'assumed', 'assured', 'astir', 'astral', 'astute', 'aswarm', 'athirst', 'athrill', 'atilt', 'atrip', 'attached', 'attack', 'attent', 'attired', 'attrite', 'attuned', 'audile', 'aurous', 'austere', 'averse', 'avid', 'avowed', 'awake', 'aware', 'awash', 'away', 'aweless', 'awesome', 'awestruck', 'awful', 'awheel', 'awing', 'awkward', 'awnless', 'awry', 'axile', 'azure', 'babbling', 'baccate', 'backboned', 'backhand', 'backless', 'backmost', 'backstage', 'backstair', 'backstairs', 'backswept', 'backward', 'backwoods', 'baddish', 'baffling', 'baggy', 'bairnly', 'balanced', 'balding', 'baldish', 'baleful', 'balky', 'bally', 'balmy', 'banal', 'bandaged', 'banded', 'baneful', 'bangled', 'bankrupt', 'banner', 'bannered', 'baptist', 'bar', 'barbate', 'bardic', 'bardy', 'bareback', 'barebacked', 'barefaced', 'barefoot', 'barer', 'barest', 'baric', 'barish', 'barkless', 'barky', 'barmy', 'baroque', 'barrelled', 'baseless', 'baser', 'basest', 'bashful', 'basic', 'bassy', 'bastioned', 'bated', 'battered', 'battled', 'batty', 'bausond', 'bawdy', 'beaded', 'beady', 'beaky', 'beaming', 'beamish', 'beamless', 'beamy', 'beardless', 'bearish', 'bearlike', 'beastlike', 'beastly', 'beaten', 'beating', 'beauish', 'becalmed', 'bedded', 'bedfast', 'bedight', 'bedimmed', 'bedrid', 'beechen', 'beefy', 'beery', 'beetle', 'befogged', 'begrimed', 'beguiled', 'behind', 'bellied', 'belted', 'bemazed', 'bemused', 'bended', 'bending', 'bendwise', 'bendy', 'benign', 'benthic', 'benzal', 'bereft', 'berried', 'berserk', 'besieged', 'bespoke', 'besprent', 'bestead', 'bestial', 
'betrothed', 'beveled', 'biased', 'bifid', 'biform', 'bigger', 'biggest', 'biggish', 'bijou', 'bilgy', 'bilious', 'billion', 'billionth', 'bilobed', 'binate', 'biped', 'birchen', 'birdlike', 'birken', 'bistred', 'bitchy', 'bitless', 'bitten', 'bitty', 'bivalve', 'bizarre', 'blackish', 'blameful', 'blameless', 'blaring', 'blasted', 'blasting', 'blatant', 'bleary', 'blended', 'blending', 'blindfold', 'blinding', 'blinking', 'blissful', 'blissless', 'blithesome', 'bloated', 'blockish', 'blocky', 'blooded', 'bloodied', 'bloodshot', 'bloodstained', 'blooming', 'bloomless', 'bloomy', 'blotchy', 'blotto', 'blotty', 'blowhard', 'blowsy', 'blowy', 'blowzy', 'blubber', 'bluer', 'bluest', 'bluish', 'blurry', 'blushful', 'blushless', 'boarish', 'boastful', 'boastless', 'bobtail', 'bodger', 'bodied', 'boding', 'boggy', 'bogus', 'bomb', 'bombproof', 'boneless', 'bonism', 'bonkers', 'bony', 'bonzer', 'bookish', 'bookless', 'boorish', 'booted', 'bootleg', 'bootless', 'boozy', 'bordered', 'boring', 'bosker', 'bosky', 'bosom', 'bosomed', 'bossy', 'botchy', 'bouffant', 'boughten', 'bouilli', 'bouncy', 'bounded', 'bounden', 'boundless', 'bousy', 'bovid', 'bovine', 'bowing', 'boxlike', 'boyish', 'bracing', 'brackish', 'bractless', 'braggart', 'bragging', 'braided', 'brainless', 'brainsick', 'brainy', 'brakeless', 'brambly', 'branching', 'branchless', 'branchlike', 'branny', 'brashy', 'brassy', 'brattish', 'bratty', 'braver', 'bravest', 'braving', 'brawny', 'brazen', 'breaking', 'breakneck', 'breasted', 'breathless', 'breathy', 'breechless', 'breeding', 'breezeless', 'breezy', 'brickle', 'bricky', 'bridgeless', 'briefless', 'brilliant', 'brimful', 'brimless', 'brimming', 'brinded', 'brindle', 'brindled', 'brinish', 'briny', 'bristly', 'brittle', 'broadband', 'broadcast', 'broadish', 'broadloom', 'broadside', 'broch', 'broguish', 'bronzy', 'broody', 'broomy', 'browless', 'brownish', 'browny', 'bruising', 'brumal', 'brumous', 'brunet', 'brunette', 'brushless', 'brushy', 'brutal', 'brute', 'brutelike', 'brutish', 'bubbly', 'buccal', 'buckish', 'buckram', 'buckshee', 'buckskin', 'bucktooth', 'bucktoothed', 'budless', 'buggy', 'bughouse', 'buirdly', 'bulbar', 'bulbous', 'bulgy', 'bulky', 'bullate', 'bullied', 'bullish', 'bumbling', 'bumptious', 'bumpy', 'bunchy', 'bunted', 'buoyant', 'burdened', 'burghal', 'buried', 'burlesque', 'burly', 'burry', 'bursal', 'bursting', 'bushy', 'busied', 'buskined', 'bustled', 'busty', 'buttocked', 'buxom', 'bygone', 'byssal', 'caboched', 'caboshed', 'caddish', 'cadenced', 'cadent', 'cadgy', 'cagey', 'cagy', 'caitiff', 'calcic', 'calfless', 'caller', 'callous', 'callow', 'calmy', 'campy', 'cancelled', 'cancrine', 'cancroid', 'candent', 'candied', 'canine', 'cankered', 'canny', 'canty', 'cany', 'capeskin', 'caprine', 'captious', 'captive', 'cardboard', 'carefree', 'careful', 'careless', 'careworn', 'caring', 'carking', 'carlish', 'carmine', 'carnose', 'carpal', 'carping', 'carsick', 'carven', 'casebook', 'casteless', 'castled', 'catching', 'catchweight', 'catchy', 'cattish', 'catty', 'caudate', 'cauline', 'causal', 'causeless', 'cautious', 'cayenned', 'ceaseless', 'cecal', 'cedarn', 'ceilinged', 'censured', 'centered', 'centred', 'centric', 'centrist', 'centum', 'cercal', 'cerise', 'cerous', 'certain', 'cervid', 'cervine', 'cestoid', 'chaffless', 'chaffy', 'chainless', 'chairborne', 'chaliced', 'chalky', 'chambered', 'chanceful', 'chanceless', 'chancroid', 'chancrous', 'chancy', 'changeful', 'changeless', 'changing', 'chapeless', 'chargeful', 'chargeless', 'charming', 'charmless', 
'charry', 'chartered', 'chartless', 'chary', 'chasmal', 'chasmic', 'chasmy', 'chasseur', 'chaster', 'chastest', 'chastised', 'chatty', 'checkered', 'checky', 'cheeky', 'cheerful', 'cheerless', 'cheerly', 'cheery', 'cheesy', 'chelate', 'chemic', 'chequy', 'cherty', 'chestnut', 'chesty', 'chevroned', 'chewy', 'chichi', 'chiefless', 'chiefly', 'chiffon', 'childing', 'childish', 'childless', 'childlike', 'childly', 'chill', 'chilly', 'chin', 'chintzy', 'chipper', 'chippy', 'chirpy', 'chiseled', 'chiselled', 'chlorous', 'chocker', 'choicer', 'chokey', 'choking', 'choky', 'chondral', 'choosey', 'choosy', 'chopping', 'choppy', 'choral', 'chordal', 'chordate', 'choric', 'chrismal', 'chronic', 'chthonic', 'chubby', 'chuffy', 'chummy', 'chunky', 'churchless', 'churchly', 'churchward', 'churchy', 'churlish', 'churning', 'chymous', 'cichlid', 'cirrate', 'cirrose', 'cirsoid', 'cissoid', 'cissy', 'cisted', 'cistic', 'citrous', 'citrus', 'clamant', 'clammy', 'clankless', 'clannish', 'clasping', 'classless', 'classy', 'clastic', 'clathrate', 'clausal', 'claustral', 'clavate', 'clawless', 'clayey', 'clayish', 'cleanly', 'cleansing', 'clerkish', 'clerkly', 'cliffy', 'clingy', 'clinquant', 'clipping', 'cliquey', 'cliquish', 'cliquy', 'clithral', 'clitic', 'clockwise', 'cloddish', 'cloddy', 'clogging', 'cloggy', 'cloistered', 'cloistral', 'clonic', 'closer', 'closest', 'clotty', 'clouded', 'cloudless', 'cloudy', 'clovered', 'clownish', 'cloying', 'clubby', 'clucky', 'clueless', 'clumpy', 'clumsy', 'clustered', 'coaly', 'coarser', 'coarsest', 'coastal', 'coastward', 'coastwise', 'coated', 'coatless', 'coccal', 'coccoid', 'cockney', 'cocksure', 'cocky', 'coffered', 'cogent', 'cognate', 'coky', 'coldish', 'collapsed', 'collect', 'colloid', 'colly', 'coltish', 'columned', 'comal', 'comate', 'combined', 'combless', 'combust', 'comely', 'comfy', 'coming', 'commie', 'commo', 'comose', 'compact', 'compelled', 'compleat', 'complete', 'compo', 'composed', 'concave', 'conceived', 'concerned', 'conchal', 'conchate', 'concise', 'condemned', 'condign', 'conferred', 'confined', 'confirmed', 'confused', 'conjoined', 'conjoint', 'conjunct', 'connate', 'conoid', 'conscious', 'constrained', 'consumed', 'contained', 'contrate', 'contrite', 'contrived', 'controlled', 'contused', 'convex', 'convict', 'convinced', 'cooing', 'cooking', 'coolish', 'copied', 'coppiced', 'corbelled', 'cordate', 'corded', 'cordial', 'cordless', 'coreless', 'corking', 'corky', 'cormous', 'cornered', 'cornute', 'corny', 'correct', 'corrupt', 'corvine', 'cosher', 'costal', 'costate', 'costive', 'costly', 'costumed', 'cottaged', 'couchant', 'counter', 'countless', 'courant', 'couthie', 'couthy', 'coxal', 'coyish', 'cozy', 'crabbed', 'crabby', 'crablike', 'crabwise', 'crackbrained', 'crackers', 'cracking', 'crackjaw', 'crackle', 'crackling', 'crackly', 'crackpot', 'craftless', 'crafty', 'cragged', 'craggy', 'cranky', 'crannied', 'crashing', 'craven', 'crawling', 'crawly', 'creaky', 'creamlaid', 'creamy', 'creasy', 'credent', 'creedal', 'creepy', 'crenate', 'crescive', 'cressy', 'crestless', 'cricoid', 'crimeless', 'crimpy', 'crimson', 'crinal', 'cringing', 'crinite', 'crinkly', 'crinoid', 'crinose', 'crippling', 'crispate', 'crispy', 'crisscross', 'cristate', 'croaky', 'crookback', 'crooked', 'crosiered', 'crossbred', 'crosstown', 'crosswise', 'croupous', 'croupy', 'crowded', 'crowing', 'crowning', 'crownless', 'crucial', 'cruder', 'crudest', 'cruel', 'crumbly', 'crumby', 'crummy', 'crumpled', 'crunchy', 'crural', 'crushing', 'crustal', 'crusted', 
'crustless', 'crusty', 'crying', 'cryptal', 'cryptic', 'ctenoid', 'cubbish', 'cubist', 'cuboid', 'cultic', 'cultish', 'cultrate', 'cumbrous', 'cunning', 'cupric', 'cuprous', 'curbless', 'curdy', 'cureless', 'curly', 'currish', 'cursed', 'cursing', 'cursive', 'curtate', 'curving', 'curvy', 'cushy', 'cuspate', 'cussed', 'custom', 'cutcha', 'cuter', 'cutest', 'cyan', 'cycloid', 'cyclone', 'cymoid', 'cymose', 'cystoid', 'cytoid', 'czarist', 'daedal', 'daffy', 'daimen', 'dainty', 'daisied', 'dam', 'damaged', 'damfool', 'damning', 'dampish', 'dancing', 'dangling', 'dapper', 'dapple', 'dappled', 'daring', 'darkish', 'darkling', 'darksome', 'dashing', 'dastard', 'dated', 'dateless', 'dauby', 'dauntless', 'daylong', 'daytime', 'deathful', 'deathless', 'deathlike', 'deathly', 'deathy', 'debased', 'debauched', 'deceased', 'decent', 'declared', 'decreed', 'decurved', 'dedal', 'deedless', 'defaced', 'defiled', 'defined', 'deflexed', 'deformed', 'defunct', 'deictic', 'deism', 'deject', 'deltoid', 'demure', 'dendroid', 'denser', 'densest', 'dentate', 'dentoid', 'deposed', 'depraved', 'depressed', 'deprived', 'deranged', 'dermal', 'dermic', 'dermoid', 'dernier', 'descant', 'described', 'desert', 'deserved', 'designed', 'desired', 'desmoid', 'despised', 'destined', 'detached', 'detailed', 'deuced', 'deviled', 'devoid', 'devout', 'dewlapped', 'dewy', 'dextral', 'dextrorse', 'dextrous', 'diarch', 'dicey', 'dickey', 'dicky', 'diet', 'diffuse', 'diffused', 'dighted', 'diglot', 'dilute', 'dimmest', 'dimming', 'dimply', 'dingbats', 'dingy', 'dinkies', 'dinky', 'diplex', 'diploid', 'dippy', 'direful', 'direr', 'direst', 'dirty', 'discalced', 'disclosed', 'discoid', 'discreet', 'discrete', 'diseased', 'disgraced', 'disguised', 'dishy', 'disjoined', 'disjoint', 'disjunct', 'disliked', 'dispensed', 'disperse', 'dispersed', 'displayed', 'displeased', 'disposed', 'dissolved', 'distal', 'distent', 'distilled', 'distinct', 'distrait', 'distraught', 'distressed', 'disturbed', 'distyle', 'disused', 'divers', 'diverse', 'divorced', 'dizzied', 'dizzy', 'docile', 'dockside', 'doddered', 'dodgy', 'dogged', 'dogging', 'doggish', 'doggone', 'doggoned', 'doggy', 'doglike', 'doited', 'doleful', 'dolesome', 'dollish', 'doltish', 'donnard', 'donnered', 'donnish', 'donsie', 'dopey', 'dopy', 'dormant', 'dormie', 'dormy', 'dorty', 'dotal', 'doting', 'dotted', 'doty', 'doubling', 'doubtful', 'doubting', 'doubtless', 'doughy', 'dovelike', 'dovetailed', 'dovish', 'dowdy', 'dowie', 'downbeat', 'downhill', 'downrange', 'downright', 'downstage', 'downstair', 'downstairs', 'downstate', 'downstream', 'downwind', 'dozen', 'dozenth', 'dozing', 'dozy', 'draffy', 'drafty', 'dragging', 'draggy', 'draining', 'drastic', 'dratted', 'draughty', 'dreadful', 'dreamful', 'dreamless', 'dreamlike', 'dreamy', 'dreary', 'dreggy', 'dressy', 'drier', 'driest', 'driftless', 'drifty', 'drippy', 'driven', 'drizzly', 'droning', 'dronish', 'droopy', 'dropping', 'dropsied', 'drossy', 'droughty', 'drouthy', 'drowsing', 'drowsy', 'drudging', 'drumly', 'drunken', 'dryer', 'ducal', 'duckbill', 'duckie', 'ducky', 'ductile', 'duddy', 'dudish', 'dulcet', 'dullish', 'dumbstruck', 'dumpish', 'dun', 'dungy', 'dural', 'duskish', 'dusky', 'dustless', 'dustproof', 'dwarfish', 'dyeline', 'dying', 'earnest', 'earthborn', 'earthbound', 'earthen', 'earthly', 'earthquaked', 'earthward', 'earthy', 'easeful', 'eastbound', 'eastmost', 'eastward', 'eaten', 'eating', 'ebon', 'eccrine', 'ecru', 'edgeless', 'edging', 'edgy', 'eely', 'eerie', 'eery', 'effete', 'effluent', 'effuse', 'egal', 
'eighteen', 'eighteenth', 'eightfold', 'eighty', 'elapsed', 'elder', 'eldest', 'eldritch', 'elect', 'elfin', 'elfish', 'elite', 'elmy', 'elvish', 'embowed', 'emersed', 'emptied', 'enarched', 'enate', 'encased', 'enceinte', 'endarch', 'endless', 'endmost', 'endorsed', 'endways', 'enforced', 'engorged', 'engrailed', 'engrained', 'engraved', 'enhanced', 'enjambed', 'enlarged', 'enorm', 'enough', 'enow', 'enraged', 'enrapt', 'enrolled', 'enslaved', 'enthralled', 'entire', 'entranced', 'enured', 'enwrapped', 'equine', 'equipped', 'erased', 'erect', 'ermined', 'erose', 'errant', 'errhine', 'erring', 'ersatz', 'erstwhile', 'escaped', 'essive', 'estranged', 'estrous', 'eterne', 'ethic', 'ethmoid', 'ethnic', 'eustyle', 'evens', 'evoked', 'exact', 'exarch', 'exchanged', 'excused', 'exempt', 'exhaled', 'expert', 'expired', 'exposed', 'exsert', 'extant', 'extinct', 'extrorse', 'eyeless', 'fabled', 'faceless', 'facete', 'factious', 'faddish', 'faddy', 'faded', 'fadeless', 'fading', 'faecal', 'failing', 'faintish', 'fairish', 'faithful', 'faithless', 'falcate', 'falser', 'falsest', 'fameless', 'famished', 'famous', 'fancied', 'fanfold', 'fangled', 'fangless', 'farand', 'farci', 'farfetched', 'farming', 'farouche', 'farrow', 'farther', 'farthest', 'fatal', 'fated', 'fateful', 'fatigue', 'fatigued', 'fatless', 'fatter', 'fattest', 'fattish', 'faucal', 'faucial', 'faultless', 'faulty', 'faunal', 'favored', 'favoured', 'fearful', 'fearless', 'fearsome', 'feastful', 'feathered', 'featured', 'febrile', 'fecal', 'feckless', 'fecund', 'federalist', 'feeble', 'feeblish', 'feeling', 'feisty', 'feline', 'felon', 'felsic', 'fenny', 'feodal', 'feral', 'ferine', 'ferny', 'fervent', 'fervid', 'fesswise', 'festal', 'festive', 'fetching', 'fetial', 'fetid', 'feudal', 'fewer', 'fibered', 'fibroid', 'fibrous', 'fickle', 'fictile', 'fictive', 'fiddling', 'fiddly', 'fiendish', 'fiercer', 'fiercest', 'fifteen', 'fifteenth', 'fifty', 'filar', 'filial', 'filose', 'filthy', 'filtrable', 'financed', 'fineable', 'finer', 'finest', 'fingered', 'finished', 'finite', 'finless', 'finny', 'fireproof', 'firry', 'fishy', 'fissile', 'fistic', 'fitchy', 'fitful', 'fitted', 'fitter', 'fitting', 'fivefold', 'fizzy', 'flabby', 'flaccid', 'flagging', 'flaggy', 'flagrant', 'flameproof', 'flaming', 'flamy', 'flappy', 'flaring', 'flashy', 'flatling', 'flattest', 'flattish', 'flaunty', 'flawless', 'flawy', 'flaxen', 'fleckless', 'fledgeling', 'fledgling', 'fledgy', 'fleeceless', 'fleecy', 'fleeing', 'fleeting', 'fleshless', 'fleshly', 'fleshy', 'flexile', 'flightless', 'flighty', 'flimsy', 'flinty', 'flippant', 'flipping', 'flitting', 'floaty', 'floccose', 'floccus', 'flooded', 'floodlit', 'floppy', 'florid', 'flory', 'flossy', 'floury', 'flowered', 'flowing', 'fluent', 'fluffy', 'flukey', 'fluky', 'flurried', 'fluted', 'fluty', 'flyweight', 'foamless', 'foamy', 'focused', 'focussed', 'foetal', 'foetid', 'fogbound', 'foggy', 'fogless', 'folded', 'folkish', 'folklore', 'folksy', 'fontal', 'foodless', 'foolish', 'foolproof', 'footed', 'footless', 'footling', 'footsore', 'footworn', 'foppish', 'forceful', 'forceless', 'forehand', 'foremost', 'forenamed', 'foresaid', 'foreseen', 'forespent', 'foretold', 'forfeit', 'forky', 'former', 'formless', 'fornent', 'forspent', 'forte', 'forthright', 'fortis', 'forworn', 'foughten', 'fourfold', 'fourscore', 'foursquare', 'fourteenth', 'foxy', 'fozy', 'fractious', 'fractured', 'fragile', 'fragrant', 'frantic', 'fratchy', 'fraudful', 'frazzled', 'freakish', 'freaky', 'freckly', 'freebie', 'freeborn', 'freeing', 
'freer', 'freest', 'frenzied', 'frequent', 'freshman', 'fretful', 'fretted', 'fretty', 'fribble', 'friended', 'friendless', 'frightened', 'frightful', 'frilly', 'fringeless', 'frisky', 'frizzly', 'frizzy', 'frockless', 'frolic', 'fronded', 'frontal', 'frontier', 'frontless', 'frosted', 'frostless', 'frostlike', 'frosty', 'frothy', 'froward', 'frowsty', 'frowsy', 'frowzy', 'frozen', 'fructed', 'frugal', 'fruited', 'fruitful', 'fruitless', 'fruity', 'frumpish', 'frumpy', 'frustrate', 'fubsy', 'fucoid', 'fugal', 'fulfilled', 'fulgent', 'fulgid', 'fulsome', 'fulvous', 'fumy', 'funded', 'funest', 'fungal', 'fungoid', 'fungous', 'funky', 'furcate', 'furry', 'furthest', 'furtive', 'furzy', 'fuscous', 'fusil', 'fusile', 'fussy', 'fustian', 'fusty', 'futile', 'fuzzy', 'gabbroid', 'gabled', 'gadoid', 'gadrooned', 'gaga', 'gainful', 'gainless', 'gainly', 'gaited', 'galliard', 'galling', 'gallooned', 'galore', 'gamer', 'gamesome', 'gamest', 'gamey', 'gamic', 'gammy', 'gamy', 'gangling', 'gangly', 'ganoid', 'gaping', 'gardant', 'garish', 'garni', 'gassy', 'gated', 'gateless', 'gaudy', 'gaumless', 'gauzy', 'gawky', 'gawsy', 'gearless', 'geegaw', 'gelded', 'gelid', 'gemel', 'gemmate', 'gemmy', 'genal', 'genial', 'genic', 'genteel', 'genty', 'georgic', 'germane', 'gestic', 'gewgaw', 'ghastful', 'ghastly', 'ghostly', 'ghoulish', 'gibbose', 'gibbous', 'giddied', 'giddy', 'gifted', 'giggly', 'gilded', 'gimcrack', 'gimlet', 'gimpy', 'girlish', 'girly', 'giving', 'glabrate', 'glabrous', 'glacial', 'gladsome', 'glaikit', 'glairy', 'glandered', 'glaring', 'glary', 'glasslike', 'glassy', 'gleeful', 'gleesome', 'gleety', 'glenoid', 'glial', 'glibber', 'glibbest', 'globate', 'globoid', 'globose', 'gloomful', 'glooming', 'gloomy', 'glossies', 'glossy', 'glottic', 'glowing', 'gluey', 'glummer', 'glummest', 'glumpy', 'glutted', 'glyphic', 'glyptic', 'gnarly', 'gnathic', 'gneissic', 'gneissoid', 'gnomic', 'gnomish', 'goalless', 'goateed', 'goatish', 'goddam', 'goddamn', 'goddamned', 'godless', 'godlike', 'godly', 'goitrous', 'goodish', 'goodly', 'gooey', 'goofy', 'goosey', 'goosy', 'gorgeous', 'gormless', 'gorsy', 'gory', 'gouty', 'gowaned', 'goyish', 'graceful', 'graceless', 'gracile', 'gracious', 'gradely', 'grainy', 'grapey', 'grapy', 'grasping', 'graspless', 'grassy', 'grateful', 'grating', 'gratis', 'grave', 'gravel', 'graveless', 'gravest', 'gravid', 'grayish', 'greening', 'greenish', 'greensick', 'greyish', 'griefless', 'grieving', 'grimmer', 'grimmest', 'grimy', 'gripping', 'gripple', 'grippy', 'grisly', 'gristly', 'gritty', 'grizzled', 'groggy', 'groovy', 'groping', 'grotesque', 'grotty', 'grouchy', 'groundless', 'grouty', 'grubby', 'grudging', 'gruesome', 'gruffish', 'grumbly', 'grummer', 'grummest', 'grumose', 'grumous', 'grumpy', 'gruntled', 'guardant', 'guarded', 'guardless', 'guideless', 'guiding', 'guileful', 'guileless', 'guiltless', 'guilty', 'gular', 'gulfy', 'gummous', 'gummy', 'gumptious', 'gunless', 'gushy', 'gusty', 'gutless', 'gutsy', 'gutta', 'guttate', 'gyral', 'gyrate', 'gyrose', 'habile', 'hackly', 'hackneyed', 'hadal', 'haemal', 'haemic', 'haggish', 'hairless', 'hairlike', 'halest', 'halftone', 'hallowed', 'haloid', 'halting', 'hamate', 'hammered', 'hammy', 'handed', 'handled', 'handless', 'handmade', 'handsome', 'handworked', 'handwrought', 'handy', 'hangdog', 'hapless', 'haploid', 'haptic', 'harassed', 'hardback', 'hardened', 'hardwood', 'harlot', 'harmful', 'harmless', 'harnessed', 'harried', 'hastate', 'hasty', 'hatching', 'hated', 'hateful', 'hatless', 'hatted', 'haughty', 'haunted', 
'haunting', 'hawkish', 'hawklike', 'haywire', 'hazy', 'headed', 'headfirst', 'headless', 'headlong', 'headmost', 'headstrong', 'heady', 'healing', 'healthful', 'healthy', 'heaping', 'heapy', 'hearted', 'heartfelt', 'hearties', 'heartless', 'heartsome', 'hearty', 'heated', 'heathen', 'heathy', 'heating', 'heavies', 'heaving', 'hectic', 'hedgy', 'heedful', 'heedless', 'heelless', 'hefty', 'heinous', 'heirless', 'hellish', 'helmless', 'helpful', 'helpless', 'hemal', 'hempen', 'hempy', 'hennaed', 'herbaged', 'herbal', 'herbless', 'herby', 'here', 'hidden', 'highbrow', 'highest', 'hilding', 'hilly', 'hinder', 'hindmost', 'hindward', 'hipper', 'hippest', 'hippy', 'hircine', 'hirsute', 'hispid', 'hissing', 'histie', 'histoid', 'hitchy', 'hither', 'hiveless', 'hivelike', 'hobnail', 'hobnailed', 'hoggish', 'hoiden', 'holey', 'hollow', 'holmic', 'holstered', 'homebound', 'homeless', 'homelike', 'homely', 'homesick', 'homespun', 'homeward', 'homey', 'homy', 'honest', 'honeyed', 'honied', 'hoodless', 'hoofless', 'hooly', 'hopeful', 'hopeless', 'hopping', 'horal', 'hornish', 'hornless', 'hornlike', 'horny', 'horrent', 'horrid', 'horsey', 'horsy', 'hotfoot', 'hotshot', 'hotter', 'hottest', 'hotting', 'hottish', 'hourlong', 'hourly', 'housebound', 'houseless', 'hoven', 'howling', 'hoyden', 'hueless', 'huffish', 'huffy', 'huger', 'hugest', 'hulking', 'hulky', 'humbler', 'humdrum', 'humic', 'humid', 'hummel', 'humpbacked', 'humpy', 'hunchback', 'hunchbacked', 'hundredth', 'hungry', 'hunky', 'hunted', 'hurling', 'hurried', 'hurtful', 'hurtless', 'hurtling', 'husky', 'hydric', 'hydro', 'hydroid', 'hydrous', 'hymnal', 'hyoid', 'hyphal', 'hypnoid', 'icky', 'ictic', 'idem', 'idled', 'idlest', 'idling', 'iffy', 'ignored', 'imbued', 'immane', 'immense', 'immersed', 'immune', 'impel', 'impelled', 'impish', 'implied', 'imposed', 'improved', 'impure', 'inane', 'inapt', 'inboard', 'inborn', 'inbound', 'inbred', 'inbreed', 'inby', 'incased', 'incensed', 'incised', 'incog', 'increased', 'incrust', 'incult', 'incurved', 'incuse', 'indign', 'indoor', 'indrawn', 'inept', 'infect', 'infelt', 'infirm', 'inflamed', 'inflexed', 'inform', 'informed', 'ingrain', 'ingrained', 'ingrate', 'ingrown', 'inhaled', 'inhumed', 'injured', 'inky', 'inlaid', 'inmost', 'innate', 'inphase', 'inrush', 'insane', 'inscribed', 'inshore', 'insides', 'inspired', 'instinct', 'insured', 'intact', 'intense', 'intent', 'intern', 'interred', 'intime', 'intoed', 'intoned', 'intown', 'introrse', 'inured', 'involved', 'inward', 'inwrought', 'irate', 'ireful', 'irksome', 'itching', 'itchy', 'ivied', 'jaded', 'jadish', 'jagged', 'jaggy', 'jammy', 'jangly', 'jannock', 'japan', 'jarring', 'jasp', 'jaundiced', 'jazzy', 'jealous', 'jejune', 'jellied', 'jerky', 'jessant', 'jestful', 'jesting', 'jet', 'jetting', 'jetty', 'jeweled', 'jewelled', 'jiggered', 'jiggish', 'jiggly', 'jingly', 'jobless', 'jocose', 'jocund', 'jointed', 'jointless', 'jointured', 'joking', 'jolty', 'jouncing', 'jowly', 'joyful', 'joyless', 'joyous', 'jubate', 'jugal', 'jugate', 'juiceless', 'juicy', 'jumbled', 'jumpy', 'jungly', 'jural', 'jurant', 'jussive', 'jutting', 'kacha', 'kaput', 'karmic', 'karstic', 'kayoed', 'kerchiefed', 'keyless', 'khaki', 'kidnapped', 'killing', 'kilted', 'kindless', 'kindly', 'kindred', 'kingless', 'kinglike', 'kingly', 'kinky', 'kinless', 'kirtled', 'kittle', 'klephtic', 'klutzy', 'knaggy', 'knavish', 'kneeling', 'knickered', 'knifeless', 'knightless', 'knightly', 'knitted', 'knobby', 'knotless', 'knotted', 'knotty', 'knowing', 'knuckly', 'knurly', 'kookie', 
'kooky', 'kosher', 'kutcha', 'labelled', 'labile', 'labored', 'laboured', 'labrid', 'labroid', 'lacking', 'lacy', 'laddish', 'laden', 'laggard', 'laic', 'lairy', 'laky', 'lambdoid', 'lambent', 'lamblike', 'lamer', 'lamest', 'laming', 'lanate', 'landed', 'landless', 'landscaped', 'landward', 'languid', 'lanky', 'lanose', 'lapelled', 'lapstrake', 'larboard', 'larger', 'largest', 'largish', 'larine', 'larkish', 'larky', 'larval', 'lashing', 'lasting', 'lated', 'lateen', 'later', 'latest', 'lathlike', 'lathy', 'latish', 'latter', 'latticed', 'laurelled', 'lavish', 'lawful', 'lawless', 'lawny', 'leachy', 'leaden', 'leadless', 'leady', 'leafless', 'leafy', 'leaky', 'leaning', 'leaping', 'learned', 'leary', 'leathern', 'ledgy', 'leery', 'leftward', 'legged', 'leggy', 'legit', 'legless', 'leisure', 'leisured', 'lengthways', 'lengthwise', 'lengthy', 'lenis', 'lenten', 'lentic', 'lento', 'lentoid', 'leprose', 'leprous', 'lettered', 'licenced', 'licensed', 'licit', 'lidded', 'lidless', 'liege', 'lifeful', 'lifeless', 'lifelike', 'lifelong', 'lighted', 'lightfast', 'lightful', 'lightish', 'lightless', 'lightsome', 'lightweight', 'lignite', 'likely', 'lilied', 'limbate', 'limbless', 'limey', 'limpid', 'limy', 'liney', 'lingual', 'linty', 'liny', 'lipless', 'lipoid', 'lippy', 'lissom', 'lissome', 'listless', 'lither', 'lithesome', 'lithest', 'lithic', 'litho', 'lithoid', 'litten', 'littler', 'littlest', 'livelong', 'lively', 'livid', 'loaded', 'loamy', 'loathful', 'loathly', 'loathsome', 'lobar', 'lobate', 'lobose', 'lofty', 'logy', 'lonesome', 'longer', 'longhand', 'longing', 'longish', 'longsome', 'longwall', 'longwise', 'looking', 'loonies', 'loopy', 'looser', 'loosest', 'lordless', 'lordly', 'losel', 'losing', 'lossy', 'lotic', 'loudish', 'lounging', 'louring', 'loury', 'lousy', 'loutish', 'louvered', 'louvred', 'loveless', 'lovelorn', 'lovely', 'lovesick', 'lovesome', 'lowly', 'loyal', 'lozenged', 'lubric', 'lucent', 'lucid', 'luckless', 'lukewarm', 'lumpen', 'lumpish', 'lunate', 'lupine', 'lurdan', 'lurid', 'luscious', 'lushy', 'lustful', 'lustral', 'lustred', 'lustrous', 'lusty', 'lying', 'lymphoid', 'lyrate', 'lyric', 'macled', 'madcap', 'maddest', 'madding', 'maigre', 'mainstream', 'maintained', 'makeless', 'makeshift', 'malar', 'male', 'malign', 'malty', 'mammoth', 'man', 'maneless', 'manful', 'mangey', 'mangy', 'manic', 'manky', 'manlike', 'mannered', 'mannish', 'mansard', 'mantic', 'many', 'marching', 'mardy', 'marish', 'maroon', 'married', 'marshy', 'masking', 'massive', 'massy', 'mastless', 'mastoid', 'matchless', 'mated', 'matey', 'matin', 'matted', 'mature', 'maudlin', 'maungy', 'mawkish', 'maxi', 'mazy', 'meager', 'meagre', 'meaning', 'measled', 'measly', 'measured', 'meaty', 'medley', 'melic', 'mellow', 'mensal', 'menseful', 'menseless', 'mere', 'merest', 'merging', 'mesarch', 'meshed', 'mesic', 'messier', 'messy', 'metalled', 'mettled', 'mickle', 'middling', 'midget', 'midi', 'midmost', 'midship', 'midships', 'miffy', 'mighty', 'migrant', 'milkless', 'million', 'millionth', 'millrun', 'mimic', 'mincing', 'minded', 'mindful', 'mindless', 'mingy', 'mini', 'minim', 'minion', 'minute', 'mirky', 'mirthful', 'mirthless', 'miry', 'mis', 'misformed', 'mislaid', 'misproud', 'missive', 'misty', 'mistyped', 'misused', 'mitered', 'mizzen', 'mnemic', 'moanful', 'mobbish', 'model', 'modeled', 'modest', 'modish', 'molal', 'molar', 'moldy', 'molten', 'monarch', 'moneyed', 'monger', 'mongrel', 'monied', 'monism', 'monkish', 'mono', 'monstrous', 'montane', 'monthly', 'mony', 'moody', 'moonish', 
'moonless', 'moonlit', 'moonstruck', 'moony', 'moory', 'mopey', 'mopy', 'mordant', 'moreish', 'morish', 'morose', 'mossy', 'motey', 'mothy', 'motile', 'motored', 'mottled', 'mounted', 'mournful', 'mousey', 'mousy', 'mouthless', 'mouthy', 'moveless', 'mowburnt', 'mucid', 'mucking', 'muckle', 'mucky', 'mucoid', 'muddy', 'muggy', 'muley', 'mulish', 'mulley', 'mumchance', 'mundane', 'mural', 'murine', 'murky', 'murrey', 'muscid', 'muscly', 'museful', 'mushy', 'musing', 'musky', 'mussy', 'mustached', 'musty', 'mutant', 'muted', 'muzzy', 'mythic', 'nacred', 'nagging', 'naggy', 'naiant', 'naif', 'nailless', 'naissant', 'naive', 'nameless', 'naming', 'napless', 'napping', 'nappy', 'nary', 'nascent', 'nasty', 'natant', 'natty', 'naughty', 'nauseous', 'needful', 'needless', 'needy', 'negroid', 'neighbor', 'neighbour', 'nephric', 'nerval', 'nervate', 'nerveless', 'nervine', 'nervy', 'nescient', 'nested', 'nestlike', 'netted', 'nettly', 'neural', 'neuron', 'neuter', 'newborn', 'newish', 'newsless', 'newsy', 'nicer', 'nicest', 'nifty', 'niggard', 'niggling', 'nightless', 'nightlong', 'nightly', 'nimble', 'nimbused', 'ninefold', 'nineteen', 'ninety', 'nipping', 'nippy', 'nitid', 'nitty', 'nival', 'nobby', 'nocent', 'nodal', 'nodding', 'nodose', 'nodous', 'noiseless', 'noisette', 'noisome', 'noisy', 'nonplused', 'nonplussed', 'nonstick', 'northmost', 'northward', 'nosey', 'notal', 'notchy', 'noted', 'noteless', 'noticed', 'notour', 'novel', 'novice', 'noxious', 'nubbly', 'nubile', 'nudist', 'numbing', 'nuptial', 'nutant', 'nutlike', 'nutmegged', 'nutty', 'nymphal', 'oafish', 'oaken', 'oarless', 'oaten', 'obese', 'oblate', 'obliged', 'oblique', 'oblong', 'obscene', 'obscure', 'observed', 'obtect', 'obtuse', 'obverse', 'occult', 'ocher', 'ochre', 'ocker', 'oddball', 'offbeat', 'offhand', 'offish', 'offscreen', 'offshore', 'offside', 'often', 'oily', 'okay', 'olden', 'older', 'oldest', 'olid', 'only', 'onshore', 'onside', 'onstage', 'onward', 'oozing', 'oozy', 'ornate', 'orphan', 'ortho', 'oscine', 'osiered', 'osmic', 'osmous', 'otic', 'outback', 'outboard', 'outbound', 'outbred', 'outcast', 'outcaste', 'outdone', 'outdoor', 'outland', 'outlaw', 'outlined', 'outmost', 'outraged', 'outright', 'outsize', 'outsized', 'outspread', 'outworn', 'ovate', 'over', 'overt', 'ovine', 'ovoid', 'owing', 'owlish', 'owllike', 'packaged', 'padded', 'pagan', 'painful', 'painless', 'paler', 'palest', 'paling', 'palish', 'pallid', 'pally', 'palmar', 'palmate', 'palmy', 'palpate', 'palsied', 'paltry', 'paly', 'pan', 'paneled', 'panniered', 'panzer', 'papist', 'pappose', 'pappy', 'par', 'pardine', 'parklike', 'parky', 'parlous', 'parol', 'parotid', 'parted', 'partite', 'pass', 'passant', 'passless', 'pasteboard', 'pasted', 'pastel', 'pasties', 'pasty', 'patchy', 'patent', 'pathic', 'pathless', 'patient', 'paunchy', 'pausal', 'pauseful', 'pauseless', 'pavid', 'pawky', 'payoff', 'peaceful', 'peaceless', 'peachy', 'peaked', 'peaky', 'pearlized', 'peaty', 'pebbly', 'peccant', 'peckish', 'pedal', 'pedate', 'peddling', 'peeling', 'peerless', 'peevish', 'peewee', 'peltate', 'pelting', 'pencilled', 'pendant', 'pendent', 'pending', 'penile', 'pennate', 'pennied', 'pennoned', 'pensile', 'pensive', 'peppy', 'perceived', 'percent', 'percoid', 'perished', 'perjured', 'perky', 'perplexed', 'perverse', 'pesky', 'petalled', 'petite', 'petrous', 'pettish', 'pewter', 'phaseless', 'phasic', 'phasmid', 'phatic', 'phlegmy', 'phocine', 'phonal', 'phoney', 'phonic', 'phony', 'photic', 'phrenic', 'phthisic', 'phylloid', 'physic', 'piano', 'picked', 
'pickled', 'picky', 'pictured', 'piddling', 'piebald', 'piecemeal', 'piercing', 'piggie', 'piggish', 'pillaged', 'pillared', 'pilose', 'pimpled', 'pimply', 'pinchbeck', 'piney', 'pinguid', 'pinkish', 'pinnate', 'pinpoint', 'piny', 'pious', 'pipeless', 'pipelike', 'piping', 'pipy', 'piquant', 'piscine', 'pitchy', 'pithy', 'pitted', 'placeless', 'placid', 'placoid', 'plagal', 'plaguey', 'plaguy', 'plaided', 'plaintive', 'plangent', 'plantar', 'plantless', 'plashy', 'plastered', 'plastics', 'plated', 'platy', 'plausive', 'playful', 'pleading', 'pleasing', 'plebby', 'pleural', 'pliant', 'plical', 'plicate', 'plodding', 'plosive', 'plotful', 'plotless', 'plucky', 'plumaged', 'plumate', 'plumbic', 'plumbless', 'plumbous', 'plummy', 'plumose', 'plumy', 'plusher', 'plushest', 'poachy', 'pockmarked', 'pocky', 'podgy', 'poignant', 'pointing', 'pointless', 'pokey', 'pokies', 'poky', 'polished', 'polite', 'pollened', 'poltroon', 'pompous', 'ponceau', 'pongid', 'poorly', 'poppied', 'porcine', 'porky', 'porous', 'porrect', 'portly', 'possessed', 'postern', 'postiche', 'postponed', 'potent', 'potted', 'potty', 'powered', 'practic', 'practiced', 'practised', 'praising', 'prayerful', 'prayerless', 'preachy', 'preborn', 'precast', 'precise', 'prefab', 'preggers', 'pregnant', 'premed', 'premier', 'premiere', 'premorse', 'prepared', 'prepense', 'preschool', 'prescribed', 'prescript', 'present', 'preserved', 'preset', 'pressing', 'pressor', 'presto', 'presumed', 'pretend', 'pretty', 'prewar', 'priceless', 'pricey', 'pricy', 'prideful', 'prideless', 'priestly', 'priggish', 'primal', 'primate', 'primsie', 'princely', 'printed', 'printless', 'prissy', 'pristine', 'privies', 'probing', 'produced', 'profane', 'profaned', 'professed', 'profound', 'profuse', 'prolate', 'prolix', 'pronounced', 'proposed', 'proscribed', 'prostate', 'prostrate', 'prostyle', 'prosy', 'proven', 'provoked', 'prowessed', 'proxy', 'prudent', 'prudish', 'prunted', 'prying', 'pseudo', 'psycho', 'pubic', 'pucka', 'puddly', 'pudgy', 'puffy', 'puggish', 'puggy', 'puisne', 'pukka', 'puling', 'pulpy', 'pulsing', 'punchy', 'punctate', 'punctured', 'pungent', 'punkah', 'puny', 'pupal', 'purblind', 'purer', 'purest', 'purging', 'purplish', 'purpure', 'pursued', 'pursy', 'pushing', 'pushy', 'pussy', 'putrid', 'pygmoid', 'pyknic', 'pyoid', 'quadrate', 'quadric', 'quaggy', 'quaky', 'qualmish', 'quantal', 'quartan', 'quartered', 'quartic', 'quartile', 'queasy', 'queenless', 'queenly', 'quenchless', 'quibbling', 'quickset', 'quiet', 'quilted', 'quinate', 'quinoid', 'quinsied', 'quintan', 'quintic', 'quippish', 'quirky', 'quondam', 'rabic', 'rabid', 'racemed', 'racing', 'racist', 'racy', 'raddled', 'raffish', 'raging', 'rainier', 'rainless', 'rainproof', 'raising', 'rakehell', 'rakish', 'ralline', 'ramal', 'rambling', 'rammish', 'ramose', 'rampant', 'ramstam', 'rancid', 'randie', 'randy', 'rangy', 'ranking', 'raploch', 'rarer', 'rarest', 'raring', 'rascal', 'rasping', 'raspy', 'ratite', 'ratlike', 'rattish', 'rattling', 'rattly', 'ratty', 'raucous', 'raunchy', 'ravaged', 'raving', 'rawboned', 'rawish', 'rayless', 'rearmost', 'rearward', 'reasoned', 'rebel', 'reborn', 'rebuked', 'reckless', 'recluse', 'record', 'rectal', 'recurved', 'redder', 'reddest', 'reddish', 'reedy', 'reeky', 'refer', 'refined', 'regal', 'regent', 'regnal', 'regnant', 'released', 'relieved', 'remiss', 'remnant', 'removed', 'rending', 'renowned', 'rental', 'repand', 'repent', 'replete', 'reproved', 'reptant', 'reptile', 'required', 'rescued', 'resigned', 'resolved', 'restful', 
'resting', 'restive', 'restless', 'restored', 'retail', 'retained', 'retired', 'retral', 'retrorse', 'retuse', 'revealed', 'revered', 'reviled', 'revived', 'revolved', 'rheumy', 'rhinal', 'rhodic', 'rhomboid', 'rhotic', 'rhythmic', 'riant', 'ribald', 'ribless', 'riblike', 'ridden', 'rident', 'ridgy', 'riftless', 'righteous', 'rightful', 'rightish', 'rightist', 'rightward', 'rigid', 'riming', 'rimless', 'rimose', 'rimy', 'rindless', 'rindy', 'ringent', 'ringless', 'ripping', 'ripply', 'risen', 'risky', 'riteless', 'ritzy', 'rival', 'riven', 'roadless', 'roasting', 'robust', 'rodded', 'rodless', 'rodlike', 'roguish', 'roily', 'rollneck', 'rompish', 'roofless', 'rooky', 'roomy', 'rooted', 'rootless', 'rootlike', 'ropy', 'roseless', 'roselike', 'rostral', 'rosy', 'rotate', 'rotted', 'rotting', 'rotund', 'roughcast', 'roughish', 'rounded', 'rounding', 'roundish', 'roupy', 'rousing', 'routed', 'routine', 'rowdy', 'rubbly', 'rubied', 'rubric', 'rudish', 'rueful', 'ruffled', 'rufous', 'rugged', 'rugose', 'ruling', 'rumbly', 'rummy', 'rumpless', 'runic', 'runny', 'runtish', 'runty', 'rushing', 'rushy', 'russet', 'rustic', 'rustred', 'rusty', 'ruthful', 'ruthless', 'rutted', 'ruttish', 'rutty', 'saclike', 'sacral', 'sadist', 'sagging', 'said', 'sainted', 'saintly', 'saline', 'sallow', 'saltant', 'salted', 'saltier', 'saltish', 'saltless', 'salty', 'salving', 'sandalled', 'sanded', 'sandy', 'saner', 'sanest', 'sanguine', 'sapid', 'sapless', 'sappy', 'sarcoid', 'sarcous', 'sarky', 'sassy', 'sated', 'satem', 'saucy', 'saut', 'saving', 'savvy', 'scabby', 'scabrous', 'scaldic', 'scalelike', 'scalene', 'scalpless', 'scampish', 'scandent', 'scanty', 'scaphoid', 'scarcer', 'scarcest', 'scarless', 'scary', 'scatheless', 'scathing', 'scatty', 'scentless', 'sceptral', 'scheming', 'schistose', 'schizo', 'schizoid', 'schmaltzy', 'schmalzy', 'scientific', 'scincoid', 'scirrhoid', 'scirrhous', 'scissile', 'scleroid', 'sclerosed', 'sclerous', 'scombrid', 'scombroid', 'scopate', 'scornful', 'scraggly', 'scraggy', 'scrambled', 'scrannel', 'scrappy', 'scratchless', 'scratchy', 'scrawly', 'scrawny', 'screaky', 'screeching', 'screwy', 'scribal', 'scrimpy', 'scroddled', 'scroggy', 'scrotal', 'scrubbed', 'scrubby', 'scruffy', 'scrumptious', 'sculptured', 'scummy', 'scungy', 'scurrile', 'scurry', 'scurvy', 'scutate', 'seaboard', 'seaborne', 'seamless', 'seamy', 'searching', 'seasick', 'seatless', 'seaward', 'second', 'sectile', 'secund', 'secure', 'sedate', 'sedgy', 'seduced', 'seedless', 'seedy', 'seeing', 'seeking', 'seely', 'seeming', 'seemly', 'seismal', 'seismic', 'sejant', 'select', 'selfish', 'selfless', 'selfsame', 'semi', 'senile', 'sensate', 'senseless', 'septal', 'septate', 'sequent', 'sequined', 'seral', 'serene', 'serfish', 'serflike', 'serrate', 'serried', 'serviced', 'servo', 'setose', 'severe', 'sexism', 'sexist', 'sexless', 'sextan', 'sexy', 'shabby', 'shaded', 'shadeless', 'shadowed', 'shady', 'shaftless', 'shaken', 'shaky', 'shallow', 'shalwar', 'shamefaced', 'shameful', 'shameless', 'shapeless', 'shapely', 'shaping', 'shaven', 'shawlless', 'sheathy', 'sheepish', 'shellproof', 'shelly', 'shickered', 'shieldless', 'shieldlike', 'shier', 'shiest', 'shiftless', 'shifty', 'shingly', 'shining', 'shiny', 'shipboard', 'shipless', 'shipshape', 'shirtless', 'shirty', 'shocking', 'shoddy', 'shoeless', 'shopworn', 'shoreless', 'shoreward', 'shortcut', 'shortish', 'shorty', 'shotten', 'showy', 'shredded', 'shredless', 'shrewish', 'shrieval', 'shrinelike', 'shrouding', 'shroudless', 'shrubby', 'shrunken', 'shyer', 
'shyest', 'sicker', 'sicklied', 'sickly', 'sideling', 'sidelong', 'sideward', 'sideways', 'sighful', 'sighted', 'sightless', 'sightly', 'sigmate', 'silenced', 'silken', 'silty', 'silvan', 'silvern', 'simplex', 'sincere', 'sinful', 'singing', 'singsong', 'sinless', 'sinning', 'sissy', 'sister', 'sixfold', 'sixteen', 'sixty', 'sizy', 'skaldic', 'sketchy', 'skewbald', 'skidproof', 'skilful', 'skillful', 'skimpy', 'skinking', 'skinless', 'skinny', 'skirtless', 'skittish', 'skyward', 'slaggy', 'slakeless', 'slangy', 'slantwise', 'slapstick', 'slashing', 'slaty', 'slavish', 'sleazy', 'sleekit', 'sleeky', 'sleepless', 'sleepwalk', 'sleepy', 'sleety', 'sleeveless', 'slender', 'slickered', 'slier', 'sliest', 'slighting', 'slimline', 'slimmer', 'slimmest', 'slimming', 'slimsy', 'slimy', 'slinky', 'slippy', 'slipshod', 'sloping', 'sloshy', 'slothful', 'slouchy', 'sloughy', 'sludgy', 'sluggard', 'sluggish', 'sluicing', 'slumbrous', 'slummy', 'slushy', 'sluttish', 'smacking', 'smallish', 'smarmy', 'smartish', 'smarty', 'smashing', 'smeary', 'smectic', 'smelly', 'smileless', 'smiling', 'smitten', 'smokeproof', 'smoking', 'smothered', 'smugger', 'smuggest', 'smutty', 'snafu', 'snaggy', 'snakelike', 'snaky', 'snappish', 'snappy', 'snarly', 'snatchy', 'snazzy', 'sneaking', 'sneaky', 'snider', 'snidest', 'sniffy', 'snippy', 'snobbish', 'snoopy', 'snooty', 'snoozy', 'snoring', 'snotty', 'snouted', 'snowless', 'snowlike', 'snubby', 'snuffly', 'snuffy', 'snugger', 'snuggest', 'snugging', 'soapless', 'soapy', 'soaring', 'sober', 'socko', 'sodden', 'softish', 'softwood', 'soggy', 'sola', 'solemn', 'soli', 'sollar', 'solus', 'solute', 'solvent', 'somber', 'sombre', 'sombrous', 'sometime', 'sonant', 'songful', 'songless', 'sonless', 'sonsie', 'sonsy', 'soothfast', 'soothing', 'sopping', 'soppy', 'sordid', 'sorer', 'sorest', 'sorry', 'sotted', 'sottish', 'soulful', 'soulless', 'soundless', 'soundproof', 'soupy', 'sourish', 'southmost', 'southpaw', 'southward', 'sovran', 'sozzled', 'spaceless', 'spacial', 'spacious', 'spadelike', 'spangly', 'spanking', 'sparid', 'sparing', 'sparkless', 'sparkling', 'sparoid', 'sparry', 'sparser', 'sparsest', 'spastic', 'spathic', 'spathose', 'spatial', 'spavined', 'specious', 'speckled', 'speckless', 'speechless', 'speedful', 'speeding', 'speedless', 'speedy', 'spellbound', 'spendthrift', 'spermic', 'spermous', 'sphagnous', 'sphenic', 'spheral', 'sphereless', 'spherelike', 'spheric', 'sphery', 'sphygmic', 'sphygmoid', 'spicate', 'spicy', 'spiffing', 'spiffy', 'spiky', 'spindling', 'spindly', 'spineless', 'spinose', 'spinous', 'spiral', 'spirant', 'spireless', 'spiroid', 'spiry', 'spiteful', 'splanchnic', 'splashy', 'spleenful', 'spleenish', 'spleeny', 'splendent', 'splendid', 'splendrous', 'splenic', 'splitting', 'splurgy', 'spoken', 'spokewise', 'spongy', 'spooky', 'spoony', 'sportful', 'sportive', 'sportless', 'sporty', 'spotless', 'spotty', 'spousal', 'spouseless', 'spouted', 'spoutless', 'spriggy', 'sprightful', 'sprightly', 'springing', 'springless', 'springlike', 'springtime', 'springy', 'sprucer', 'sprucest', 'sprucing', 'spryer', 'spryest', 'spunky', 'spurless', 'squabby', 'squalid', 'squally', 'squamate', 'squamous', 'squarish', 'squarrose', 'squashy', 'squeaky', 'squeamish', 'squiffy', 'squiggly', 'squirmy', 'squirting', 'squishy', 'stabbing', 'stabile', 'stagey', 'stagnant', 'stagy', 'stalkless', 'stalky', 'stalwart', 'stalworth', 'stannous', 'staple', 'starboard', 'starchy', 'staring', 'starless', 'starlight', 'starlike', 'starring', 'starry', 'starveling', 'starving', 
'statant', 'stated', 'stateless', 'stateside', 'statewide', 'statist', 'stative', 'statued', 'steadfast', 'stealthy', 'steamtight', 'steamy', 'stedfast', 'steepled', 'stelar', 'stellar', 'stellate', 'stemless', 'stenosed', 'stepwise', 'steric', 'sterile', 'sternal', 'sternmost', 'sthenic', 'stickit', 'stiffish', 'stifling', 'stilly', 'stilted', 'stingless', 'stingy', 'stinko', 'stintless', 'stirless', 'stirring', 'stockinged', 'stockish', 'stockless', 'stocky', 'stodgy', 'stolen', 'stolid', 'stoneground', 'stoneless', 'stoneware', 'stonkered', 'stopless', 'stopping', 'store', 'storeyed', 'storied', 'stormbound', 'stormless', 'stormproof', 'stotious', 'stoutish', 'straining', 'strangest', 'strapless', 'strapping', 'stratous', 'strawless', 'strawlike', 'streaky', 'streaming', 'streamless', 'streamlined', 'streamy', 'stressful', 'stretchy', 'striate', 'stricken', 'strident', 'strifeful', 'strifeless', 'strigose', 'stringent', 'stringless', 'stringy', 'stripeless', 'stripy', 'strobic', 'strongish', 'strophic', 'stroppy', 'structured', 'strutting', 'strychnic', 'stubbled', 'stubbly', 'stubborn', 'stubby', 'studied', 'stuffy', 'stumbling', 'stumpy', 'stunning', 'stupid', 'sturdied', 'sturdy', 'stutter', 'stylar', 'styleless', 'stylised', 'stylish', 'stylized', 'styloid', 'subdued', 'subfusc', 'subgrade', 'sublimed', 'submerged', 'submersed', 'submiss', 'subscribed', 'subscript', 'subtile', 'subtle', 'succinct', 'suchlike', 'suffused', 'sugared', 'suited', 'sulcate', 'sulfa', 'sulkies', 'sulky', 'sullen', 'sullied', 'sultry', 'sunbaked', 'sunbeamed', 'sunburnt', 'sunfast', 'sunken', 'sunless', 'sunlike', 'sunlit', 'sunproof', 'sunrise', 'sunset', 'sunward', 'super', 'superb', 'supine', 'supple', 'supposed', 'sural', 'surbased', 'surer', 'surest', 'surfy', 'surgeless', 'surging', 'surgy', 'surly', 'surpliced', 'surplus', 'surprised', 'suspect', 'svelter', 'sveltest', 'swainish', 'swampy', 'swanky', 'swaraj', 'swarthy', 'sweated', 'sweaty', 'sweeping', 'sweetmeal', 'swelling', 'sweptwing', 'swindled', 'swingeing', 'swinish', 'swirly', 'swishy', 'swordless', 'swordlike', 'sylphic', 'sylphid', 'sylphish', 'sylphy', 'sylvan', 'systemless', 'taboo', 'tabu', 'tacit', 'tacky', 'tactful', 'tactile', 'tactless', 'tailing', 'tailless', 'taillike', 'tailored', 'taintless', 'taken', 'taking', 'talcose', 'talking', 'talky', 'taloned', 'tameless', 'tamer', 'tamest', 'taming', 'tandem', 'tangier', 'tangled', 'tangy', 'tannic', 'tapeless', 'tapelike', 'tardy', 'tarmac', 'tarnal', 'tarot', 'tarry', 'tarsal', 'tartish', 'tasseled', 'tasselled', 'tasteful', 'tasteless', 'tasty', 'tattered', 'tatty', 'taurine', 'tawdry', 'tawie', 'tearful', 'tearing', 'tearless', 'teary', 'teasing', 'techy', 'teeming', 'teenage', 'teensy', 'teeny', 'telic', 'telling', 'telltale', 'tempered', 'templed', 'tempting', 'tender', 'tenfold', 'tenor', 'tenseless', 'tenser', 'tensest', 'tensing', 'tensive', 'tented', 'tentie', 'tentless', 'tenty', 'tepid', 'terbic', 'terete', 'tergal', 'termless', 'ternate', 'terrene', 'tertial', 'tertian', 'testate', 'testy', 'tetchy', 'textbook', 'textile', 'textless', 'textured', 'thallic', 'thalloid', 'thallous', 'thankful', 'thankless', 'thatchless', 'thecal', 'thecate', 'theism', 'theist', 'themeless', 'thenar', 'thermic', 'theroid', 'thetic', 'thickset', 'thievish', 'thinking', 'thinnish', 'thirdstream', 'thirstless', 'thirsty', 'thirteen', 'thistly', 'thornless', 'thorny', 'thoughtful', 'thoughtless', 'thousandth', 'thowless', 'thrashing', 'threadbare', 'threadlike', 'thready', 'threatful', 
'threefold', 'threescore', 'thriftless', 'thrifty', 'thrilling', 'throaty', 'throbbing', 'throbless', 'throneless', 'throwback', 'thudding', 'thuggish', 'thumbless', 'thumblike', 'thumping', 'thymic', 'thymy', 'thyrsoid', 'ticklish', 'tiddly', 'tideless', 'tidied', 'tightknit', 'timbered', 'timeless', 'timely', 'timeous', 'timid', 'tingly', 'tinhorn', 'tinkling', 'tinkly', 'tinny', 'tinsel', 'tintless', 'tiny', 'tippy', 'tiptoe', 'tiptop', 'tireless', 'tiresome', 'titled', 'toeless', 'toey', 'togaed', 'togate', 'toilful', 'toilsome', 'tombless', 'tonal', 'toneless', 'tongueless', 'tonguelike', 'tonish', 'tonnish', 'tony', 'toothless', 'toothlike', 'toothsome', 'toothy', 'topfull', 'topless', 'topmost', 'torose', 'torpid', 'torquate', 'torrent', 'tortile', 'tortious', 'tortured', 'tother', 'touching', 'touchy', 'toughish', 'touring', 'tourist', 'toward', 'towered', 'townish', 'townless', 'towy', 'toxic', 'toyless', 'toylike', 'traceless', 'trackless', 'tractile', 'tractrix', 'trainless', 'tranquil', 'transcribed', 'transient', 'transposed', 'traplike', 'trappy', 'trashy', 'traveled', 'travelled', 'traverse', 'treacly', 'treasured', 'treen', 'trembling', 'trembly', 'trenchant', 'trendy', 'tressured', 'tressy', 'tribal', 'tribeless', 'trichoid', 'trickish', 'trickless', 'tricksome', 'tricksy', 'tricky', 'tricorn', 'trident', 'trifid', 'trifling', 'triform', 'trillion', 'trillionth', 'trilobed', 'trinal', 'triploid', 'trippant', 'tripping', 'tristful', 'triter', 'tritest', 'triune', 'trivalve', 'trochal', 'trochoid', 'trodden', 'trophic', 'trophied', 'tropic', 'troppo', 'trothless', 'troublous', 'truant', 'truceless', 'truer', 'truffled', 'truncate', 'trunnioned', 'trustful', 'trusting', 'trustless', 'trusty', 'truthful', 'truthless', 'tryptic', 'tsarism', 'tsarist', 'tubal', 'tubate', 'tubby', 'tubeless', 'tumbling', 'tumid', 'tuneful', 'tuneless', 'turbaned', 'turbid', 'turdine', 'turfy', 'turgent', 'turgid', 'tuskless', 'tussal', 'tussive', 'tutti', 'twaddly', 'tweedy', 'twelvefold', 'twenty', 'twiggy', 'twinkling', 'twinning', 'twofold', 'typal', 'typhous', 'typic', 'ugsome', 'ullaged', 'umber', 'umbral', 'umbrose', 'umpteen', 'umpteenth', 'unaimed', 'unaired', 'unapt', 'unarmed', 'unasked', 'unawed', 'unbacked', 'unbagged', 'unbaked', 'unbarbed', 'unbarred', 'unbathed', 'unbegged', 'unbent', 'unbid', 'unblamed', 'unbleached', 'unblenched', 'unblent', 'unblessed', 'unblocked', 'unblown', 'unboned', 'unborn', 'unborne', 'unbought', 'unbound', 'unbowed', 'unbraced', 'unbranched', 'unbreached', 'unbreathed', 'unbred', 'unbreeched', 'unbridged', 'unbroke', 'unbruised', 'unbrushed', 'unburned', 'unburnt', 'uncaged', 'uncalled', 'uncapped', 'uncashed', 'uncaught', 'uncaused', 'unchained', 'unchanged', 'uncharge', 'uncharged', 'uncharmed', 'unchaste', 'unchecked', 'uncheered', 'unchewed', 'unclad', 'unclaimed', 'unclassed', 'unclean', 'uncleaned', 'uncleansed', 'unclear', 'uncleared', 'unclimbed', 'unclipped', 'unclogged', 'unclutched', 'uncocked', 'uncoined', 'uncombed', 'uncooked', 'uncouth', 'uncropped', 'uncross', 'uncrowned', 'unculled', 'uncurbed', 'uncured', 'uncursed', 'uncurved', 'uncut', 'undamped', 'undeaf', 'undealt', 'undecked', 'undimmed', 'undipped', 'undocked', 'undone', 'undrained', 'undraped', 'undrawn', 'undreamed', 'undreamt', 'undress', 'undressed', 'undried', 'undrilled', 'undrowned', 'undrunk', 'undubbed', 'undue', 'undug', 'undulled', 'undyed', 'unfair', 'unfanned', 'unfeared', 'unfed', 'unfelled', 'unfelt', 'unfenced', 'unfiled', 'unfilled', 'unfilmed', 'unfine', 
'unfired', 'unfirm', 'unfished', 'unfit', 'unflawed', 'unfledged', 'unflushed', 'unfooled', 'unforced', 'unforged', 'unformed', 'unfought', 'unfound', 'unframed', 'unfraught', 'unfree', 'unfunded', 'unfurred', 'ungalled', 'ungauged', 'ungeared', 'ungilt', 'ungirthed', 'unglad', 'unglazed', 'unglossed', 'ungloved', 'ungored', 'ungorged', 'ungowned', 'ungraced', 'ungrassed', 'ungrazed', 'ungroomed', 'unground', 'ungrown', 'ungrudged', 'ungual', 'unguessed', 'unguled', 'ungummed', 'ungyved', 'unhacked', 'unhailed', 'unhanged', 'unharmed', 'unhatched', 'unhealed', 'unheard', 'unhelped', 'unhewn', 'unhinged', 'unhired', 'unhooped', 'unhorsed', 'unhung', 'unhurt', 'unhusked', 'unique', 'unjust', 'unkempt', 'unkenned', 'unkept', 'unkind', 'unkinged', 'unkissed', 'unknelled', 'unlaid', 'unlearned', 'unlearnt', 'unleased', 'unled', 'unlet', 'unlike', 'unlimed', 'unlined', 'unlit', 'unlooked', 'unlopped', 'unlost', 'unloved', 'unmade', 'unmailed', 'unmaimed', 'unmanned', 'unmarked', 'unmarred', 'unmasked', 'unmatched', 'unmeant', 'unmeet', 'unmet', 'unmilked', 'unmilled', 'unmissed', 'unmixed', 'unmoaned', 'unmourned', 'unmoved', 'unmown', 'unnamed', 'unoiled', 'unowned', 'unpaced', 'unpaged', 'unpaid', 'unpained', 'unpaired', 'unpared', 'unpaved', 'unpeeled', 'unpent', 'unperched', 'unpicked', 'unpierced', 'unplaced', 'unplagued', 'unplanked', 'unplayed', 'unpleased', 'unpledged', 'unploughed', 'unplucked', 'unplumb', 'unplumbed', 'unplumed', 'unpoised', 'unpolled', 'unposed', 'unpraised', 'unpreached', 'unpressed', 'unpriced', 'unprimed', 'unprized', 'unpropped', 'unproved', 'unpruned', 'unpurged', 'unquelled', 'unquenched', 'unraised', 'unraked', 'unreached', 'unread', 'unreaped', 'unreined', 'unrent', 'unrhymed', 'unribbed', 'unrigged', 'unrimed', 'unripe', 'unroped', 'unrouged', 'unroused', 'unrubbed', 'unrude', 'unruled', 'unsafe', 'unsaid', 'unsailed', 'unsapped', 'unsashed', 'unsaved', 'unscaled', 'unscanned', 'unscarred', 'unscathed', 'unschooled', 'unscorched', 'unscoured', 'unscratched', 'unscreened', 'unsealed', 'unsearched', 'unseen', 'unseized', 'unsensed', 'unsent', 'unset', 'unshamed', 'unshaped', 'unshared', 'unshaved', 'unsheathed', 'unshed', 'unshipped', 'unshocked', 'unshod', 'unshoed', 'unshorn', 'unshown', 'unshrived', 'unshunned', 'unshut', 'unsight', 'unsigned', 'unsized', 'unskilled', 'unskimmed', 'unskinned', 'unslain', 'unsliced', 'unsluiced', 'unslung', 'unsmirched', 'unsmooth', 'unsmoothed', 'unsnuffed', 'unsoaped', 'unsoft', 'unsoiled', 'unsold', 'unsolved', 'unsought', 'unsound', 'unsown', 'unspared', 'unsparred', 'unspelled', 'unspent', 'unspied', 'unspilled', 'unspilt', 'unspoiled', 'unspoilt', 'unsprung', 'unspun', 'unsquared', 'unstack', 'unstacked', 'unstaid', 'unstained', 'unstamped', 'unstarched', 'unstilled', 'unstirred', 'unstitched', 'unstocked', 'unstopped', 'unstrained', 'unstreamed', 'unstressed', 'unstringed', 'unstriped', 'unstripped', 'unstrung', 'unstuck', 'unstuffed', 'unsucked', 'unsung', 'unsure', 'unswayed', 'unswept', 'unsworn', 'untailed', 'untame', 'untamed', 'untanned', 'untapped', 'untarred', 'untaught', 'unteamed', 'unthanked', 'unthawed', 'unthought', 'untied', 'untiled', 'untilled', 'untinged', 'untinned', 'untired', 'untold', 'untombed', 'untoned', 'untorn', 'untouched', 'untraced', 'untracked', 'untrained', 'untrenched', 'untressed', 'untried', 'untrimmed', 'untrod', 'untrue', 'unturfed', 'unturned', 'unurged', 'unused', 'unversed', 'unvexed', 'unviewed', 'unvoiced', 'unwaked', 'unwarmed', 'unwarned', 'unwarped', 'unwashed', 'unwatched', 
'unweaned', 'unwebbed', 'unwed', 'unweened', 'unweighed', 'unwell', 'unwept', 'unwet', 'unwhipped', 'unwilled', 'unwinged', 'unwiped', 'unwired', 'unwise', 'unwished', 'unwitched', 'unwon', 'unwooed', 'unworked', 'unworn', 'unwound', 'unwrapped', 'unwrought', 'unwrung', 'upbeat', 'upbound', 'upcast', 'upgrade', 'uphill', 'upmost', 'uppish', 'upraised', 'upset', 'upstage', 'upstaged', 'upstair', 'upstairs', 'upstart', 'upstate', 'upstream', 'uptight', 'uptown', 'upturned', 'upward', 'upwind', 'urbane', 'urdy', 'urgent', 'urnfield', 'useful', 'useless', 'utile', 'utmost', 'vadose', 'vagal', 'vagrant', 'vagrom', 'vaguer', 'vaguest', 'valanced', 'valgus', 'valiant', 'valid', 'valval', 'valvar', 'valvate', 'vambraced', 'vaneless', 'vanward', 'vapid', 'varied', 'varus', 'vassal', 'vasty', 'vatic', 'vaulted', 'vaulting', 'vaunted', 'vaunting', 'vaunty', 'veilless', 'veiny', 'velar', 'velate', 'vellum', 'venal', 'vengeful', 'venose', 'venous', 'ventose', 'verbless', 'verbose', 'verdant', 'verism', 'verist', 'vespine', 'vestral', 'vibrant', 'viceless', 'viewless', 'viewy', 'villose', 'villous', 'vinous', 'viral', 'virgate', 'virile', 'visaged', 'viscid', 'viscose', 'viscous', 'vitric', 'vivid', 'vivo', 'vixen', 'voetstoots', 'vogie', 'voiceful', 'voiceless', 'voided', 'volant', 'volar', 'volumed', 'volvate', 'vorant', 'voteless', 'votive', 'vulpine', 'vying', 'wacky', 'wageless', 'waggish', 'waggly', 'wailful', 'wailing', 'waisted', 'wakeful', 'wakeless', 'wakerife', 'waking', 'walnut', 'wambly', 'wandle', 'waney', 'waning', 'wanner', 'wannest', 'wanning', 'wannish', 'wanting', 'wanton', 'warded', 'warlike', 'warming', 'warmish', 'warning', 'warring', 'wartless', 'wartlike', 'warty', 'wary', 'washy', 'waspish', 'waspy', 'wasted', 'wasteful', 'watchful', 'waveless', 'wavelike', 'waving', 'wavy', 'waxen', 'waxing', 'waxy', 'wayless', 'wayward', 'wayworn', 'weakly', 'weaponed', 'wearied', 'wearing', 'wearish', 'weary', 'weathered', 'webby', 'wedded', 'wedgy', 'weedy', 'weekday', 'weekly', 'weeny', 'weepy', 'weer', 'weest', 'weighted', 'weighty', 'welcome', 'weldless', 'westbound', 'western', 'wetter', 'wettish', 'whacking', 'whacky', 'whapping', "whate'er", 'wheaten', 'wheezing', 'wheezy', 'wheyey', 'whilom', 'whining', 'whinny', 'whiny', 'whiplike', 'whirring', 'whiskered', 'whitish', 'whittling', 'whity', 'wholesale', 'wholesome', 'whopping', 'whoreson', 'whorish', 'wicked', 'wicker', 'wider', 'widespread', 'widest', 'widish', 'wieldy', 'wifeless', 'wifely', 'wiggly', 'wigless', 'wiglike', 'wilful', 'willful', 'willing', 'willyard', 'wily', 'wimpy', 'windburned', 'winded', 'windproof', 'windswept', 'windy', 'wingless', 'winglike', 'wintry', 'winy', 'wiretap', 'wiring', 'wiry', 'wiser', 'wisest', 'wising', 'wispy', 'wistful', 'witchy', 'withdrawn', 'withy', 'witless', 'witted', 'witting', 'witty', 'wizard', 'wizen', 'wizened', 'woaded', 'wobbling', 'woeful', 'woesome', 'wolfish', 'wonky', 'wonted', 'wooded', 'woodless', 'woodsy', 'woodwind', 'woolen', 'woollen', 'woozier', 'woozy', 'wordless', 'wordy', 'workless', 'worldly', 'worldwide', 'wormy', 'worried', 'worser', 'worshipped', 'worthless', 'worthwhile', 'worthy', 'wounded', 'woundless', 'woven', 'wrapround', 'wrathful', 'wrathless', 'wreathless', 'wreckful', 'wretched', 'wrier', 'wriest', 'wriggly', 'wrinkly', 'writhen', 'writhing', 'written', 'wrongful', 'xanthous', 'xerarch', 'xeric', 'xiphoid', 'xylic', 'xyloid', 'yarer', 'yarest', 'yawning', 'yclept', 'yearling', 'yearlong', 'yearly', 'yearning', 'yeastlike', 'yeasty', 'yester', 'yestern', 
'yielding', 'yogic', 'yolky', 'yonder', 'younger', 'youthful', 'yttric', 'yuletide', 'zany', 'zealous', 'zebrine', 'zeroth', 'zestful', 'zesty', 'zigzag', 'zillion', 'zincky', 'zincoid', 'zincous', 'zincy', 'zingy', 'zinky', 'zippy', 'zonate', 'zoning'] -------------------------------------------------------------------------------- /deep_tabular/models/__init__.py: -------------------------------------------------------------------------------- 1 | """Model package""" 2 | #from .boosting import catboost, xgboost 3 | from .ft_transformer import ft_transformer, ft_tokenizer, ft_backbone 4 | from .mlp import mlp 5 | from .resnet import resnet 6 | 7 | __all__ = ["ft_transformer", 8 | "ft_tokenizer", 9 | "ft_backbone", 10 | "mlp", 11 | "resnet" 12 | ] 13 | -------------------------------------------------------------------------------- /deep_tabular/models/ft_transformer.py: -------------------------------------------------------------------------------- 1 | """ ft_transformer.py 2 | FT Transformer model class 3 | Adopted from https://github.com/Yura52/rtdl 4 | March 2022 5 | """ 6 | 7 | import math 8 | import typing as ty 9 | 10 | # from icecream import ic 11 | import torch 12 | import torch.nn as nn 13 | import torch.nn.functional as F 14 | import torch.nn.init as nn_init 15 | from torch import Tensor 16 | 17 | 18 | def reglu(x: Tensor) -> Tensor: 19 | a, b = x.chunk(2, dim=-1) 20 | return a * F.relu(b) 21 | 22 | 23 | class Tokenizer(nn.Module): 24 | category_offsets: ty.Optional[Tensor] 25 | 26 | def __init__(self, d_numerical: int, categories: ty.Optional[ty.List[int]], d_token: int, bias: bool) -> None: 27 | super().__init__() 28 | if categories is None: 29 | d_bias = d_numerical 30 | self.category_offsets = None 31 | self.category_embeddings = None 32 | else: 33 | d_bias = d_numerical + len(categories) 34 | category_offsets = torch.tensor([0] + categories[:-1]).cumsum(0) 35 | self.register_buffer("category_offsets", category_offsets) 36 | self.category_embeddings = nn.Embedding(sum(categories), d_token) 37 | nn_init.kaiming_uniform_(self.category_embeddings.weight, a=math.sqrt(5)) 38 | 39 | # take [CLS] token into account 40 | self.weight = nn.Parameter(Tensor(d_numerical + 1, d_token)) 41 | self.bias = nn.Parameter(Tensor(d_bias, d_token)) if bias else None 42 | # The initialization is inspired by nn.Linear 43 | nn_init.kaiming_uniform_(self.weight, a=math.sqrt(5)) 44 | if self.bias is not None: 45 | nn_init.kaiming_uniform_(self.bias, a=math.sqrt(5)) 46 | 47 | @property 48 | def n_tokens(self) -> int: 49 | return len(self.weight) + ( 50 | 0 if self.category_offsets is None else len(self.category_offsets) 51 | ) 52 | 53 | def forward(self, x_num: Tensor, x_cat: ty.Optional[Tensor]) -> Tensor: 54 | x_some = x_num if x_cat is None else x_cat 55 | assert x_some is not None 56 | x_num = torch.cat( 57 | [torch.ones(len(x_some), 1, device=x_some.device)] # [CLS] 58 | + ([] if x_num is None else [x_num]), 59 | dim=1, 60 | ) 61 | x = self.weight[None] * x_num[:, :, None] 62 | if x_cat is not None: 63 | x = torch.cat( 64 | [x, self.category_embeddings(x_cat + self.category_offsets[None])], 65 | dim=1, 66 | ) 67 | if self.bias is not None: 68 | bias = torch.cat( 69 | [ 70 | torch.zeros(1, self.bias.shape[1], device=x.device), 71 | self.bias, 72 | ] 73 | ) 74 | x = x + bias[None] 75 | return x 76 | 77 | 78 | class MultiheadAttention(nn.Module): 79 | def __init__( 80 | self, d: int, n_heads: int, dropout: float, initialization: str 81 | ) -> None: 82 | if n_heads > 1: 83 | assert d % n_heads == 0 
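# each attention head operates on a d // n_heads slice of the embedding (see _reshape below), which is why d must divide evenly across the heads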
84 | assert initialization in ["xavier", "kaiming"] 85 | 86 | super().__init__() 87 | self.W_q = nn.Linear(d, d) 88 | self.W_k = nn.Linear(d, d) 89 | self.W_v = nn.Linear(d, d) 90 | self.W_out = nn.Linear(d, d) if n_heads > 1 else None 91 | self.n_heads = n_heads 92 | self.dropout = nn.Dropout(dropout) if dropout else None 93 | 94 | for m in [self.W_q, self.W_k, self.W_v]: 95 | if initialization == "xavier" and (n_heads > 1 or m is not self.W_v): 96 | # gain is needed since W_qkv is represented with 3 separate layers 97 | nn_init.xavier_uniform_(m.weight, gain=1 / math.sqrt(2)) 98 | nn_init.zeros_(m.bias) 99 | if self.W_out is not None: 100 | nn_init.zeros_(self.W_out.bias) 101 | 102 | def _reshape(self, x: Tensor) -> Tensor: 103 | batch_size, n_tokens, d = x.shape 104 | d_head = d // self.n_heads 105 | return ( 106 | x.reshape(batch_size, n_tokens, self.n_heads, d_head) 107 | .transpose(1, 2) 108 | .reshape(batch_size * self.n_heads, n_tokens, d_head) 109 | ) 110 | 111 | def forward( 112 | self, 113 | x_q: Tensor, 114 | x_kv: Tensor, 115 | key_compression: ty.Optional[nn.Linear], 116 | value_compression: ty.Optional[nn.Linear], 117 | ) -> Tensor: 118 | q, k, v = self.W_q(x_q), self.W_k(x_kv), self.W_v(x_kv) 119 | for tensor in [q, k, v]: 120 | assert tensor.shape[-1] % self.n_heads == 0 121 | if key_compression is not None: 122 | assert value_compression is not None 123 | k = key_compression(k.transpose(1, 2)).transpose(1, 2) 124 | v = value_compression(v.transpose(1, 2)).transpose(1, 2) 125 | else: 126 | assert value_compression is None 127 | 128 | batch_size = len(q) 129 | d_head_key = k.shape[-1] // self.n_heads 130 | d_head_value = v.shape[-1] // self.n_heads 131 | n_q_tokens = q.shape[1] 132 | 133 | q = self._reshape(q) 134 | k = self._reshape(k) 135 | attention = F.softmax(q @ k.transpose(1, 2) / math.sqrt(d_head_key), dim=-1) 136 | if self.dropout is not None: 137 | attention = self.dropout(attention) 138 | x = attention @ self._reshape(v) 139 | x = ( 140 | x.reshape(batch_size, self.n_heads, n_q_tokens, d_head_value) 141 | .transpose(1, 2) 142 | .reshape(batch_size, n_q_tokens, self.n_heads * d_head_value) 143 | ) 144 | if self.W_out is not None: 145 | x = self.W_out(x) 146 | return x 147 | 148 | 149 | class FTTransformer(nn.Module): 150 | """Transformer. 
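    Numerical and categorical features are embedded into a sequence of tokens (with a prepended [CLS] token), processed by a stack of self-attention blocks, and the final [CLS] representation is fed to a linear prediction head.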
151 | 152 | References: 153 | - https://pytorch.org/docs/stable/generated/torch.nn.Transformer.html 154 | - https://github.com/facebookresearch/pytext/tree/master/pytext/models/representations/transformer 155 | - https://github.com/pytorch/fairseq/blob/1bba712622b8ae4efb3eb793a8a40da386fe11d0/examples/linformer/linformer_src/modules/multihead_linear_attention.py#L19 156 | """ 157 | 158 | def __init__(self, 159 | d_numerical, 160 | d_out, 161 | categories, 162 | d_embedding, 163 | token_bias, 164 | n_layers, 165 | n_heads, 166 | d_ffn_factor, 167 | attention_dropout, 168 | ffn_dropout, 169 | residual_dropout, 170 | activation, 171 | prenormalization, 172 | initialization, 173 | kv_compression, 174 | kv_compression_sharing): 175 | assert (kv_compression is None) ^ (kv_compression_sharing is not None) 176 | 177 | super().__init__() 178 | self.tokenizer = Tokenizer(d_numerical, categories, d_embedding, token_bias) 179 | n_tokens = self.tokenizer.n_tokens 180 | 181 | def make_kv_compression(): 182 | assert kv_compression 183 | compression = nn.Linear( 184 | n_tokens, int(n_tokens * kv_compression), bias=False 185 | ) 186 | if initialization == "xavier": 187 | nn_init.xavier_uniform_(compression.weight) 188 | return compression 189 | 190 | self.shared_kv_compression = ( 191 | make_kv_compression() 192 | if kv_compression and kv_compression_sharing == "layerwise" 193 | else None 194 | ) 195 | 196 | def make_normalization(): 197 | return nn.LayerNorm(d_embedding) 198 | 199 | d_hidden = int(d_embedding * d_ffn_factor) 200 | self.layers = nn.ModuleList([]) 201 | for layer_idx in range(n_layers): 202 | layer = nn.ModuleDict( 203 | { 204 | "attention": MultiheadAttention( 205 | d_embedding, n_heads, attention_dropout, initialization 206 | ), 207 | "linear0": nn.Linear( 208 | d_embedding, d_hidden * (2 if activation.endswith("glu") else 1) 209 | ), 210 | "linear1": nn.Linear(d_hidden, d_embedding), 211 | "norm1": make_normalization(), 212 | } 213 | ) 214 | if not prenormalization or layer_idx: 215 | layer["norm0"] = make_normalization() 216 | if kv_compression and self.shared_kv_compression is None: 217 | layer["key_compression"] = make_kv_compression() 218 | if kv_compression_sharing == "headwise": 219 | layer["value_compression"] = make_kv_compression() 220 | else: 221 | assert kv_compression_sharing == "key-value" 222 | self.layers.append(layer) 223 | 224 | self.activation = reglu 225 | self.last_activation = F.relu 226 | self.prenormalization = prenormalization 227 | self.last_normalization = make_normalization() if prenormalization else None 228 | self.ffn_dropout = ffn_dropout 229 | self.residual_dropout = residual_dropout 230 | self.head = nn.Linear(d_embedding, d_out) 231 | 232 | def _get_kv_compressions(self, layer): 233 | return ( 234 | (self.shared_kv_compression, self.shared_kv_compression) 235 | if self.shared_kv_compression is not None 236 | else (layer["key_compression"], layer["value_compression"]) 237 | if "key_compression" in layer and "value_compression" in layer 238 | else (layer["key_compression"], layer["key_compression"]) 239 | if "key_compression" in layer 240 | else (None, None) 241 | ) 242 | 243 | def _start_residual(self, x, layer, norm_idx): 244 | x_residual = x 245 | if self.prenormalization: 246 | norm_key = f"norm{norm_idx}" 247 | if norm_key in layer: 248 | x_residual = layer[norm_key](x_residual) 249 | return x_residual 250 | 251 | def _end_residual(self, x, x_residual, layer, norm_idx): 252 | if self.residual_dropout: 253 | x_residual = F.dropout(x_residual, 
self.residual_dropout, self.training) 254 | x = x + x_residual 255 | if not self.prenormalization: 256 | x = layer[f"norm{norm_idx}"](x) 257 | return x 258 | 259 | def forward(self, x_num, x_cat): 260 | x = self.tokenizer(x_num, x_cat) 261 | 262 | for layer_idx, layer in enumerate(self.layers): 263 | is_last_layer = layer_idx + 1 == len(self.layers) 264 | layer = ty.cast(ty.Dict[str, nn.Module], layer) 265 | 266 | x_residual = self._start_residual(x, layer, 0) 267 | x_residual = layer["attention"]( 268 | # for the last attention, it is enough to process only [CLS] 269 | (x_residual[:, :1] if is_last_layer else x_residual), 270 | x_residual, 271 | *self._get_kv_compressions(layer), 272 | ) 273 | if is_last_layer: 274 | x = x[:, : x_residual.shape[1]] 275 | x = self._end_residual(x, x_residual, layer, 0) 276 | 277 | x_residual = self._start_residual(x, layer, 1) 278 | x_residual = layer["linear0"](x_residual) 279 | x_residual = self.activation(x_residual) 280 | if self.ffn_dropout: 281 | x_residual = F.dropout(x_residual, self.ffn_dropout, self.training) 282 | x_residual = layer["linear1"](x_residual) 283 | x = self._end_residual(x, x_residual, layer, 1) 284 | 285 | assert x.shape[1] == 1 286 | x = x[:, 0] 287 | if self.last_normalization is not None: 288 | x = self.last_normalization(x) 289 | x = self.last_activation(x) 290 | x = self.head(x) 291 | x = x.squeeze(-1) 292 | return x 293 | 294 | 295 | class FTBackbone(nn.Module): 296 | 297 | def __init__(self, d_embedding, n_layers, n_heads, d_ffn_factor, attention_dropout, ffn_dropout, 298 | residual_dropout, activation, prenormalization, initialization): 299 | super().__init__() 300 | 301 | d_hidden = int(d_embedding * d_ffn_factor) 302 | self.layers = nn.ModuleList([]) 303 | for layer_idx in range(n_layers): 304 | layer = nn.ModuleDict( 305 | { 306 | "attention": MultiheadAttention( 307 | d_embedding, n_heads, attention_dropout, initialization 308 | ), 309 | "linear0": nn.Linear( 310 | d_embedding, d_hidden * (2 if activation.endswith("glu") else 1) 311 | ), 312 | "linear1": nn.Linear(d_hidden, d_embedding), 313 | "norm1": nn.LayerNorm(d_embedding), 314 | } 315 | ) 316 | if not prenormalization or layer_idx: 317 | layer["norm0"] = nn.LayerNorm(d_embedding) 318 | self.layers.append(layer) 319 | 320 | self.activation = reglu 321 | self.last_activation = F.relu 322 | self.prenormalization = prenormalization 323 | self.last_normalization = nn.LayerNorm(d_embedding) if prenormalization else None 324 | self.ffn_dropout = ffn_dropout 325 | self.residual_dropout = residual_dropout 326 | 327 | def _start_residual(self, x, layer, norm_idx): 328 | x_residual = x 329 | if self.prenormalization: 330 | norm_key = f"norm{norm_idx}" 331 | if norm_key in layer: 332 | x_residual = layer[norm_key](x_residual) 333 | return x_residual 334 | 335 | def _end_residual(self, x, x_residual, layer, norm_idx): 336 | if self.residual_dropout: 337 | x_residual = F.dropout(x_residual, self.residual_dropout, self.training) 338 | x = x + x_residual 339 | if not self.prenormalization: 340 | x = layer[f"norm{norm_idx}"](x) 341 | return x 342 | 343 | def forward(self, x): 344 | 345 | for layer_idx, layer in enumerate(self.layers): 346 | is_last_layer = layer_idx + 1 == len(self.layers) 347 | layer = ty.cast(ty.Dict[str, nn.Module], layer) 348 | 349 | x_residual = self._start_residual(x, layer, 0) 350 | x_residual = layer["attention"]( 351 | # for the last attention, it is enough to process only [CLS] 352 | (x_residual[:, :1] if is_last_layer else x_residual), 353 | x_residual, 
354 | None, 355 | None, 356 | ) 357 | if is_last_layer: 358 | x = x[:, : x_residual.shape[1]] 359 | x = self._end_residual(x, x_residual, layer, 0) 360 | 361 | x_residual = self._start_residual(x, layer, 1) 362 | x_residual = layer["linear0"](x_residual) 363 | x_residual = self.activation(x_residual) 364 | if self.ffn_dropout: 365 | x_residual = F.dropout(x_residual, self.ffn_dropout, self.training) 366 | x_residual = layer["linear1"](x_residual) 367 | x = self._end_residual(x, x_residual, layer, 1) 368 | 369 | assert x.shape[1] == 1 370 | x = x[:, 0] 371 | if self.last_normalization is not None: 372 | x = self.last_normalization(x) 373 | x = self.last_activation(x) 374 | return x 375 | 376 | 377 | def ft_transformer(num_numerical, unique_categories, num_outputs, d_embedding, model_params): 378 | return FTTransformer(num_numerical, num_outputs, unique_categories, d_embedding, 379 | model_params.token_bias, 380 | model_params.n_layers, 381 | model_params.n_heads, 382 | model_params.d_ffn_factor, 383 | model_params.attention_dropout, 384 | model_params.ffn_dropout, 385 | model_params.residual_dropout, 386 | model_params.activation, 387 | model_params.prenormalization, 388 | model_params.initialization, 389 | model_params.kv_compression, 390 | model_params.kv_compression_sharing) 391 | 392 | 393 | def ft_tokenizer(num_numerical, unique_categories, d_embedding, token_bias): 394 | return Tokenizer(num_numerical, unique_categories, d_embedding, token_bias) 395 | 396 | 397 | def ft_backbone(model_params): 398 | return FTBackbone(model_params.d_embedding, 399 | model_params.n_layers, 400 | model_params.n_heads, 401 | model_params.d_ffn_factor, 402 | model_params.attention_dropout, 403 | model_params.ffn_dropout, 404 | model_params.residual_dropout, 405 | model_params.activation, 406 | model_params.prenormalization, 407 | model_params.initialization) 408 | -------------------------------------------------------------------------------- /deep_tabular/models/mlp.py: -------------------------------------------------------------------------------- 1 | """ mlp.py 2 | MLP model class 3 | Adopted from https://github.com/Yura52/rtdl 4 | March 2022 5 | """ 6 | 7 | import math 8 | 9 | import torch 10 | import torch.nn as nn 11 | import torch.nn.functional as F 12 | 13 | 14 | class MLP(nn.Module): 15 | def __init__(self, d_in, d_out, categories, d_embedding, d_layers, dropout): 16 | super().__init__() 17 | 18 | # if we have categorical data 19 | if categories is not None: 20 | # update d_in to account for the embedded categorical features 21 | # TODO Why isn't d_in correct to begin with? Does this mean it is just dimension of numerical data?
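# d_in as passed in counts only the numerical features (the mlp() factory below passes num_numerical); each categorical column is embedded into a d_embedding-dimensional vector and concatenated in forward(), hence the adjustment below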
22 | d_in += len(categories) * d_embedding 23 | 24 | # compute offsets so that categorical features do not overlap 25 | category_offsets = torch.tensor([0] + categories[:-1]).cumsum(0) 26 | self.register_buffer("category_offsets", category_offsets) 27 | self.category_embeddings = nn.Embedding(sum(categories), d_embedding) 28 | nn.init.kaiming_uniform_(self.category_embeddings.weight, a=math.sqrt(5)) 29 | 30 | self.layers = nn.ModuleList([nn.Linear(d_layers[i - 1] if i else d_in, x) for i, x in enumerate(d_layers)]) 31 | self.dropout = dropout 32 | self.head = nn.Linear(d_layers[-1] if d_layers else d_in, d_out) 33 | 34 | def forward(self, x_num, x_cat): 35 | x = [] 36 | if x_num is not None: 37 | x.append(x_num) 38 | if x_cat is not None: 39 | x.append(self.category_embeddings(x_cat + self.category_offsets[None]).view(x_cat.size(0), -1)) 40 | x = torch.cat(x, dim=-1) 41 | 42 | for layer in self.layers: 43 | x = layer(x) 44 | x = F.relu(x) 45 | if self.dropout: 46 | x = F.dropout(x, self.dropout, self.training) 47 | x = self.head(x) 48 | x = x.squeeze(-1) 49 | return x 50 | 51 | 52 | def mlp(num_numerical, unique_categories, num_outputs, d_embedding, model_params): 53 | return MLP(num_numerical, num_outputs, unique_categories, d_embedding, model_params.d_layers, model_params.dropout) 54 | -------------------------------------------------------------------------------- /deep_tabular/models/resnet.py: -------------------------------------------------------------------------------- 1 | """ resnet.py 2 | ResNet model class 3 | Adopted from https://github.com/Yura52/rtdl 4 | March 2022 5 | """ 6 | 7 | import math 8 | import typing as ty 9 | import torch 10 | import torch.nn as nn 11 | import torch.nn.functional as F 12 | 13 | 14 | class ResNet(nn.Module): 15 | def __init__(self, 16 | d_numerical, 17 | d_out, 18 | categories, 19 | d_embedding, 20 | d, 21 | d_hidden_factor, 22 | n_layers, 23 | activation, 24 | normalization, 25 | hidden_dropout, 26 | residual_dropout): 27 | super().__init__() 28 | 29 | def make_normalization(): 30 | return {'batchnorm': nn.BatchNorm1d, 'layernorm': nn.LayerNorm}[ 31 | normalization 32 | ](d) 33 | 34 | self.main_activation = F.relu 35 | self.last_activation = F.relu 36 | self.residual_dropout = residual_dropout 37 | self.hidden_dropout = hidden_dropout 38 | 39 | d_in = d_numerical 40 | d_hidden = int(d * d_hidden_factor) 41 | 42 | if categories is not None: 43 | d_in += len(categories) * d_embedding 44 | category_offsets = torch.tensor([0] + categories[:-1]).cumsum(0) 45 | self.register_buffer('category_offsets', category_offsets) 46 | self.category_embeddings = nn.Embedding(sum(categories), d_embedding) 47 | nn.init.kaiming_uniform_(self.category_embeddings.weight, a=math.sqrt(5)) 48 | 49 | self.first_layer = nn.Linear(d_in, d) 50 | self.layers = nn.ModuleList( 51 | [ 52 | nn.ModuleDict( 53 | { 54 | 'norm': make_normalization(), 55 | 'linear0': nn.Linear( 56 | d, d_hidden * (2 if activation.endswith('glu') else 1) 57 | ), 58 | 'linear1': nn.Linear(d_hidden, d), 59 | } 60 | ) 61 | for _ in range(n_layers) 62 | ] 63 | ) 64 | self.last_normalization = make_normalization() 65 | self.head = nn.Linear(d, d_out) 66 | 67 | def forward(self, x_num, x_cat): 68 | x = [] 69 | if x_num is not None: 70 | x.append(x_num) 71 | if x_cat is not None: 72 | x.append( 73 | self.category_embeddings(x_cat + self.category_offsets[None]).view( 74 | x_cat.size(0), -1 75 | ) 76 | ) 77 | x = torch.cat(x, dim=-1) 78 | 79 | x = self.first_layer(x) 80 | for layer in self.layers: 81 | layer =
ty.cast(ty.Dict[str, nn.Module], layer) 82 | z = x 83 | z = layer['norm'](z) 84 | z = layer['linear0'](z) 85 | z = self.main_activation(z) 86 | if self.hidden_dropout: 87 | z = F.dropout(z, self.hidden_dropout, self.training) 88 | z = layer['linear1'](z) 89 | if self.residual_dropout: 90 | z = F.dropout(z, self.residual_dropout, self.training) 91 | x = x + z 92 | x = self.last_normalization(x) 93 | x = self.last_activation(x) 94 | x = self.head(x) 95 | x = x.squeeze(-1) 96 | return x 97 | 98 | 99 | def resnet(num_numerical, unique_categories, num_outputs, d_embedding, model_params): 100 | return ResNet(num_numerical, num_outputs, unique_categories, d_embedding, 101 | model_params.d, 102 | model_params.d_hidden_factor, 103 | model_params.n_layers, 104 | model_params.activation, 105 | model_params.normalization, 106 | model_params.hidden_dropout, 107 | model_params.residual_dropout) 108 | -------------------------------------------------------------------------------- /deep_tabular/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .data_tools import get_data_openml, get_categories_full_cat_data, TabularDataset 2 | from .tools import generate_run_id 3 | from .tools import get_backbone 4 | from .tools import get_criterion 5 | from .tools import get_dataloaders 6 | from .tools import get_embedder 7 | from .tools import get_head 8 | from .tools import get_optimizer_for_backbone, get_optimizer_for_single_net 9 | from .tools import load_transfer_model_from_checkpoint, load_model_from_checkpoint 10 | from .tools import write_to_tb 11 | 12 | __all__ = ["generate_run_id", 13 | "get_backbone", 14 | "get_categories_full_cat_data", 15 | "get_data_openml", 16 | "get_dataloaders", 17 | "get_embedder", 18 | "get_head", 19 | "get_optimizer_for_backbone", 20 | "get_optimizer_for_single_net", 21 | "load_transfer_model_from_checkpoint", 22 | "load_model_from_checkpoint", 23 | "TabularDataset", 24 | "write_to_tb"] 25 | -------------------------------------------------------------------------------- /deep_tabular/utils/data_tools.py: -------------------------------------------------------------------------------- 1 | """ data_tools.py 2 | Tools for building tabular datasets 3 | Developed for Tabular Transfer Learning project 4 | April 2022 5 | Some functionality adopted from https://github.com/Yura52/rtdl 6 | """ 7 | 8 | import logging 9 | import os 10 | import warnings 11 | from copy import deepcopy 12 | from dataclasses import dataclass 13 | from typing import Optional, Dict, Any 14 | 15 | # from icecream import ic 16 | import numpy as np 17 | import openml 18 | import pandas as pd 19 | import sklearn.preprocessing 20 | import torch 21 | from sklearn.preprocessing import LabelEncoder 22 | from sklearn.model_selection import train_test_split 23 | import pickle 24 | 25 | # Ignore statements for pylint: 26 | # Too many branches (R0912), Too many statements (R0915), No member (E1101), 27 | # Not callable (E1102), Invalid name (C0103), No exception (W0702), 28 | # Too many local variables (R0914), Missing docstring (C0116, C0115). 
29 | # pylint: disable=R0912, R0915, E1101, E1102, C0103, W0702, R0914, C0116, C0115 30 | 31 | 32 | def get_categories_full_cat_data(full_cat_data_for_encoder): 33 | return ( 34 | None 35 | if full_cat_data_for_encoder is None 36 | else [ 37 | len(set(full_cat_data_for_encoder.values[:, i])) 38 | for i in range(full_cat_data_for_encoder.shape[1]) 39 | ] 40 | ) 41 | 42 | 43 | def get_data_openml(dataset_id): 44 | dataset = openml.datasets.get_dataset(dataset_id) 45 | data, targets, categorical_indicator, attribute_names = dataset.get_data(dataset_format="dataframe", 46 | target=dataset.default_target_attribute) 47 | categorical_columns = list(data.columns[np.array(categorical_indicator)]) 48 | numerical_columns = list(data.columns[~np.array(categorical_indicator)]) 49 | return data, targets, categorical_columns, numerical_columns 50 | 51 | 52 | def get_data_locally(ds_id): 53 | if os.path.exists(f'../../../data/{ds_id}/N.csv'): 54 | X_full_num = pd.read_csv(f'../../../data/{ds_id}/N.csv') 55 | numerical_columns = list(X_full_num.columns) 56 | else: 57 | X_full_num = pd.DataFrame() 58 | numerical_columns = [] 59 | if os.path.exists(f'../../../data/{ds_id}/C.csv'): 60 | X_full_cat = pd.read_csv(f'../../../data/{ds_id}/C.csv') 61 | categorical_columns = list(X_full_cat.columns) 62 | else: 63 | X_full_cat = pd.DataFrame() 64 | categorical_columns = [] 65 | 66 | X_full = pd.concat([X_full_num, X_full_cat], axis = 1) 67 | y_full = pd.read_csv(f'../../../data/{ds_id}/y.csv') 68 | 69 | if y_full.shape[1] == 1: 70 | y_full = y_full.iloc[:, 0] 71 | else: 72 | raise ValueError('Targets have more than one column and the task is not multilabel') 73 | 74 | 75 | return X_full, y_full, categorical_columns, numerical_columns 76 | 77 | def get_data(dataset_id, source, task, datasplit=[.65, .15, .2]): 78 | """ 79 | Function to read and prepare a multiclass/binclass/regression dataset 80 | """ 81 | seed = 0 82 | np.random.seed(seed) 83 | 84 | if source == 'openml': 85 | data, targets, categorical_columns, numerical_columns = get_data_openml(dataset_id) 86 | elif source == 'local': 87 | data, targets, categorical_columns, numerical_columns = get_data_locally(dataset_id) 88 | np.random.seed(seed) 89 | # Fixes some bugs in openml datasets 90 | if targets.dtype.name == "category": 91 | targets = targets.apply(str).astype('object') 92 | 93 | for col in categorical_columns: 94 | data[col] = data[col].apply(str).astype("object") 95 | 96 | # reindex and find NaNs/Missing values in categorical columns 97 | data, targets = data.reset_index(drop=True), targets.reset_index(drop=True) 98 | data[categorical_columns] = data[categorical_columns].fillna("___null___") 99 | 100 | if task != 'regression': 101 | l_enc = LabelEncoder() 102 | targets = l_enc.fit_transform(targets) 103 | else: 104 | targets = targets.to_numpy() 105 | 106 | # split data into train/val/test 107 | train_size, test_size, valid_size = datasplit[0], datasplit[2], datasplit[1]/(1-datasplit[2]) 108 | if task != 'regression': 109 | data_train, data_test, targets_train, targets_test = train_test_split(data, targets, test_size=test_size, random_state=seed, stratify = targets) 110 | data_train, data_val, targets_train, targets_val = train_test_split(data_train, targets_train, test_size=valid_size, random_state=seed, stratify = targets_train) 111 | else: 112 | data_train, data_test, targets_train, targets_test = train_test_split(data, targets, test_size=test_size, random_state=seed) 113 | data_train, data_val, targets_train, targets_val = 
train_test_split(data_train, targets_train, test_size=valid_size, random_state=seed) 114 | 115 | 116 | 117 | data_cat_train = data_train[categorical_columns].values 118 | data_num_train = data_train[numerical_columns].values 119 | 120 | data_cat_val = data_val[categorical_columns].values 121 | data_num_val = data_val[numerical_columns].values 122 | 123 | data_cat_test = data_test[categorical_columns].values 124 | data_num_test = data_test[numerical_columns].values 125 | 126 | info = {"name": dataset_id, 127 | "task_type": task, 128 | "n_num_features": len(numerical_columns), 129 | "n_cat_features": len(categorical_columns), 130 | "train_size": data_train.shape[0], 131 | "val_size": data_val.shape[0], 132 | "test_size": data_test.shape[0]} 133 | 134 | if task == "multiclass": 135 | info["n_classes"] = len(set(targets)) 136 | if task == "binclass": 137 | info["n_classes"] = 1 138 | if task == "regression": 139 | info["n_classes"] = 1 140 | 141 | if len(numerical_columns) > 0: 142 | numerical_data = {"train": data_num_train, "val": data_num_val, "test": data_num_test} 143 | else: 144 | numerical_data = None 145 | 146 | if len(categorical_columns) > 0: 147 | categorical_data = {"train": data_cat_train, "val": data_cat_val, "test": data_cat_test} 148 | else: 149 | categorical_data = None 150 | 151 | targets = {"train": targets_train, "val": targets_val, "test": targets_test} 152 | 153 | if len(categorical_columns) > 0: 154 | full_cat_data_for_encoder = data[categorical_columns] 155 | else: 156 | full_cat_data_for_encoder = None 157 | 158 | return numerical_data, categorical_data, targets, info, full_cat_data_for_encoder 159 | 160 | 161 | def get_multilabel_data(ds_id, source, task): 162 | """ 163 | Function to read and prepare a multi-label dataset -- handling of multiple targets is slightly different from the other cases 164 | """ 165 | if source != 'local': 166 | raise ValueError("Only locally stored multilabel datasets are accepted. 
If it is local, double check 'source: local' in dataset config") 167 | seed = 0 168 | np.random.seed(seed) 169 | if os.path.exists(f'../../../data/{ds_id}/N.csv'): 170 | X_full_num = pd.read_csv(f'../../../data/{ds_id}/N.csv') 171 | numerical_columns = list(X_full_num.columns) 172 | else: 173 | X_full_num = pd.DataFrame() 174 | numerical_columns = [] 175 | if os.path.exists(f'../../../data/{ds_id}/C.csv'): 176 | X_full_cat = pd.read_csv(f'../../../data/{ds_id}/C.csv') 177 | categorical_columns = list(X_full_cat.columns) 178 | else: 179 | X_full_cat = pd.DataFrame() 180 | categorical_columns = [] 181 | 182 | X_full = pd.concat([X_full_num, X_full_cat], axis = 1) 183 | y_full = pd.read_csv(f'../../../data/{ds_id}/y.csv') 184 | 185 | X_train, X_test, y_train, y_test = train_test_split(X_full, y_full, test_size=0.2, random_state=1) 186 | X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.1875, random_state=1) 187 | 188 | X_train[categorical_columns] = X_train[categorical_columns].fillna("MissingValue") 189 | X_val[categorical_columns] = X_val[categorical_columns].fillna("MissingValue") 190 | X_test[categorical_columns] = X_test[categorical_columns].fillna("MissingValue") 191 | # print(numerical_columns) 192 | # print(categorical_columns) 193 | 194 | X_cat_train = X_train[categorical_columns].values 195 | X_num_train = X_train[numerical_columns].values.astype('float') 196 | y_train = y_train.values.astype('float') 197 | 198 | X_cat_val = X_val[categorical_columns].values 199 | X_num_val = X_val[numerical_columns].values.astype('float') 200 | y_val = y_val.values.astype('float') 201 | 202 | X_cat_test = X_test[categorical_columns].values 203 | X_num_test = X_test[numerical_columns].values.astype('float') 204 | y_test = y_test.values.astype('float') 205 | 206 | info = {} 207 | info['name'] = ds_id 208 | info['task_type'] = task 209 | info['n_num_features'] = len(numerical_columns) 210 | info['n_cat_features'] = len(categorical_columns) 211 | info['train_size'] = X_train.shape[0] 212 | info['val_size'] = X_val.shape[0] 213 | info['test_size'] = X_test.shape[0] 214 | 215 | 216 | if len(y_train.shape) > 1: 217 | info['n_classes'] = y_train.shape[1] 218 | else: 219 | info['n_classes'] = 1 220 | 221 | if len(numerical_columns) > 0: 222 | numerical_data = {'train': X_num_train, 'val': X_num_val, 'test': X_num_test} 223 | else: 224 | numerical_data = None 225 | 226 | if len(categorical_columns) > 0: 227 | categorical_data = {'train': X_cat_train, 'val': X_cat_val, 'test': X_cat_test} 228 | else: 229 | categorical_data = None 230 | 231 | targets = {'train': y_train, 'val': y_val, 'test': y_test} 232 | print('\n Train size:{} Val size:{} Test size:{}'.format(len(y_train), len(y_val), len(y_test))) 233 | 234 | if len(categorical_columns) > 0: 235 | full_cat_data_for_encoder = X_full[categorical_columns] 236 | else: 237 | full_cat_data_for_encoder = None 238 | return numerical_data, categorical_data, targets, info, full_cat_data_for_encoder 239 | 240 | @dataclass 241 | class TabularDataset: 242 | x_num: Optional[Dict[str, np.ndarray]] 243 | x_cat: Optional[Dict[str, np.ndarray]] 244 | y: Dict[str, np.ndarray] 245 | info: Dict[str, Any] 246 | normalization: Optional[str] 247 | cat_policy: str 248 | seed: int 249 | full_cat_data_for_encoder: Optional[pd.DataFrame] 250 | y_policy: Optional[str] = None 251 | normalizer_path: Optional[str] = None 252 | stage: Optional[str] = None 253 | 254 | @property 255 | def is_binclass(self): 256 | return self.info['task_type'] == "binclass" 257 
| 258 | @property 259 | def is_multiclass(self): 260 | return self.info['task_type'] == "multiclass" 261 | 262 | @property 263 | def is_regression(self): 264 | return self.info['task_type'] == "regression" 265 | 266 | @property 267 | def n_num_features(self): 268 | return self.info["n_num_features"] 269 | 270 | @property 271 | def n_cat_features(self): 272 | return self.info["n_cat_features"] 273 | 274 | @property 275 | def n_features(self): 276 | return self.n_num_features + self.n_cat_features 277 | 278 | @property 279 | def n_classes(self): 280 | return self.info["n_classes"] 281 | 282 | @property 283 | def parts(self): 284 | return self.x_num.keys() if self.x_num is not None else self.x_cat.keys() 285 | 286 | def size(self, part: str): 287 | x = self.x_num if self.x_num is not None else self.x_cat 288 | assert x is not None 289 | return len(x[part]) 290 | 291 | def normalize(self, x_num, noise=1e-3): 292 | x_num_train = x_num['train'].copy() 293 | if self.normalization == 'standard': 294 | normalizer = sklearn.preprocessing.StandardScaler() 295 | elif self.normalization == 'quantile': 296 | normalizer = sklearn.preprocessing.QuantileTransformer( 297 | output_distribution='normal', 298 | n_quantiles=max(min(x_num['train'].shape[0] // 30, 1000), 10), 299 | subsample=1e9, 300 | random_state=self.seed, 301 | ) 302 | if noise: 303 | stds = np.std(x_num_train, axis=0, keepdims=True) 304 | noise_std = noise / np.maximum(stds, noise) 305 | x_num_train += noise_std * np.random.default_rng(self.seed).standard_normal(x_num_train.shape) 306 | else: 307 | raise ValueError('Unknown Normalization') 308 | normalizer.fit(x_num_train) 309 | if self.normalizer_path is not None: 310 | if self.stage is None: 311 | raise ValueError('stage is None, only pretrain or downstream are accepted if normalizer_path is not None') 312 | if self.stage == 'pretrain': 313 | pickle.dump(normalizer, open(self.normalizer_path, 'wb')) 314 | print(f'Normalizer saved to {self.normalizer_path}') 315 | if self.stage == 'downstream': 316 | normalizer = pickle.load(open(self.normalizer_path, 'rb')) 317 | print(f'Normalizer loaded from {self.normalizer_path}') 318 | return {k: normalizer.transform(v) for k, v in x_num.items()} 319 | 320 | def handle_missing_values_numerical_features(self, x_num): 321 | # TODO: handle num_nan_masks for SAINT 322 | # num_nan_masks_int = {k: (~np.isnan(v)).astype(int) for k, v in x_num.items()} 323 | num_nan_masks = {k: np.isnan(v) for k, v in x_num.items()} 324 | if any(x.any() for x in num_nan_masks.values()): 325 | 326 | # TODO check if we need self.x_num here 327 | num_new_values = np.nanmean(self.x_num['train'], axis=0) 328 | for k, v in x_num.items(): 329 | num_nan_indices = np.where(num_nan_masks[k]) 330 | v[num_nan_indices] = np.take(num_new_values, num_nan_indices[1]) 331 | return x_num 332 | 333 | def encode_categorical_features(self, x_cat): 334 | encoder = sklearn.preprocessing.OrdinalEncoder(handle_unknown='error', dtype='int64') 335 | encoder.fit(self.full_cat_data_for_encoder.values) 336 | x_cat = {k: encoder.transform(v) for k, v in x_cat.items()} 337 | return x_cat 338 | 339 | def transform_categorical_features_to_ohe(self, x_cat): 340 | ohe = sklearn.preprocessing.OneHotEncoder(handle_unknown='ignore', sparse=False, dtype='float32') 341 | ohe.fit(self.full_cat_data_for_encoder.astype('str')) 342 | x_cat = {k: ohe.transform(v.astype('str')) for k, v in x_cat.items()} 343 | return x_cat 344 | 345 | def concatenate_data(self, x_cat, x_num): 346 | if self.cat_policy == 'indices': 347 | 
result = [x_num, x_cat] 348 | 349 | elif self.cat_policy == 'ohe': 350 | # TODO: handle output for models that need ohe 351 | raise ValueError('Not implemented') 352 | return result 353 | 354 | def preprocess_data(self): 355 | # TODO: seed (?) 356 | logging.info('Building Dataset') 357 | # TODO: figure out if we really need a copy of data or if we can preprocess it in place 358 | if self.x_num: 359 | x_num = deepcopy(self.x_num) 360 | x_num = self.handle_missing_values_numerical_features(x_num) 361 | if self.normalization: 362 | x_num = self.normalize(x_num) 363 | else: 364 | # if x_num is None replace with empty tensor for dataloader 365 | x_num = {part: torch.empty(self.size(part), 0) for part in self.parts} 366 | 367 | # if there are no categorical features, return only numerical features 368 | if self.cat_policy == 'drop' or not self.x_cat: 369 | assert x_num is not None 370 | x_num = to_tensors(x_num) 371 | # if x_cat is None replace with empty tensor for dataloader 372 | x_cat = {part: torch.empty(self.size(part), 0) for part in self.parts} 373 | return [x_num, x_cat] 374 | 375 | x_cat = deepcopy(self.x_cat) 376 | # x_cat_nan_masks = {k: v == '___null___' for k, v in x_cat.items()} 377 | x_cat = self.encode_categorical_features(x_cat) 378 | 379 | x_cat, x_num = to_tensors(x_cat), to_tensors(x_num) 380 | result = self.concatenate_data(x_cat, x_num) 381 | 382 | return result 383 | 384 | def build_y(self): 385 | if self.is_regression: 386 | assert self.y_policy == 'mean_std' 387 | y = deepcopy(self.y) 388 | if self.y_policy: 389 | if not self.is_regression: 390 | warnings.warn('y_policy is not None, but the task is NOT regression') 391 | info = None 392 | elif self.y_policy == 'mean_std': 393 | mean, std = self.y['train'].mean(), self.y['train'].std() 394 | y = {k: (v - mean) / std for k, v in y.items()} 395 | info = {'policy': self.y_policy, 'mean': mean, 'std': std} 396 | else: 397 | raise ValueError('Unknown y policy') 398 | else: 399 | info = None 400 | 401 | y = to_tensors(y) 402 | if self.is_regression or self.is_binclass: 403 | y = {part: y[part].float() for part in self.parts} 404 | return y, info 405 | 406 | 407 | def to_tensors(data): 408 | return {k: torch.as_tensor(v) for k, v in data.items()} 409 | -------------------------------------------------------------------------------- /deep_tabular/utils/get_demo_dataset.py: -------------------------------------------------------------------------------- 1 | """ get_demo_dataset.py 2 | Utilities for splitting yeast data into upstream and downstream tasks 3 | Developed for Tabular-Transfer-Learning project 4 | March 2022 5 | Data link: http://mulan.sourceforge.net/datasets-mlc.html 6 | """ 7 | import numpy as np 8 | import scipy 9 | import pandas as pd 10 | from scipy.io import arff 11 | from sklearn import preprocessing 12 | from sklearn.model_selection import train_test_split 13 | data, meta = scipy.io.arff.loadarff('data/yeast/yeast.arff') 14 | df = pd.DataFrame(data) 15 | 16 | target_columns = [col for col in df.columns if 'Class' in col] 17 | non_target_columns = [col for col in df.columns if 'Class' not in col] 18 | 19 | le = preprocessing.LabelEncoder() 20 | Y = df[target_columns].apply(le.fit_transform) 21 | print(Y) 22 | 23 | X = df[non_target_columns] 24 | 25 | downstream_target_index = 5 26 | downstream_target = target_columns[downstream_target_index] 27 | target_columns.pop(downstream_target_index) 28 | 29 | X_upstream, X_downstream, Y_upstream, Y_downstream = train_test_split(X, Y, test_size=0.2, random_state=0) 30 | 
Y_downstream = Y_downstream[downstream_target] 31 | Y_upstream = Y_upstream[target_columns] 32 | 33 | X_upstream.to_csv('data/yeast_upstream/N.csv', index = False) 34 | Y_upstream.to_csv('data/yeast_upstream/y.csv', index = False) 35 | 36 | X_downstream.to_csv('data/yeast_downstream/N.csv', index = False) 37 | Y_downstream.to_csv('data/yeast_downstream/y.csv', index = False) 38 | 39 | #0.628 with TL 40 | #0.578 with no TL 41 | -------------------------------------------------------------------------------- /deep_tabular/utils/mimic_tools.py: -------------------------------------------------------------------------------- 1 | """ mimic_tools.py 2 | Utilities for splitting MetaMIMIC data into upstream and downstream tasks 3 | Developed for Tabular-Transfer-Learning project 4 | March 2022 5 | """ 6 | 7 | import numpy as np 8 | from sklearn.preprocessing import LabelEncoder 9 | import pickle 10 | import os 11 | import random 12 | import pandas as pd 13 | import torch 14 | import sklearn 15 | from sklearn.model_selection import train_test_split 16 | 17 | def split_mimic(): 18 | mimic = pd.read_csv('../../../data/mimic/metaMIMIC.csv', delimiter = ',') 19 | mimic_target_columns = ['diabetes_diagnosed', 'hypertensive_diagnosed', 'ischematic_diagnosed', 20 | 'heart_diagnosed', 'overweight_diagnosed', 'anemia_diagnosed', 'respiratory_diagnosed', 21 | 'hypotension_diagnosed', 'lipoid_diagnosed', 'atrial_diagnosed', 'purpura_diagnosed', 'alcohol_diagnosed'] 22 | y_full = mimic[mimic_target_columns] 23 | mimic.drop(columns = ['subject_id'], inplace = True) 24 | mimic.drop(columns = mimic_target_columns, inplace = True) 25 | X_full = mimic.astype('float') 26 | categorical_columns = ['gender'] 27 | numerical_columns = list(X_full.columns[X_full.columns != 'gender']) 28 | X_full.loc[X_full['gender'] == 1, 'gender'] = 'male' 29 | X_full.loc[X_full['gender'] == 0, 'gender'] = 'female' 30 | 31 | 32 | X_train, X_test, y_train, y_test = train_test_split(X_full, y_full, test_size=0.2, random_state=1) 33 | X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.1875, random_state=1) # 0.1875 x 0.8 = 0.15 34 | 35 | X_train.to_csv('../../../data/mimic/mimic_train_X.csv', index = False) 36 | X_val.to_csv('../../../data/mimic/mimic_val_X.csv', index = False) 37 | X_test.to_csv('../../../data/mimic/mimic_test_X.csv', index = False) 38 | y_train.to_csv('../../../data/mimic/mimic_train_y.csv', index = False) 39 | y_val.to_csv('../../../data/mimic/mimic_val_y.csv', index = False) 40 | y_test.to_csv('../../../data/mimic/mimic_test_y.csv', index = False) 41 | return 42 | 43 | 44 | 45 | def data_prep_transfer_mimic(ds_id, task, stage='pretrain', downstream_target=0, downstream_samples_per_class = 2): 46 | """ 47 | Function to create a transfer learning task based on the metaMIMIC data 48 | """ 49 | seed = 0 50 | np.random.seed(seed) 51 | 52 | mimic_target_columns = ['diabetes_diagnosed', 'hypertensive_diagnosed', 'ischematic_diagnosed', 53 | 'heart_diagnosed', 'overweight_diagnosed', 'anemia_diagnosed', 'respiratory_diagnosed', 54 | 'hypotension_diagnosed', 'lipoid_diagnosed', 'atrial_diagnosed', 'purpura_diagnosed', 55 | 'alcohol_diagnosed'] 56 | X_train = pd.read_csv('../../../data/mimic/mimic_train_X.csv') 57 | X_val = pd.read_csv('../../../data/mimic/mimic_val_X.csv') 58 | X_test = pd.read_csv('../../../data/mimic/mimic_test_X.csv') 59 | y_train_full = pd.read_csv('../../../data/mimic/mimic_train_y.csv') 60 | y_val_full = pd.read_csv('../../../data/mimic/mimic_val_y.csv') 61 | y_test_full = 
pd.read_csv('../../../data/mimic/mimic_test_y.csv') 62 | categorical_columns = ['gender'] 63 | numerical_columns = list(X_train.columns[X_train.columns != 'gender']) 64 | X_train[categorical_columns] = X_train[categorical_columns].fillna("MissingValue") 65 | X_val[categorical_columns] = X_val[categorical_columns].fillna("MissingValue") 66 | X_test[categorical_columns] = X_test[categorical_columns].fillna("MissingValue") 67 | print(numerical_columns) 68 | print(categorical_columns) 69 | 70 | if task == 'binclass': 71 | if 'downstream' in stage: 72 | #Merge validation set into train, keep the dummy validation set for the code not to fail 73 | y_train_full = pd.concat([y_train_full, y_val_full], ignore_index=True) 74 | X_train = pd.concat([X_train, X_val], ignore_index=True) 75 | print('Using downstream target:', mimic_target_columns[downstream_target]) 76 | y_train = y_train_full[mimic_target_columns[downstream_target]] 77 | y_val = y_val_full[mimic_target_columns[downstream_target]] 78 | y_test = y_test_full[mimic_target_columns[downstream_target]] 79 | elif 'pretrain' in stage: 80 | #Do multitarget in regular pretrain 81 | print('Dropping downstream target:', mimic_target_columns[downstream_target]) 82 | y_train = y_train_full.drop(columns=[mimic_target_columns[downstream_target]]) 83 | y_val = y_val_full.drop(columns=[mimic_target_columns[downstream_target]]) 84 | y_test = y_test_full.drop(columns=[mimic_target_columns[downstream_target]]) 85 | else: 86 | raise ValueError('Stage is incorrect!') 87 | else: 88 | raise NotImplementedError('Mimic only accepts binclass tasks: binclass with multiple targets for pretraining and binclass with a single target for downstream') 89 | 90 | X_train_full = X_train.copy() 91 | y_train_full = y_train.copy() 92 | if ('downstream' in stage): 93 | #switching to downstream_samples_per_class 94 | print('Total num classes:', len(set(y_train))) 95 | total_num_of_classes = len(set(y_train)) 96 | X_train, _, y_train, _ = train_test_split(X_train, y_train, 97 | train_size=downstream_samples_per_class * len(set(y_train)), 98 | stratify=y_train, random_state = seed) 99 | print('Sample num classes:', len(set(y_train))) 100 | sample_num_classes = len(set(y_train)) 101 | if sample_num_classes < total_num_of_classes: 102 | print('Resampling and guaranteeing at least one sample per class') 103 | X_train, y_train = stratified_sample_at_least_one_per_class(X_train_full, y_train_full, downstream_samples_per_class, seed) 104 | sample_num_classes = len(set(y_train)) 105 | print('New sample num classes:', len(set(y_train))) 106 | assert total_num_of_classes == sample_num_classes 107 | 108 | 109 | X_cat_train = X_train[categorical_columns].values 110 | X_num_train = X_train[numerical_columns].values 111 | y_train = y_train.values.astype('float') 112 | 113 | X_cat_val = X_val[categorical_columns].values 114 | X_num_val = X_val[numerical_columns].values 115 | y_val = y_val.values.astype('float') 116 | 117 | X_cat_test = X_test[categorical_columns].values 118 | X_num_test = X_test[numerical_columns].values 119 | y_test = y_test.values.astype('float') 120 | 121 | info = {} 122 | info['name'] = ds_id 123 | info['stage'] = stage 124 | info['split'] = seed 125 | info['task_type'] = task 126 | info['n_num_features'] = len(numerical_columns) 127 | info['n_cat_features'] = len(categorical_columns) 128 | info['train_size'] = X_train.shape[0] 129 | info['val_size'] = X_val.shape[0] 130 | info['test_size'] = X_test.shape[0] 131 | 132 | 133 | if len(y_train.shape) > 1: 134 | info['n_classes'] 
= y_train.shape[1] 135 | else: 136 | info['n_classes'] = 1 137 | 138 | if len(numerical_columns) > 0: 139 | #We should not have access to a validation set in the limited data regime, replace it with train to make sure 140 | if ('downstream' in stage): 141 | X_num_val = X_num_train 142 | numerical_data = {'train': X_num_train, 'val': X_num_val, 'test': X_num_test} 143 | else: 144 | numerical_data = None 145 | 146 | if len(categorical_columns) > 0: 147 | #We should not have access to a validation set in the limited data regime, replace it with train to make sure 148 | if ('downstream' in stage): 149 | X_cat_val = X_cat_train 150 | categorical_data = {'train': X_cat_train, 'val': X_cat_val, 'test': X_cat_test} 151 | else: 152 | categorical_data = None 153 | 154 | #We should not have access to a validation set in the limited data regime, replace it with train to make sure 155 | if ('downstream' in stage): 156 | y_val = y_train 157 | targets = {'train': y_train, 'val': y_val, 'test': y_test} 158 | print('\n Train size:{} Val size:{} Test size:{}'.format(len(y_train), len(y_val), len(y_test))) 159 | 160 | if len(categorical_columns) > 0: 161 | #this only works with mimic since the only categorical feature is gender 162 | full_cat_data_for_encoder = X_train_full[categorical_columns] 163 | else: 164 | full_cat_data_for_encoder = None 165 | 166 | return numerical_data, categorical_data, targets, info, full_cat_data_for_encoder 167 | 168 | 169 | def stratified_sample_at_least_one_per_class(X_train, y_train, downstream_samples_per_class, seed): 170 | # Sample 1 element per class 171 | X_train['y'] = y_train 172 | X_one_sample = X_train.groupby(by='y').sample(n=1) 173 | y_one_sample = X_one_sample['y'] 174 | X_one_sample = X_one_sample.drop(columns=['y']) 175 | # Add a stratified sample from the rest of the data 176 | X_train = X_train[~X_train.index.isin(X_one_sample.index)] 177 | y_train = X_train['y'] 178 | X_train = X_train.drop(columns=['y']) 179 | X_train, _, y_train, _ = train_test_split(X_train, y_train, 180 | train_size=downstream_samples_per_class * len(set(y_train)) - len( 181 | X_one_sample), 182 | stratify=y_train, random_state=seed) 183 | X_train = pd.concat([X_train, X_one_sample], axis=0) 184 | y_train = pd.concat([y_train, y_one_sample], axis=0) 185 | return X_train, y_train 186 | -------------------------------------------------------------------------------- /deep_tabular/utils/testing.py: -------------------------------------------------------------------------------- 1 | """ testing.py 2 | Utilities for testing models 3 | Developed for Tabular-Transfer-Learning project 4 | March 2022 5 | Some functionality adopted from https://github.com/Yura52/rtdl 6 | """ 7 | 8 | import torch 9 | from sklearn.metrics import accuracy_score, mean_squared_error, balanced_accuracy_score, roc_auc_score 10 | from tqdm import tqdm 11 | 12 | 13 | # Ignore statements for pylint: 14 | # Too many branches (R0912), Too many statements (R0915), No member (E1101), 15 | # Not callable (E1102), Invalid name (C0103), No exception (W0702), 16 | # Too many local variables (R0914), Missing docstring (C0116, C0115, C0114). 
17 | # pylint: disable=R0912, R0915, E1101, E1102, C0103, W0702, R0914, C0116, C0115, C0114 18 | 19 | 20 | def evaluate_model(net, loaders, task, device): 21 | scores = [] 22 | for loader in loaders: 23 | score = test_default(net, loader, task, device) 24 | scores.append(score) 25 | return scores 26 | 27 | 28 | def test_default(net, testloader, task, device): 29 | net.eval() 30 | targets_all = [] 31 | predictions_all = [] 32 | with torch.no_grad(): 33 | for batch_idx, (inputs_num, inputs_cat, targets) in enumerate(tqdm(testloader, leave=False)): 34 | inputs_num, inputs_cat, targets = inputs_num.to(device).float(), inputs_cat.to(device), targets.to(device) 35 | inputs_num, inputs_cat = inputs_num if inputs_num.nelement() != 0 else None, \ 36 | inputs_cat if inputs_cat.nelement() != 0 else None 37 | 38 | outputs = net(inputs_num, inputs_cat) 39 | if task == "multiclass": 40 | predicted = torch.argmax(outputs, dim=1) 41 | elif task == "binclass": 42 | predicted = outputs 43 | elif task == "regression": 44 | predicted = outputs 45 | targets_all.extend(targets.cpu().tolist()) 46 | predictions_all.extend(predicted.cpu().tolist()) 47 | 48 | if task == "multiclass": 49 | accuracy = accuracy_score(targets_all, predictions_all) 50 | balanced_accuracy = balanced_accuracy_score(targets_all, predictions_all, adjusted=False) 51 | balanced_accuracy_adjusted = balanced_accuracy_score(targets_all, predictions_all, adjusted=True) 52 | scores = {"score": accuracy, 53 | "accuracy": accuracy, 54 | "balanced_accuracy": balanced_accuracy, 55 | "balanced_accuracy_adjusted": balanced_accuracy_adjusted} 56 | elif task == "regression": 57 | rmse = mean_squared_error(targets_all, predictions_all, squared=False) 58 | scores = {"score": -rmse, 59 | "rmse": -rmse} 60 | elif task == "binclass": 61 | roc_auc = roc_auc_score(targets_all, predictions_all) 62 | scores = {"score": roc_auc, 63 | "roc_auc": roc_auc} 64 | return scores 65 | 66 | 67 | def evaluate_backbone(embedders, backbone, heads, loaders, tasks, device): 68 | scores = {} 69 | for k in loaders.keys(): 70 | score = evaluate_backbone_one_dataset(embedders[k], backbone, heads[k], loaders[k], tasks[k], device) 71 | scores[k] = score 72 | return scores 73 | 74 | 75 | def evaluate_backbone_one_dataset(embedder, backbone, head, testloader, task, device): 76 | embedder.eval() 77 | backbone.eval() 78 | head.eval() 79 | targets_all = [] 80 | predictions_all = [] 81 | with torch.no_grad(): 82 | for batch_idx, (inputs_num, inputs_cat, targets) in enumerate(tqdm(testloader, leave=False)): 83 | inputs_num, inputs_cat, targets = inputs_num.to(device).float(), inputs_cat.to(device), targets.to(device) 84 | inputs_num, inputs_cat = inputs_num if inputs_num.nelement() != 0 else None, \ 85 | inputs_cat if inputs_cat.nelement() != 0 else None 86 | 87 | embedding = embedder(inputs_num, inputs_cat) 88 | features = backbone(embedding) 89 | outputs = head(features) 90 | 91 | if task == "multiclass": 92 | predicted = torch.argmax(outputs, dim=1) 93 | elif task == "binclass": 94 | predicted = outputs 95 | elif task == "regression": 96 | predicted = outputs 97 | targets_all.extend(targets.cpu().tolist()) 98 | predictions_all.extend(predicted.cpu().tolist()) 99 | 100 | if task == "multiclass": 101 | accuracy = accuracy_score(targets_all, predictions_all) 102 | balanced_accuracy = balanced_accuracy_score(targets_all, predictions_all, adjusted=False) 103 | balanced_accuracy_adjusted = balanced_accuracy_score(targets_all, predictions_all, adjusted=True) 104 | scores = {"score": accuracy, 
105 | "accuracy": accuracy, 106 | "balanced_accuracy": balanced_accuracy, 107 | "balanced_accuracy_adjusted": balanced_accuracy_adjusted} 108 | elif task == "regression": 109 | rmse = mean_squared_error(targets_all, predictions_all, squared=False) 110 | scores = {"score": -rmse, 111 | "rmse": -rmse} 112 | elif task == "binclass": 113 | roc_auc = roc_auc_score(targets_all, predictions_all) 114 | scores = {"score": roc_auc, 115 | "roc_auc": roc_auc} 116 | return scores 117 | -------------------------------------------------------------------------------- /deep_tabular/utils/tools.py: -------------------------------------------------------------------------------- 1 | """ tools.py 2 | Utility functions that are common to all tasks 3 | Developed for Tabular-Transfer-Learning project 4 | March 2022 5 | """ 6 | 7 | import logging 8 | import os 9 | import random 10 | from collections import OrderedDict 11 | import torch 12 | from icecream import ic 13 | from torch.optim import SGD, Adam, AdamW 14 | from torch.optim.lr_scheduler import MultiStepLR, CosineAnnealingLR, LambdaLR, ChainedScheduler 15 | from torch.utils.data import TensorDataset, DataLoader 16 | 17 | import deep_tabular.models as models 18 | from .data_tools import get_data, get_categories_full_cat_data, TabularDataset, get_multilabel_data 19 | from .warmup import ExponentialWarmup, LinearWarmup 20 | from ..adjectives import adjectives 21 | from ..names import names 22 | from .mimic_tools import data_prep_transfer_mimic 23 | 24 | 25 | # Ignore statements for pylint: 26 | # Too many branches (R0912), Too many statements (R0915), No member (E1101), 27 | # Not callable (E1102), Invalid name (C0103), No exception (W0702), 28 | # Too many local variables (R0914), Missing docstring (C0116, C0115). 29 | # pylint: disable=R0912, R0915, E1101, E1102, C0103, W0702, R0914, C0116, C0115 30 | 31 | def generate_run_id(): 32 | hashstr = f"{adjectives[random.randint(0, len(adjectives))]}-{names[random.randint(0, len(names))]}" 33 | return hashstr 34 | 35 | 36 | def write_to_tb(stats, stat_names, epoch, writer): 37 | for name, stat in zip(stat_names, stats): 38 | stat_name = os.path.join("val", name) 39 | writer.add_scalar(stat_name, stat, epoch) 40 | 41 | 42 | def get_dataloaders(cfg, which_dataset=None): 43 | """ 44 | cfg: OmegaConf, dictionary of configurations 45 | which_dataset: int of None, indicates which dataset to use if multiple are specified in the config 46 | """ 47 | 48 | if which_dataset is not None: 49 | cfg_dataset = cfg.dataset[which_dataset] 50 | else: 51 | cfg_dataset = cfg.dataset 52 | 53 | if cfg_dataset.task == 'multilabel': 54 | #Changing the task to binclass because multilabel is just binary cross-entropy over multilabel logits 55 | cfg_dataset.task = 'binclass' 56 | x_numerical, x_categorical, y, info, full_cat_data_for_encoder = get_multilabel_data(ds_id=cfg_dataset.name, 57 | source=cfg_dataset.source, 58 | task=cfg_dataset.task) 59 | elif cfg_dataset.name == 'mimic': 60 | x_numerical, x_categorical, y, info, full_cat_data_for_encoder = data_prep_transfer_mimic(ds_id=cfg_dataset.name, 61 | task=cfg_dataset.task, 62 | stage=cfg_dataset.stage, 63 | downstream_target=cfg_dataset.downstream_target, 64 | downstream_samples_per_class=cfg_dataset.downstream_sample_num//2) 65 | else: 66 | x_numerical, x_categorical, y, info, full_cat_data_for_encoder = get_data(dataset_id=cfg_dataset.name, 67 | source=cfg_dataset.source, 68 | task=cfg_dataset.task, 69 | datasplit=[.65, .15, .2]) 70 | 71 | dataset = TabularDataset(x_numerical, 
x_categorical, y, info, normalization=cfg_dataset.normalization, 72 | cat_policy="indices", 73 | seed=0, 74 | full_cat_data_for_encoder=full_cat_data_for_encoder, 75 | y_policy=cfg_dataset.y_policy, 76 | normalizer_path=cfg_dataset.normalizer_path, 77 | stage=cfg_dataset.stage) 78 | 79 | X = dataset.preprocess_data() 80 | Y, y_info = dataset.build_y() 81 | unique_categories = get_categories_full_cat_data(full_cat_data_for_encoder) 82 | n_numerical = dataset.n_num_features 83 | n_categorical = dataset.n_cat_features 84 | n_classes = dataset.n_classes 85 | logging.info(f"Task: {cfg_dataset.task}, Dataset: {cfg_dataset.name}, n_numerical: {n_numerical}, " 86 | f"n_categorical: {n_categorical}, n_classes: {n_classes}, n_train_samples: {dataset.size('train')}, " 87 | f"n_val_samples: {dataset.size('val')}, n_test_samples: {dataset.size('test')}") 88 | 89 | trainset = TensorDataset(X[0]["train"], X[1]["train"], Y["train"]) 90 | valset = TensorDataset(X[0]["val"], X[1]["val"], Y["val"]) 91 | testset = TensorDataset(X[0]["test"], X[1]["test"], Y["test"]) 92 | 93 | trainloader = DataLoader(trainset, batch_size=cfg.hyp.train_batch_size, shuffle=True, drop_last=True) 94 | valloader = DataLoader(valset, batch_size=cfg.hyp.test_batch_size, shuffle=False, drop_last=False) 95 | testloader = DataLoader(testset, batch_size=cfg.hyp.test_batch_size, shuffle=False, drop_last=False) 96 | 97 | 98 | loaders = {"train": trainloader, "val": valloader, "test": testloader} 99 | return loaders, unique_categories, n_numerical, n_classes 100 | 101 | 102 | def get_model(model, num_numerical, unique_categories, num_outputs, d_embedding, model_params): 103 | model = model.lower() 104 | net = getattr(models, model)(num_numerical, unique_categories, num_outputs, d_embedding, model_params) 105 | return net 106 | 107 | 108 | def get_embedder(cfg, num_numerical, unique_categories): 109 | model_name = cfg.model.name.lower() 110 | if model_name == "ft_transformer": 111 | embedder = models.ft_tokenizer(num_numerical, unique_categories, cfg.model.d_embedding, cfg.model.token_bias) 112 | else: 113 | raise NotImplementedError(f"Model name is {model_name}, but this is not yet implemented.") 114 | return embedder 115 | 116 | 117 | class Squeeze(torch.nn.Module): 118 | def forward(self, x): 119 | return torch.squeeze(x) 120 | 121 | 122 | 123 | def get_backbone(cfg, device): 124 | model_name = cfg.model.name.lower() 125 | if model_name == "ft_transformer": 126 | net = models.ft_backbone(cfg.model) 127 | else: 128 | raise NotImplementedError(f"Model name is {model_name}, but this is not yet implemented.") 129 | if cfg.model.model_path is not None: 130 | logging.info(f"Loading backbone from checkpoint {cfg.model.model_path}...") 131 | state_dict = torch.load(cfg.model.model_path, map_location=device) 132 | net.load_state_dict(state_dict["backbone"]) 133 | net = net.to(device) 134 | return net 135 | 136 | 137 | def get_optimizer_for_single_net(optim_args, net, state_dict): 138 | warmup = ExponentialWarmup if optim_args.warmup_type == "exponential" else LinearWarmup 139 | 140 | if optim_args.head_lr is not None: 141 | head_name, head_module = list(net.named_modules())[-1] 142 | head_parameters = [v for k, v in net.named_parameters() if head_name in k] 143 | feature_extractor_parameters = [v for k, v in net.named_parameters() if head_name not in k] 144 | all_params = [{'params': feature_extractor_parameters}, 145 | {'params': head_parameters, 'lr': optim_args.head_lr}] 146 | else: 147 | all_params = [{"params": [p for n, p in 
net.named_parameters()]}] 148 | 149 | if optim_args.optimizer.lower() == "sgd": 150 | optimizer = SGD(all_params, lr=optim_args.lr, weight_decay=optim_args.weight_decay, 151 | momentum=optim_args.momentum) 152 | elif optim_args.optimizer.lower() == "adam": 153 | optimizer = Adam(all_params, lr=optim_args.lr, weight_decay=optim_args.weight_decay) 154 | elif optim_args.optimizer.lower() == "adamw": 155 | optimizer = AdamW(all_params, lr=optim_args.lr, weight_decay=optim_args.weight_decay) 156 | else: 157 | raise ValueError(f"{ic.format()}: Optimizer choice of {optim_args.optimizer.lower()} not yet implmented. " 158 | f"Should be one of ['sgd', 'adam', 'adamw'].") 159 | 160 | if state_dict is not None: 161 | optimizer.load_state_dict(state_dict) 162 | warmup_scheduler = warmup(optimizer, warmup_period=0) 163 | else: 164 | warmup_scheduler = warmup(optimizer, warmup_period=optim_args.warmup_period) 165 | 166 | if optim_args.lr_decay.lower() == "step": 167 | lr_scheduler = MultiStepLR(optimizer, milestones=optim_args.lr_schedule, 168 | gamma=optim_args.lr_factor, last_epoch=-1) 169 | elif optim_args.lr_decay.lower() == "cosine": 170 | lr_scheduler = CosineAnnealingLR(optimizer, optim_args.epochs, eta_min=0, last_epoch=-1, verbose=False) 171 | else: 172 | raise ValueError(f"{ic.format()}: Learning rate decay style {optim_args.lr_decay} not yet implemented.") 173 | 174 | #Freeze feature extractor and warm the head for some period 175 | if optim_args.head_warmup_period is not None: 176 | #Multiply the feature extractor lr by 0 during the head warmup period 177 | lambda_feature_extractor = lambda epoch: 0 if epoch < optim_args.head_warmup_period else 1 178 | lambda_head = lambda epoch: 1 179 | head_warmup_scheduler = LambdaLR(optimizer, lr_lambda = [lambda_feature_extractor, lambda_head]) 180 | lr_scheduler = ChainedScheduler([head_warmup_scheduler, lr_scheduler]) 181 | return optimizer, warmup_scheduler, lr_scheduler 182 | 183 | 184 | def get_optimizer_for_backbone(optim_args, embedders, backbone, heads, state_dict=None): 185 | warmup = ExponentialWarmup if optim_args.warmup_type == "exponential" else LinearWarmup 186 | 187 | all_params = [{"params": [p for p in backbone.parameters()], "lr": optim_args.lr}] 188 | all_params.extend([{f"params": [p for p in v.parameters()], 189 | "lr": optim_args.lr_for_embedders} for v in embedders.values()]) 190 | all_params.extend([{f"params": [p for p in v.parameters()], 191 | "lr": optim_args.lr_for_heads} for v in heads.values()]) 192 | 193 | if optim_args.optimizer.lower() == "adamw": 194 | optimizer = AdamW(all_params, weight_decay=optim_args.weight_decay) 195 | elif optim_args.optimizer.lower() == "sgd": 196 | optimizer = SGD(all_params, momentum=0.9, weight_decay=optim_args.weight_decay) 197 | else: 198 | raise ValueError(f"{ic.format()}: Optimizer choice of {optim_args.optimizer.lower()} not yet implmented. 
" 199 | f"Should be one of ['adamw'].") 200 | 201 | if state_dict is not None: 202 | optimizer.load_state_dict(state_dict) 203 | warmup_scheduler = warmup(optimizer, warmup_period=0) 204 | else: 205 | warmup_scheduler = warmup(optimizer, warmup_period=optim_args.warmup_period) 206 | 207 | if optim_args.lr_decay.lower() == "step": 208 | lr_scheduler = MultiStepLR(optimizer, milestones=optim_args.lr_schedule, 209 | gamma=optim_args.lr_factor, last_epoch=-1) 210 | elif optim_args.lr_decay.lower() == "cosine": 211 | lr_scheduler = CosineAnnealingLR(optimizer, optim_args.epochs, eta_min=0, last_epoch=-1, verbose=False) 212 | else: 213 | raise ValueError(f"{ic.format()}: Learning rate decay style {optim_args.lr_decay} not yet implemented.") 214 | 215 | return optimizer, warmup_scheduler, lr_scheduler 216 | 217 | 218 | def get_criterion(task): 219 | if task == "multiclass": 220 | criterion = torch.nn.CrossEntropyLoss() 221 | elif task == "binclass": 222 | criterion = torch.nn.BCEWithLogitsLoss() 223 | elif task == "regression": 224 | criterion = torch.nn.MSELoss() 225 | else: 226 | raise ValueError(f"No loss function implemented for task {task}.") 227 | return criterion 228 | 229 | def get_head(model_name, net): 230 | if model_name in ['ft_transformer', 'resnet', 'mlp']: 231 | head_name = 'head' 232 | head_module = net.head 233 | else: 234 | head_name, head_module = list(net.named_modules())[-1] 235 | print(f'Original head: {head_name}, {head_module}\n') 236 | return head_name, head_module 237 | 238 | def remove_parallel(state_dict): 239 | ''' state_dict: state_dict of model saved with DataParallel() 240 | returns state_dict without extra module level ''' 241 | new_state_dict = OrderedDict() 242 | for k, v in state_dict.items(): 243 | name = k[7:] # remove module. 244 | new_state_dict[name] = v 245 | return new_state_dict 246 | 247 | def load_transfer_model_from_checkpoint(model_args, num_numerical, unique_categories, num_outputs, device): 248 | model = model_args.name 249 | model_path = model_args.model_path 250 | d_embedding = model_args.d_embedding 251 | use_mlp_head = model_args.use_mlp_head 252 | freeze_feature_extractor = model_args.freeze_feature_extractor 253 | epoch = 0 254 | optimizer = None 255 | 256 | net = get_model(model, num_numerical, unique_categories, num_outputs, d_embedding, model_args) 257 | net = net.to(device) 258 | head_name, head_module = get_head(model_args.name, net) 259 | if model_path is not None: 260 | logging.info(f"Loading model from checkpoint {model_path}...") 261 | state_dict = torch.load(model_path, map_location=device) 262 | if device == "cuda": 263 | state_dict["net"] = remove_parallel(state_dict["net"]) 264 | pretrained_feature_extractor_dict = {k: v for k, v in state_dict["net"].items() if head_name not in k} 265 | missing_keys, unexpected_keys = net.load_state_dict(pretrained_feature_extractor_dict, strict = False) 266 | print('State dict successfully loaded from pretrained checkpoint. Original head reinitialized.') 267 | print('Missing keys:{}\nUnexpected keys:{}\n'.format(missing_keys, unexpected_keys)) 268 | # epoch = state_dict["epoch"] + 1 269 | # optimizer = state_dict["optimizer"] 270 | if freeze_feature_extractor: 271 | trainable_params = [] 272 | for name, param in net.named_parameters(): 273 | if not any(x in name for x in [head_name]): 274 | # if head_name not in name: 275 | param.requires_grad = False 276 | else: 277 | trainable_params.append(name) 278 | print(f'Feature extractor frozen. 
Trainable params: {trainable_params}') 279 | 280 | if use_mlp_head: 281 | emb_dim = head_module.in_features 282 | out_dim = head_module.out_features 283 | head_module = torch.nn.Sequential( 284 | torch.nn.Linear(emb_dim, 200), 285 | torch.nn.ReLU(), 286 | torch.nn.Linear(200, 200), 287 | torch.nn.ReLU(), 288 | torch.nn.Linear(200, out_dim)).to(device) 289 | setattr(net, head_name, head_module) 290 | print('New head set to:', net.head) 291 | if device == "cuda": 292 | net = torch.nn.DataParallel(net) 293 | return net, epoch, optimizer 294 | 295 | 296 | def load_model_from_checkpoint(model_args, num_numerical, unique_categories, num_outputs, device): 297 | model = model_args.name 298 | model_path = model_args.model_path 299 | d_embedding = model_args.d_embedding 300 | epoch = 0 301 | optimizer = None 302 | 303 | net = get_model(model, num_numerical, unique_categories, num_outputs, d_embedding, model_args) 304 | net = net.to(device) 305 | if device == "cuda": 306 | net = torch.nn.DataParallel(net) 307 | if model_path is not None: 308 | logging.info(f"Loading model from checkpoint {model_path}...") 309 | state_dict = torch.load(model_path, map_location=device) 310 | net.load_state_dict(state_dict["net"]) 311 | epoch = state_dict["epoch"] + 1 312 | optimizer = state_dict["optimizer"] 313 | 314 | return net, epoch, optimizer 315 | 316 | -------------------------------------------------------------------------------- /deep_tabular/utils/training.py: -------------------------------------------------------------------------------- 1 | """ training.py 2 | Utilities for training models 3 | Developed for Tabular-Transfer-Learning project 4 | March 2022 5 | """ 6 | 7 | import random 8 | from dataclasses import dataclass 9 | from typing import Any 10 | 11 | # from icecream import ic 12 | from tqdm import tqdm 13 | 14 | 15 | # Ignore statemenst for pylint: 16 | # Too many branches (R0912), Too many statements (R0915), No member (E1101), 17 | # Not callable (E1102), Invalid name (C0103), No exception (W0702), 18 | # Too many local variables (R0914), Missing docstring (C0116, C0115, C0114), 19 | # Unused import (W0611). 
20 | # pylint: disable=R0912, R0915, E1101, E1102, C0103, W0702, R0914, C0116, C0115, C0114, W0611 21 | 22 | 23 | @dataclass 24 | class TrainingSetup: 25 | """Attributes to describe the training precedure""" 26 | criterions: Any 27 | optimizer: Any 28 | scheduler: Any 29 | warmup: Any 30 | num_datasets_in_batch: Any = None 31 | 32 | 33 | def default_training_loop(net, trainloader, train_setup, device): 34 | net.train() 35 | optimizer = train_setup.optimizer 36 | lr_scheduler = train_setup.scheduler 37 | warmup_scheduler = train_setup.warmup 38 | criterion = train_setup.criterions 39 | 40 | train_loss = 0 41 | total = 0 42 | 43 | for batch_idx, (inputs_num, inputs_cat, targets) in enumerate(tqdm(trainloader, leave=False)): 44 | inputs_num, inputs_cat, targets = inputs_num.to(device).float(), inputs_cat.to(device), targets.to(device) 45 | inputs_num, inputs_cat = inputs_num if inputs_num.nelement() != 0 else None, \ 46 | inputs_cat if inputs_cat.nelement() != 0 else None 47 | 48 | optimizer.zero_grad() 49 | outputs = net(inputs_num, inputs_cat) 50 | loss = criterion(outputs, targets) 51 | loss.backward() 52 | optimizer.step() 53 | 54 | train_loss += loss.item() 55 | total += targets.size(0) 56 | 57 | train_loss = train_loss / (batch_idx + 1) 58 | 59 | lr_scheduler.step() 60 | warmup_scheduler.dampen() 61 | 62 | return train_loss 63 | 64 | -------------------------------------------------------------------------------- /deep_tabular/utils/warmup.py: -------------------------------------------------------------------------------- 1 | """ warmup.py 2 | code for warmup learning rate scheduler 3 | borrowed from https://github.com/ArneNx/pytorch_warmup/tree/warmup_fix 4 | and modified July 2020 5 | """ 6 | 7 | import math 8 | 9 | from torch.optim import Optimizer 10 | 11 | 12 | # Ignore statemenst for pylint: 13 | # Too many branches (R0912), Too many statements (R0915), No member (E1101), 14 | # Not callable (E1102), Invalid name (C0103), No exception (W0702), 15 | # Too many local variables (R0914). 16 | # pylint: disable=R0912, R0915, E1101, E1102, C0103, W0702, R0914 17 | 18 | 19 | class BaseWarmup: 20 | """Base class for all warmup schedules 21 | 22 | Arguments: 23 | optimizer (Optimizer): an instance of a subclass of Optimizer 24 | warmup_params (list): warmup paramters 25 | last_step (int): The index of last step. (Default: -1) 26 | warmup_period (int or list): Warmup period 27 | """ 28 | 29 | def __init__(self, optimizer, warmup_params, last_step=-1, warmup_period=0): 30 | if not isinstance(optimizer, Optimizer): 31 | raise TypeError('{} is not an Optimizer'.format( 32 | type(optimizer).__name__)) 33 | self.optimizer = optimizer 34 | self.warmup_params = warmup_params 35 | self.last_step = last_step 36 | self.base_lrs = [group['lr'] for group in self.optimizer.param_groups] 37 | self.warmup_period = warmup_period 38 | self.dampen() 39 | 40 | def state_dict(self): 41 | """Returns the state of the warmup scheduler as a :class:`dict`. 42 | 43 | It contains an entry for every variable in self.__dict__ which 44 | is not the optimizer. 45 | """ 46 | return {key: value for key, value in self.__dict__.items() if key != 'optimizer'} 47 | 48 | def load_state_dict(self, state_dict): 49 | """Loads the warmup scheduler's state. 50 | 51 | Arguments: 52 | state_dict (dict): warmup scheduler state. Should be an object returned 53 | from a call to :meth:`state_dict`. 54 | """ 55 | self.__dict__.update(state_dict) 56 | 57 | def dampen(self, step=None): 58 | """Dampen the learning rates. 
59 | 60 | Arguments: 61 | step (int): The index of current step. (Default: None) 62 | """ 63 | if step is None: 64 | step = self.last_step + 1 65 | self.last_step = step 66 | if isinstance(self.warmup_period, int) and step < self.warmup_period: 67 | for i, (group, params) in enumerate(zip(self.optimizer.param_groups, 68 | self.warmup_params)): 69 | if isinstance(self.warmup_period, list) and step >= self.warmup_period[i]: 70 | continue 71 | omega = self.warmup_factor(step, **params) 72 | group['lr'] = omega * self.base_lrs[i] 73 | 74 | def warmup_factor(self, step, warmup_period): 75 | """Place holder for objects that inherit BaseWarmup.""" 76 | raise NotImplementedError 77 | 78 | 79 | def get_warmup_params(warmup_period, group_count): 80 | if type(warmup_period) == list: 81 | if len(warmup_period) != group_count: 82 | raise ValueError( 83 | 'size of warmup_period does not equal {}.'.format(group_count)) 84 | for x in warmup_period: 85 | if type(x) != int: 86 | raise ValueError( 87 | 'An element in warmup_period, {}, is not an int.'.format( 88 | type(x).__name__)) 89 | warmup_params = [dict(warmup_period=x) for x in warmup_period] 90 | elif type(warmup_period) == int: 91 | warmup_params = [dict(warmup_period=warmup_period) 92 | for _ in range(group_count)] 93 | else: 94 | raise TypeError('{} is not a list nor an int.'.format( 95 | type(warmup_period).__name__)) 96 | return warmup_params 97 | 98 | 99 | class LinearWarmup(BaseWarmup): 100 | """Linear warmup schedule. 101 | 102 | Arguments: 103 | optimizer (Optimizer): an instance of a subclass of Optimizer 104 | warmup_period (int or list): Warmup period 105 | last_step (int): The index of last step. (Default: -1) 106 | """ 107 | 108 | def __init__(self, optimizer, warmup_period, last_step=-1): 109 | group_count = len(optimizer.param_groups) 110 | warmup_params = get_warmup_params(warmup_period, group_count) 111 | super().__init__(optimizer, warmup_params, last_step, warmup_period) 112 | 113 | def warmup_factor(self, step, warmup_period): 114 | return min(1.0, (step+1) / warmup_period) 115 | 116 | 117 | class ExponentialWarmup(BaseWarmup): 118 | """Exponential warmup schedule. 119 | 120 | Arguments: 121 | optimizer (Optimizer): an instance of a subclass of Optimizer 122 | warmup_period (int or list): Effective warmup period 123 | last_step (int): The index of last step. 
(Default: -1) 124 | """ 125 | 126 | def __init__(self, optimizer, warmup_period, last_step=-1): 127 | group_count = len(optimizer.param_groups) 128 | warmup_params = get_warmup_params(warmup_period, group_count) 129 | super().__init__(optimizer, warmup_params, last_step, warmup_period) 130 | 131 | def warmup_factor(self, step, warmup_period): 132 | if step + 1 >= warmup_period: 133 | return 1.0 134 | else: 135 | return 1.0 - math.exp(-(step+1) / warmup_period) 136 | -------------------------------------------------------------------------------- /optune_from_scratch.py: -------------------------------------------------------------------------------- 1 | """ optune_from_scratch.py 2 | Tune neural networks using Optuna 3 | Developed for Tabular Transfer Learning project 4 | March 2022 5 | """ 6 | 7 | import train_net_from_scratch 8 | import hydra 9 | import optuna 10 | import sys 11 | import deep_tabular as dt 12 | import os 13 | import copy 14 | from omegaconf import DictConfig, OmegaConf 15 | import json 16 | 17 | 18 | def sample_value_with_default(trial, name, distr, min, max, default): 19 | # chooses suggested or default value with 50/50 chance 20 | if distr == 'uniform': 21 | value_suggested = trial.suggest_uniform(name, min, max) 22 | elif distr == 'loguniform': 23 | value_suggested = trial.suggest_loguniform(name, min, max) 24 | value = value_suggested if trial.suggest_categorical(f'optional_{name}', [False, True]) else default 25 | return value 26 | # 27 | 28 | def get_parameters(model, trial): 29 | if model=='ft_transformer': 30 | model_params = { 31 | 'd_embedding': trial.suggest_int('d_embedding', 32, 512, step=8), #using n_heads = 8 by default 32 | 'n_layers': trial.suggest_int('n_layers', 1, 4), 33 | 'd_ffn_factor': trial.suggest_uniform('d_ffn_factor', 2/3, 8/3), 34 | 'attention_dropout': trial.suggest_uniform('attention_dropout', 0.0, 0.5), 35 | 'ffn_dropout' : trial.suggest_uniform('ffn_dropout', 0.0, 0.5), 36 | 'residual_dropout': sample_value_with_default(trial, 'residual_dropout', 'uniform', 0.0, 0.2, 0.0), 37 | } 38 | training_params = { 39 | 'lr': trial.suggest_loguniform('lr', 1e-5, 1e-3), 40 | 'weight_decay': trial.suggest_loguniform('weight_decay', 1e-6, 1e-3), 41 | } 42 | 43 | if model=='resnet': 44 | model_params = { 45 | 'd_embedding': trial.suggest_int('d_embedding', 32, 512, step=8), 46 | 'd_hidden_factor': trial.suggest_uniform('d_hidden_factor', 1.0, 4.0), 47 | 'n_layers': trial.suggest_int('n_layers', 1, 8,), 48 | 'hidden_dropout': trial.suggest_uniform('hidden_dropout', 0.0, 0.5), 49 | 'residual_dropout': sample_value_with_default(trial, 'residual_dropout', 'uniform', 0.0, 0.5, 0.0), 50 | } 51 | training_params = { 52 | 'lr': trial.suggest_loguniform('lr', 1e-5, 1e-3), 53 | 'weight_decay': sample_value_with_default(trial, 'weight_decay', 'loguniform', 1e-6, 1e-3, 0.0), 54 | } 55 | 56 | if model=='mlp': 57 | n_layers = trial.suggest_int('n_layers', 1, 8) 58 | suggest_dim = lambda name: trial.suggest_int(name, 1, 512) 59 | d_first = [suggest_dim('d_first')] if n_layers else [] 60 | d_middle = ([suggest_dim('d_middle')] * (n_layers - 2) if n_layers > 2 else []) 61 | d_last = [suggest_dim('d_last')] if n_layers > 1 else [] 62 | layers = d_first + d_middle + d_last 63 | 64 | model_params = { 65 | 'd_embedding': trial.suggest_int('d_embedding', 32, 512, step=8), 66 | 'd_layers': layers, 67 | 'dropout': sample_value_with_default(trial, 'dropout', 'uniform', 0.0, 0.5, 0.0), 68 | } 69 | training_params = { 70 | 'lr': trial.suggest_loguniform('lr', 1e-5, 1e-3),
71 | 'weight_decay': sample_value_with_default(trial, 'weight_decay', 'loguniform', 1e-6, 1e-3, 0.0), 72 | } 73 | 74 | return model_params, training_params 75 | 76 | 77 | 78 | def objective(trial, cfg: DictConfig, trial_configs, trial_stats): 79 | 80 | model_params, training_params = get_parameters(cfg.model.name, trial) # need to suggest parameters for optuna here, probably writing a function for suggesting parameters is the optimal way 81 | 82 | config = copy.deepcopy(cfg) # create config for train_model with suggested parameters 83 | for par, value in model_params.items(): 84 | config.model[par] = value 85 | for par, value in training_params.items(): 86 | config.hyp[par] = value 87 | 88 | 89 | stats = train_net_from_scratch.main(config) 90 | 91 | trial_configs.append(config) 92 | trial_stats.append(stats) 93 | print(stats) 94 | 95 | return stats['val_stats']['score'] 96 | 97 | 98 | @hydra.main(config_path="config", config_name="optune_config") 99 | def main(cfg): 100 | n_optuna_trials = 50 101 | 102 | trial_stats = [] 103 | trial_configs = [] 104 | study = optuna.create_study(direction="maximize", sampler=optuna.samplers.TPESampler(), pruner=optuna.pruners.MedianPruner()) 105 | func = lambda trial: objective(trial, cfg, trial_configs, trial_stats) 106 | study.optimize(func, n_trials=n_optuna_trials) 107 | 108 | best_trial = study.best_trial 109 | 110 | for key, value in best_trial.params.items(): 111 | print("{}: {}".format(key, value)) 112 | 113 | best_stats = trial_stats[best_trial.number] 114 | 115 | with open(os.path.join("best_stats.json"), "w") as fp: 116 | json.dump(best_stats, fp, indent = 4) 117 | with open(os.path.join("best_config.json"), "w") as fp: 118 | json.dump(best_trial.params, fp, indent = 4) 119 | 120 | 121 | 122 | 123 | 124 | 125 | if __name__ == "__main__": 126 | run_id = dt.utils.generate_run_id() 127 | sys.argv.append(f"+run_id={run_id}") 128 | main() 129 | 130 | 131 | 132 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | hydra-core==1.1.1 2 | icecream~=2.1.1 3 | matplotlib~=3.5.0 4 | numpy~=1.21.4 5 | omegaconf~=2.1.1 6 | pandas~=1.3.4 7 | Pillow==8.2.0 8 | scipy==1.7.0 9 | seaborn~=0.11.2 10 | svglib==1.1.0 11 | tensorboard==2.2.2 12 | tensorboard-plugin-wit==1.8.0 13 | torch~=1.10.0 14 | torchvision==0.8.2 15 | tqdm~=4.62.3 16 | -------------------------------------------------------------------------------- /train_net_from_scratch.py: -------------------------------------------------------------------------------- 1 | """ train_net_from_scratch.py 2 | Train, test, and save neural networks without transfer learning 3 | Developed for Tabular Transfer Learning project 4 | March 2022 5 | """ 6 | 7 | import json 8 | import logging 9 | import os 10 | import sys 11 | from collections import OrderedDict 12 | 13 | import hydra 14 | import numpy as np 15 | import torch 16 | from icecream import ic 17 | from omegaconf import DictConfig, OmegaConf 18 | from torch.utils.tensorboard import SummaryWriter 19 | 20 | import deep_tabular as dt 21 | 22 | 23 | 24 | # Ignore statements for pylint: 25 | # Too many branches (R0912), Too many statements (R0915), No member (E1101), 26 | # Not callable (E1102), Invalid name (C0103), No exception (W0702), 27 | # Too many local variables (R0914), Missing docstring (C0116, C0115). 
28 | # pylint: disable=R0912, R0915, E1101, E1102, C0103, W0702, R0914, C0116, C0115 29 | 30 | @hydra.main(config_path="config", config_name="train_net_config") 31 | def main(cfg: DictConfig): 32 | device = "cuda" if torch.cuda.is_available() else "cpu" 33 | torch.backends.cudnn.benchmark = True 34 | log = logging.getLogger() 35 | log.info("\n_________________________________________________\n") 36 | log.info("train_net_from_scratch.py main() running.") 37 | log.info(OmegaConf.to_yaml(cfg)) 38 | if cfg.hyp.save_period < 0: 39 | cfg.hyp.save_period = 1e8 40 | torch.manual_seed(cfg.hyp.seed) 41 | torch.cuda.manual_seed_all(cfg.hyp.seed) 42 | writer = SummaryWriter(log_dir=f"tensorboard") 43 | 44 | #################################################### 45 | # Dataset and Network and Optimizer 46 | loaders, unique_categories, n_numerical, n_classes = dt.utils.get_dataloaders(cfg) 47 | 48 | net, start_epoch, optimizer_state_dict = dt.utils.load_model_from_checkpoint(cfg.model, 49 | n_numerical, 50 | unique_categories, 51 | n_classes, 52 | device) 53 | pytorch_total_params = sum(p.numel() for p in net.parameters()) 54 | 55 | log.info(f"This {cfg.model.name} has {pytorch_total_params / 1e6:0.3f} million parameters.") 56 | log.info(f"Training will start at epoch {start_epoch}.") 57 | 58 | optimizer, warmup_scheduler, lr_scheduler = dt.utils.get_optimizer_for_single_net(cfg.hyp, 59 | net, 60 | optimizer_state_dict) 61 | criterion = dt.utils.get_criterion(cfg.dataset.task) 62 | train_setup = dt.TrainingSetup(criterions=criterion, 63 | optimizer=optimizer, 64 | scheduler=lr_scheduler, 65 | warmup=warmup_scheduler) 66 | #################################################### 67 | 68 | #################################################### 69 | # Train 70 | log.info(f"==> Starting training for {max(cfg.hyp.epochs - start_epoch, 0)} epochs...") 71 | highest_val_acc_so_far = -np.inf 72 | done = False 73 | epoch = start_epoch 74 | best_epoch = epoch 75 | 76 | while not done and epoch < cfg.hyp.epochs: 77 | # forward and backward pass for one whole epoch handeld inside dt.default_training_loop() 78 | loss = dt.default_training_loop(net, loaders["train"], train_setup, device) 79 | log.info(f"Training loss at epoch {epoch}: {loss}") 80 | 81 | # if the loss is nan, then stop the training 82 | if np.isnan(float(loss)): 83 | raise ValueError(f"{ic.format()} Loss is nan, exiting...") 84 | 85 | # TensorBoard writing 86 | writer.add_scalar("Loss/loss", loss, epoch) 87 | for i in range(len(optimizer.param_groups)): 88 | writer.add_scalar(f"Learning_rate/group{i}", 89 | optimizer.param_groups[i]["lr"], 90 | epoch) 91 | 92 | # evaluate the model periodically and at the final epoch 93 | if (epoch + 1) % cfg.hyp.val_period == 0 or epoch + 1 == cfg.hyp.epochs: 94 | test_stats, val_stats, train_stats = dt.evaluate_model(net, 95 | [loaders["test"], loaders["val"], loaders["train"]], 96 | cfg.dataset.task, 97 | device) 98 | log.info(f"Training accuracy: {json.dumps(train_stats, indent=4)}") 99 | log.info(f"Val accuracy: {json.dumps(val_stats, indent=4)}") 100 | log.info(f"Test accuracy: {json.dumps(test_stats, indent=4)}") 101 | 102 | dt.utils.write_to_tb([train_stats["score"], val_stats["score"], test_stats["score"]], 103 | [f"train_acc-{cfg.dataset.name}", 104 | f"val_acc-{cfg.dataset.name}", 105 | f"test_acc-{cfg.dataset.name}"], 106 | epoch, 107 | writer) 108 | 109 | if cfg.hyp.use_patience: 110 | val_stats, test_stats = dt.evaluate_model(net, 111 | [loaders["val"], loaders["test"]], 112 | cfg.dataset.task, 113 | device) 114 
| if val_stats["score"] > highest_val_acc_so_far: 115 | best_epoch = epoch 116 | highest_val_acc_so_far = val_stats["score"] 117 | log.info(f"New best epoch, val score: {val_stats['score']}") 118 | # save current model 119 | state = {"net": net.state_dict(), "epoch": epoch, "optimizer": optimizer.state_dict()} 120 | out_str = "model_best.pth" 121 | log.info(f"Saving model to: {out_str}") 122 | torch.save(state, out_str) 123 | 124 | if epoch - best_epoch > cfg.hyp.patience: 125 | done = True 126 | epoch += 1 127 | writer.flush() 128 | writer.close() 129 | 130 | log.info("Running Final Evaluation...") 131 | checkpoint_path = "model_best.pth" 132 | net.load_state_dict(torch.load(checkpoint_path)["net"]) 133 | test_stats, val_stats, train_stats = dt.evaluate_model(net, 134 | [loaders["test"], loaders["val"], loaders["train"]], 135 | cfg.dataset.task, 136 | device) 137 | 138 | log.info(f"Training accuracy: {json.dumps(train_stats, indent=4)}") 139 | log.info(f"Val accuracy: {json.dumps(val_stats, indent=4)}") 140 | log.info(f"Test accuracy: {json.dumps(test_stats, indent=4)}") 141 | 142 | stats = OrderedDict([("dataset", cfg.dataset.name), 143 | ("model_name", cfg.model.name), 144 | ("run_id", cfg.run_id), 145 | ("best_epoch", best_epoch), 146 | ("routine", "from_scratch"), 147 | ("test_stats", test_stats), 148 | ("train_stats", train_stats), 149 | ("val_stats", val_stats)]) 150 | with open(os.path.join("stats.json"), "w") as fp: 151 | json.dump(stats, fp, indent=4) 152 | log.info(json.dumps(stats, indent=4)) 153 | #################################################### 154 | return stats 155 | 156 | 157 | if __name__ == "__main__": 158 | run_id = dt.utils.generate_run_id() 159 | sys.argv.append(f"+run_id={run_id}") 160 | main() 161 | -------------------------------------------------------------------------------- /transfer_learn_net.py: -------------------------------------------------------------------------------- 1 | """ transfer_learn_net.py 2 | Pre-train/fine-tune, test, and save neural networks 3 | Developed for Tabular Transfer Learning project 4 | March 2022 5 | """ 6 | 7 | import json 8 | import logging 9 | import os 10 | import sys 11 | from collections import OrderedDict 12 | 13 | import hydra 14 | import numpy as np 15 | import torch 16 | from icecream import ic 17 | from omegaconf import DictConfig, OmegaConf 18 | from torch.utils.tensorboard import SummaryWriter 19 | 20 | import deep_tabular as dt 21 | 22 | 23 | 24 | # Ignore statements for pylint: 25 | # Too many branches (R0912), Too many statements (R0915), No member (E1101), 26 | # Not callable (E1102), Invalid name (C0103), No exception (W0702), 27 | # Too many local variables (R0914), Missing docstring (C0116, C0115). 
28 | # pylint: disable=R0912, R0915, E1101, E1102, C0103, W0702, R0914, C0116, C0115 29 | 30 | @hydra.main(config_path="config", config_name="transfer_learn_net_config") 31 | def main(cfg: DictConfig): 32 | device = "cuda" if torch.cuda.is_available() else "cpu" 33 | torch.backends.cudnn.benchmark = True 34 | log = logging.getLogger() 35 | log.info("\n_________________________________________________\n") 36 | log.info("train_net_from_scratch.py main() running.") 37 | log.info(OmegaConf.to_yaml(cfg)) 38 | if cfg.hyp.save_period < 0: 39 | cfg.hyp.save_period = 1e8 40 | torch.manual_seed(cfg.hyp.seed) 41 | torch.cuda.manual_seed_all(cfg.hyp.seed) 42 | writer = SummaryWriter(log_dir=f"tensorboard") 43 | 44 | #################################################### 45 | # Dataset and Network and Optimizer 46 | loaders, unique_categories, n_numerical, n_classes = dt.utils.get_dataloaders(cfg) 47 | 48 | net, start_epoch, optimizer_state_dict = dt.utils.load_transfer_model_from_checkpoint(cfg.model, 49 | n_numerical, 50 | unique_categories, 51 | n_classes, 52 | device) 53 | pytorch_total_params = sum(p.numel() for p in net.parameters()) 54 | 55 | log.info(f"This {cfg.model.name} has {pytorch_total_params / 1e6:0.3f} million parameters.") 56 | log.info(f"Training will start at epoch {start_epoch}.") 57 | 58 | optimizer, warmup_scheduler, lr_scheduler = dt.utils.get_optimizer_for_single_net(cfg.hyp, 59 | net, 60 | optimizer_state_dict) 61 | criterion = dt.utils.get_criterion(cfg.dataset.task) 62 | train_setup = dt.TrainingSetup(criterions=criterion, 63 | optimizer=optimizer, 64 | scheduler=lr_scheduler, 65 | warmup=warmup_scheduler) 66 | #################################################### 67 | 68 | #################################################### 69 | # Train 70 | log.info(f"==> Starting training for {max(cfg.hyp.epochs - start_epoch, 0)} epochs...") 71 | highest_val_acc_so_far = -np.inf 72 | done = False 73 | epoch = start_epoch 74 | best_epoch = epoch 75 | 76 | while not done and epoch < cfg.hyp.epochs: 77 | # forward and backward pass for one whole epoch handeld inside dt.default_training_loop() 78 | loss = dt.default_training_loop(net, loaders["train"], train_setup, device) 79 | log.info(f"Training loss at epoch {epoch}: {loss}") 80 | # if the loss is nan, then stop the training 81 | if np.isnan(float(loss)): 82 | raise ValueError(f"{ic.format()} Loss is nan, exiting...") 83 | 84 | # TensorBoard writing 85 | writer.add_scalar("Loss/loss", loss, epoch) 86 | for i in range(len(optimizer.param_groups)): 87 | writer.add_scalar(f"Learning_rate/group{i}", 88 | optimizer.param_groups[i]["lr"], 89 | epoch) 90 | 91 | # evaluate the model periodically and at the final epoch 92 | if (epoch + 1) % cfg.hyp.val_period == 0 or epoch + 1 == cfg.hyp.epochs: 93 | test_stats, val_stats, train_stats = dt.evaluate_model(net, 94 | [loaders["test"], loaders["val"], loaders["train"]], 95 | cfg.dataset.task, 96 | device) 97 | log.info(f"Training accuracy: {json.dumps(train_stats, indent=4)}") 98 | log.info(f"Val accuracy: {json.dumps(val_stats, indent=4)}") 99 | log.info(f"Test accuracy: {json.dumps(test_stats, indent=4)}") 100 | 101 | dt.utils.write_to_tb([train_stats["score"], val_stats["score"], test_stats["score"]], 102 | [f"train_acc-{cfg.dataset.name}", 103 | f"val_acc-{cfg.dataset.name}", 104 | f"test_acc-{cfg.dataset.name}"], 105 | epoch, 106 | writer) 107 | 108 | if cfg.hyp.use_patience: 109 | val_stats, test_stats = dt.evaluate_model(net, 110 | [loaders["val"], loaders["test"]], 111 | cfg.dataset.task, 112 | 
device) 113 | if val_stats["score"] > highest_val_acc_so_far: 114 | best_epoch = epoch 115 | highest_val_acc_so_far = val_stats["score"] 116 | log.info(f"New best epoch, val score: {val_stats['score']}") 117 | # save current model 118 | state = {"net": net.state_dict(), "epoch": epoch, "optimizer": optimizer.state_dict()} 119 | out_str = "model_best.pth" 120 | log.info(f"Saving model to: {out_str}") 121 | torch.save(state, out_str) 122 | 123 | if epoch - best_epoch > cfg.hyp.patience: 124 | done = True 125 | epoch += 1 126 | writer.flush() 127 | writer.close() 128 | 129 | log.info("Running Final Evaluation...") 130 | checkpoint_path = "model_best.pth" 131 | net.load_state_dict(torch.load(checkpoint_path)["net"]) 132 | test_stats, val_stats, train_stats = dt.evaluate_model(net, 133 | [loaders["test"], loaders["val"], loaders["train"]], 134 | cfg.dataset.task, 135 | device) 136 | 137 | log.info(f"Training accuracy: {json.dumps(train_stats, indent=4)}") 138 | log.info(f"Val accuracy: {json.dumps(val_stats, indent=4)}") 139 | log.info(f"Test accuracy: {json.dumps(test_stats, indent=4)}") 140 | 141 | stats = OrderedDict([("dataset", cfg.dataset.name), 142 | ("model_name", cfg.model.name), 143 | ("run_id", cfg.run_id), 144 | ("best_epoch", best_epoch), 145 | ("routine", "from_scratch"), 146 | ("test_stats", test_stats), 147 | ("train_stats", train_stats), 148 | ("val_stats", val_stats)]) 149 | with open(os.path.join("stats.json"), "w") as fp: 150 | json.dump(stats, fp, indent=4) 151 | log.info(json.dumps(stats, indent=4)) 152 | #################################################### 153 | return stats 154 | 155 | 156 | if __name__ == "__main__": 157 | run_id = dt.utils.generate_run_id() 158 | sys.argv.append(f"+run_id={run_id}") 159 | main() 160 | --------------------------------------------------------------------------------