├── .gitignore ├── LICENSE ├── README.md ├── config ├── dataset │ ├── d_188.yaml │ ├── mimic.yaml │ ├── yeast_downstream.yaml │ └── yeast_upstream.yaml ├── hydra │ └── job_logging │ │ └── custom.yaml ├── hyp │ ├── _.yaml │ ├── hyp_for_single_net.yaml │ └── hyp_for_transfer.yaml ├── model │ ├── ft_transformer.yaml │ ├── ft_transformer_downstream.yaml │ ├── ft_transformer_pretrain.yaml │ ├── mlp.yaml │ └── resnet.yaml ├── optune_config.yaml ├── train_net_config.yaml └── transfer_learn_net_config.yaml ├── data ├── yeast_downstream │ ├── N.csv │ └── y.csv └── yeast_upstream │ ├── N.csv │ ├── normalizer.pkl │ └── y.csv ├── deep_tabular ├── __init__.py ├── adjectives.py ├── models │ ├── __init__.py │ ├── ft_transformer.py │ ├── mlp.py │ └── resnet.py ├── names.py └── utils │ ├── __init__.py │ ├── data_tools.py │ ├── get_demo_dataset.py │ ├── mimic_tools.py │ ├── testing.py │ ├── tools.py │ ├── training.py │ └── warmup.py ├── optune_from_scratch.py ├── requirements.txt ├── train_net_from_scratch.py └── transfer_learn_net.py /.gitignore: -------------------------------------------------------------------------------- 1 | */output*/ 2 | outputs/ 3 | plots/output.pdf/ 4 | .cml_*temp.sh 5 | .idea 6 | .idea/* 7 | pretrained_models 8 | logs 9 | cmllogs 10 | .DS_Store 11 | *.pdf 12 | */*.pdf 13 | junk* 14 | maze_data/train_* 15 | maze_data/test_* 16 | clean_performance.csv 17 | scripts/launch*.sh 18 | scripts/*/launch*.sh 19 | results 20 | runs 21 | plots 22 | classification_training 23 | check_default 24 | checkpoints 25 | *.png 26 | helpers/data/ 27 | plots/ 28 | launch/my_launch/ 29 | output_default 30 | 31 | # Byte-compiled / optimized / DLL files 32 | __pycache__/ 33 | *.py[cod] 34 | *$py.class 35 | 36 | # C extensions 37 | *.so 38 | 39 | # Distribution / packaging 40 | .Python 41 | build/ 42 | develop-eggs/ 43 | dist/ 44 | downloads/ 45 | eggs/ 46 | .eggs/ 47 | lib/ 48 | lib64/ 49 | parts/ 50 | sdist/ 51 | var/ 52 | wheels/ 53 | *.egg-info/ 54 | .installed.cfg 55 | *.egg 56 | MANIFEST 57 | 58 | # PyInstaller 59 | # Usually these files are written by a python script from a template 60 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
61 | *.manifest 62 | *.spec 63 | 64 | # Installer logs 65 | pip-log.txt 66 | pip-delete-this-directory.txt 67 | 68 | # Unit test / coverage reports 69 | htmlcov/ 70 | .tox/ 71 | .coverage 72 | .coverage.* 73 | .cache 74 | nosetests.xml 75 | coverage.xml 76 | *.cover 77 | .hypothesis/ 78 | .pytest_cache/ 79 | 80 | # Translations 81 | *.mo 82 | *.pot 83 | 84 | # Django stuff: 85 | *.log 86 | local_settings.py 87 | db.sqlite3 88 | 89 | # Flask stuff: 90 | instance/ 91 | .webassets-cache 92 | 93 | # Scrapy stuff: 94 | .scrapy 95 | 96 | # Sphinx documentation 97 | docs/_build/ 98 | 99 | # PyBuilder 100 | target/ 101 | 102 | # Jupyter Notebook 103 | .ipynb_checkpoints 104 | 105 | # pyenv 106 | .python-version 107 | 108 | # celery beat schedule file 109 | celerybeat-schedule 110 | 111 | # SageMath parsed files 112 | *.sage.py 113 | 114 | # Environments 115 | .env 116 | .venv 117 | env/ 118 | venv/ 119 | ENV/ 120 | env.bak/ 121 | venv.bak/ 122 | 123 | # Spyder project settings 124 | .spyderproject 125 | .spyproject 126 | 127 | # Rope project settings 128 | .ropeproject 129 | 130 | # mkdocs documentation 131 | /site 132 | 133 | # mypy 134 | .mypy_cache/ 135 | *.zip 136 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Valeriia Cherepanova, Roman Levin, and Avi Schwarzschild 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Tabular Transfer Learning 2 | 3 | This repository contains the official implementation of the paper 4 | 5 | [Transfer Learning with Deep Tabular Models](https://arxiv.org/abs/2206.15306), now accepted to ICLR 2023! 6 | 7 | Roman Levin, Valeriia Cherepanova, Avi Schwarzschild, Arpit Bansal, C. Bayan Bruss, Tom Goldstein, Andrew Gordon Wilson, Micah Goldblum. 
8 | 9 | ## Citation 10 | 11 | If you find our work useful, please cite: 12 | ``` 13 | @article{levin2022transfer, 14 | title={Transfer Learning with Deep Tabular Models}, 15 | author={Levin, Roman and Cherepanova, Valeriia and Schwarzschild, Avi and Bansal, Arpit and Bruss, C Bayan and Goldstein, Tom and Wilson, Andrew Gordon and Goldblum, Micah}, 16 | journal={arXiv preprint arXiv:2206.15306}, 17 | year={2022} 18 | } 19 | ``` 20 | 21 | ## Getting Started 22 | 23 | ### Requirements 24 | This code was developed and tested with Python 3.8.2. 25 | 26 | To install requirements: 27 | 28 | ```$ pip install -r requirements.txt``` 29 | 30 | ## Demo Transfer Learning Experiment 31 | While in the paper we used the MetaMIMIC test bed for our transfer learning experiments (please see the instructions below for obtaining it), we provide a demo experiment with a readily downloadable [Yeast](http://mulan.sourceforge.net/datasets-mlc.html) dataset -- a multi-label dataset with 14 targets. 32 | 33 | We created a basic transfer learning setup by splitting the Yeast data into a multi-label [yeast_upstream](data/yeast_upstream) dataset with 13 targets for pretraining and [yeast_downstream](data/yeast_downstream) with the remaining 14th target as the downstream target. 34 | 35 | Now, we first pretrain FT-Transformer on the upstream data (for details, please see the config files, implemented using [Hydra](https://hydra.cc/docs/intro/)): 36 | 37 | ```$ python transfer_learn_net.py model=ft_transformer_pretrain dataset=yeast_upstream``` 38 | 39 | Then, we fine-tune the pretrained model on the downstream data: 40 | 41 | ```$ python transfer_learn_net.py model=ft_transformer_downstream dataset=yeast_downstream``` 42 | 43 | Finally, we compare the results to the model trained from scratch on the downstream data: 44 | 45 | ```$ python train_net_from_scratch.py model=ft_transformer dataset=yeast_downstream``` 46 | 47 | On the upstream 13-target multi-label pretraining task with 1400 samples we get an AUC of approximately 0.7. The model with transfer learning scores 0.63 AUC on the downstream binary task with 300 samples, while the model trained from scratch achieves 0.58 AUC. 48 | ## MetaMIMIC 49 | In our paper, we used the MetaMIMIC test bed, which is based on the [MIMIC-IV clinical database](https://physionet.org/content/mimiciv/1.0/) of ICU admissions, for our transfer learning experiments. Please see the [MetaMIMIC GitHub](https://github.com/ModelOriented/metaMIMIC) for instructions on constructing the MetaMIMIC dataset. Once constructed, please put it in `data/mimic/MetaMIMIC.csv` and use the provided `config/dataset/mimic.yaml` config. 50 | 51 | ## Saving Protocol 52 | 53 | Each time one of the main scripts is executed, a hash-like adjective-Name combination is created and saved as the `run_id` for that execution. The `run_id` is used when saving checkpoints and results so that previous runs with similar hyperparameters cannot be accidentally overwritten. The folder used for saving both checkpoints and results can be chosen with the following command-line argument: 54 | 55 | ```$ python train_net_from_scratch.py name=``` 56 | 57 | During training, the best-performing model (on the held-out validation set) is saved to `outputs//training-/model_best.pth`, and the corresponding arguments for that run are saved in `outputs//training-/.hydra/`. 58 | 59 | The results are saved in `outputs//training-/stats.json`, and the TensorBoard data is saved in `outputs//training-/tensorboard`.
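For example, once the two downstream runs above have finished, you can compare them by reading each run's `stats.json`. The sketch below is illustrative only: the run directories assume the default `name` values from `config/transfer_learn_net_config.yaml` and `config/train_net_config.yaml`, and the keys inside `stats.json` depend on what the training loop writes.

```python
import json
from pathlib import Path

# Assumed run directories for the demo commands above (derived from the default
# `name`, `model.name`, and `dataset.name` values in the provided Hydra configs).
runs = {
    "transfer": Path("outputs/transfer-learning-experiment/ft_transformer-yeast_downstream"),
    "from_scratch": Path("outputs/from_scratch_default/training-ft_transformer-yeast_downstream"),
}

for label, run_dir in runs.items():
    with open(run_dir / "stats.json") as f:
        stats = json.load(f)
    # Print whatever metrics the training loop stored for this run.
    print(label, stats)
```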
60 | 61 | ## Additional Functionality 62 | In addition to transfer learning with deep tabular models, this repo allows you to train networks from scratch using `train_net_from_scratch.py` and to optimize their hyperparameters with [Optuna](https://optuna.org) using `optune_from_scratch.py`. 63 | 64 | ## Contributing 65 | 66 | We believe in open-source, community-driven software development. Please open issues and pull requests with any questions or improvements you have. 67 | 68 | ## References 69 | * We borrow network implementations from the [RTDL repo](https://github.com/Yura52/rtdl) and extensively leverage the RTDL repo in general. 70 | * [Yeast demo data source](http://mulan.sourceforge.net/datasets-mlc.html) 71 | * [MetaMIMIC](https://github.com/ModelOriented/metaMIMIC) 72 | * [MIMIC-IV clinical database](https://physionet.org/content/mimiciv/1.0/) 73 | -------------------------------------------------------------------------------- /config/dataset/d_188.yaml: -------------------------------------------------------------------------------- 1 | name: 188 2 | source: openml 3 | task: multiclass 4 | normalization: quantile 5 | y_policy: 6 | -------------------------------------------------------------------------------- /config/dataset/mimic.yaml: -------------------------------------------------------------------------------- 1 | name: mimic 2 | stage: downstream 3 | task: binclass 4 | normalization: quantile 5 | downstream_target: 0 6 | downstream_sample_num: 200 7 | y_policy: -------------------------------------------------------------------------------- /config/dataset/yeast_downstream.yaml: -------------------------------------------------------------------------------- 1 | name: yeast_downstream 2 | source: local 3 | task: binclass 4 | normalization: quantile 5 | normalizer_path: '../../../data/yeast_upstream/normalizer.pkl' 6 | stage: 'downstream' 7 | y_policy: 8 | -------------------------------------------------------------------------------- /config/dataset/yeast_upstream.yaml: -------------------------------------------------------------------------------- 1 | name: yeast_upstream 2 | source: local 3 | task: multilabel 4 | normalization: quantile 5 | normalizer_path: '../../../data/yeast_upstream/normalizer.pkl' 6 | stage: 'pretrain' 7 | y_policy: 8 | -------------------------------------------------------------------------------- /config/hydra/job_logging/custom.yaml: -------------------------------------------------------------------------------- 1 | version: 1 2 | formatters: 3 | simple: 4 | format: "[%(asctime)s %(levelname)s]: %(message)s" 5 | datefmt: "%m/%d/%Y %H:%M:%S" 6 | handlers: 7 | console: 8 | class: logging.StreamHandler 9 | formatter: simple 10 | stream: ext://sys.stdout 11 | file: 12 | class: logging.handlers.RotatingFileHandler 13 | formatter: simple 14 | filename: log.log 15 | root: 16 | handlers: [console, file] 17 | 18 | disable_existing_loggers: false 19 | -------------------------------------------------------------------------------- /config/hyp/_.yaml: -------------------------------------------------------------------------------- 1 | epochs: 200 2 | lr: 0.0001 3 | lr_decay: step 4 | lr_factor: 0.1 5 | lr_schedule: 6 | - 40 7 | - 80 8 | optimizer: adam 9 | patience: 30 10 | save_period: -1 11 | seed: 0 12 | test_batch_size: 256 13 | train_batch_size: 256 14 | use_patience: true 15 | val_period: 10 16 | warmup_period: 5 17 | weight_decay: 2e-4 18 | momentum: 0.9 19 | warmup_type: linear 20 |
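Taken together, these defaults describe 200 epochs of Adam with a linear warmup over the first 5 epochs and a step decay of the learning rate by a factor of 0.1 at epochs 40 and 80, together with patience-based early stopping. The authoritative behaviour is defined in `deep_tabular/utils/warmup.py` and the training loop; the snippet below is only a rough, illustrative reading of how these fields combine.

```python
def lr_at_epoch(epoch, base_lr=1e-4, warmup_period=5, lr_schedule=(40, 80), lr_factor=0.1):
    """Illustrative reading of the hyp config: linear warmup for `warmup_period`
    epochs, then the base LR decayed by `lr_factor` at each milestone."""
    if epoch < warmup_period:
        return base_lr * (epoch + 1) / warmup_period
    n_decays = sum(epoch >= milestone for milestone in lr_schedule)
    return base_lr * lr_factor ** n_decays

# lr_at_epoch(0) -> 2e-05, lr_at_epoch(10) -> 1e-04, lr_at_epoch(90) -> 1e-06
```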
-------------------------------------------------------------------------------- /config/hyp/hyp_for_single_net.yaml: -------------------------------------------------------------------------------- 1 | epochs: 200 2 | lr: 0.0001 3 | lr_decay: step 4 | lr_factor: 0.1 5 | lr_schedule: 6 | - 40 7 | - 80 8 | optimizer: adam 9 | patience: 30 10 | save_period: -1 11 | seed: 0 12 | test_batch_size: 256 13 | train_batch_size: 256 14 | use_patience: true 15 | val_period: 10 16 | warmup_period: 5 17 | weight_decay: 2e-4 18 | momentum: 0.9 19 | warmup_type: linear 20 | head_warmup_period: 10 21 | head_lr: 0.001 22 | -------------------------------------------------------------------------------- /config/hyp/hyp_for_transfer.yaml: -------------------------------------------------------------------------------- 1 | epochs: 200 2 | lr: 0.0001 3 | lr_decay: step 4 | lr_factor: 0.1 5 | lr_schedule: 6 | - 40 7 | - 80 8 | optimizer: adam 9 | patience: 30 10 | save_period: -1 11 | seed: 0 12 | test_batch_size: 256 13 | train_batch_size: 256 14 | use_patience: true 15 | val_period: 10 16 | warmup_period: 5 17 | weight_decay: 2e-4 18 | momentum: 0.9 19 | warmup_type: linear 20 | head_warmup_period: 10 21 | head_lr: 0.001 22 | -------------------------------------------------------------------------------- /config/model/ft_transformer.yaml: -------------------------------------------------------------------------------- 1 | name: ft_transformer 2 | d_embedding: 192 3 | model_path: 4 | use_mlp_head: false 5 | freeze_feature_extractor: false 6 | token_bias: true 7 | n_layers: 3 8 | n_heads: 8 9 | d_ffn_factor: 1.3333333333 10 | attention_dropout: 0.2 11 | ffn_dropout: 0.1 12 | residual_dropout: 0.0 13 | activation: reglu 14 | prenormalization: true 15 | initialization: kaiming 16 | kv_compression: 17 | kv_compression_sharing: -------------------------------------------------------------------------------- /config/model/ft_transformer_downstream.yaml: -------------------------------------------------------------------------------- 1 | name: ft_transformer 2 | d_embedding: 192 3 | model_path: '../../../outputs/transfer-learning-experiment/ft_transformer-yeast_upstream/model_best.pth' 4 | use_mlp_head: false 5 | freeze_feature_extractor: false 6 | token_bias: true 7 | n_layers: 3 8 | n_heads: 8 9 | d_ffn_factor: 1.3333333333 10 | attention_dropout: 0.2 11 | ffn_dropout: 0.1 12 | residual_dropout: 0.0 13 | activation: reglu 14 | prenormalization: true 15 | initialization: kaiming 16 | kv_compression: 17 | kv_compression_sharing: 18 | -------------------------------------------------------------------------------- /config/model/ft_transformer_pretrain.yaml: -------------------------------------------------------------------------------- 1 | name: ft_transformer 2 | d_embedding: 192 3 | model_path: 4 | use_mlp_head: false 5 | freeze_feature_extractor: false 6 | token_bias: true 7 | n_layers: 3 8 | n_heads: 8 9 | d_ffn_factor: 1.3333333333 10 | attention_dropout: 0.2 11 | ffn_dropout: 0.1 12 | residual_dropout: 0.0 13 | activation: reglu 14 | prenormalization: true 15 | initialization: kaiming 16 | kv_compression: 17 | kv_compression_sharing: -------------------------------------------------------------------------------- /config/model/mlp.yaml: -------------------------------------------------------------------------------- 1 | dropout: 0.1 2 | name: mlp 3 | d_embedding: 100 4 | model_path: 5 | d_layers: # cfg.model.d_layers = [100, 100] 6 | - 100 7 | - 100 8 | 
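For orientation, `mlp.yaml` describes a plain feed-forward network: two hidden layers of width 100 with dropout 0.1 (the inline comment shows that `d_layers` is read as a list), while `d_embedding` is typically used for embedding categorical features. The actual model lives in `deep_tabular/models/mlp.py` and follows the RTDL implementation; the snippet below is only a rough PyTorch sketch of such a network for purely numerical inputs.

```python
import torch.nn as nn

def build_mlp(d_in, d_layers=(100, 100), dropout=0.1, d_out=1):
    """Rough sketch of an MLP with the widths and dropout from mlp.yaml
    (illustrative only; not the repo's RTDL-based implementation)."""
    layers, prev = [], d_in
    for width in d_layers:
        layers += [nn.Linear(prev, width), nn.ReLU(), nn.Dropout(dropout)]
        prev = width
    layers.append(nn.Linear(prev, d_out))
    return nn.Sequential(*layers)
```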
-------------------------------------------------------------------------------- /config/model/resnet.yaml: -------------------------------------------------------------------------------- 1 | name: resnet 2 | d_embedding: 128 3 | model_path: 4 | d: 200 5 | d_hidden_factor: 3 6 | n_layers: 5 7 | activation: relu 8 | normalization: batchnorm 9 | hidden_dropout: 0.2 10 | residual_dropout: 0.2 -------------------------------------------------------------------------------- /config/optune_config.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - model: mlp 3 | - dataset: d_188 4 | - hyp: hyp_for_single_net 5 | - override hydra/job_logging: custom 6 | - _self_ 7 | 8 | 9 | hydra: 10 | run: 11 | dir: ./outputs/${name}/optuning-${model.name}-${dataset.name} 12 | job_logging: 13 | handlers: 14 | file: 15 | filename: train.log 16 | 17 | train_log: train_log 18 | name: from_scratch_optuna 19 | -------------------------------------------------------------------------------- /config/train_net_config.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - model: mlp 3 | - dataset: d_188 4 | - hyp: hyp_for_single_net 5 | - override hydra/job_logging: custom 6 | - _self_ 7 | 8 | 9 | hydra: 10 | run: 11 | dir: ./outputs/${name}/training-${model.name}-${dataset.name} 12 | job_logging: 13 | handlers: 14 | file: 15 | filename: train.log 16 | 17 | train_log: train_log 18 | name: from_scratch_default 19 | -------------------------------------------------------------------------------- /config/transfer_learn_net_config.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - model: ft_transformer_pretrain 3 | - dataset: yeast_upstream 4 | - hyp: hyp_for_transfer 5 | - override hydra/job_logging: custom 6 | - _self_ 7 | 8 | 9 | hydra: 10 | run: 11 | dir: ./outputs/${name}/${model.name}-${dataset.name} 12 | job_logging: 13 | handlers: 14 | file: 15 | filename: train.log 16 | 17 | train_log: train_log 18 | name: transfer-learning-experiment 19 | -------------------------------------------------------------------------------- /data/yeast_downstream/y.csv: -------------------------------------------------------------------------------- 1 | Class6 2 | 1 3 | 1 4 | 1 5 | 0 6 | 0 7 | 0 8 | 0 9 | 1 10 | 0 11 | 0 12 | 0 13 | 0 14 | 0 15 | 0 16 | 0 17 | 0 18 | 0 19 | 0 20 | 1 21 | 0 22 | 0 23 | 0 24 | 0 25 | 0 26 | 0 27 | 0 28 | 0 29 | 0 30 | 0 31 | 0 32 | 0 33 | 1 34 | 0 35 | 1 36 | 0 37 | 1 38 | 0 39 | 0 40 | 0 41 | 1 42 | 1 43 | 1 44 | 0 45 | 1 46 | 0 47 | 0 48 | 0 49 | 0 50 | 1 51 | 0 52 | 0 53 | 1 54 | 0 55 | 0 56 | 0 57 | 1 58 | 0 59 | 0 60 | 0 61 | 0 62 | 1 63 | 0 64 | 1 65 | 0 66 | 0 67 | 0 68 | 0 69 | 1 70 | 0 71 | 1 72 | 0 73 | 0 74 | 1 75 | 1 76 | 1 77 | 0 78 | 0 79 | 0 80 | 0 81 | 0 82 | 1 83 | 0 84 | 0 85 | 0 86 | 0 87 | 0 88 | 0 89 | 0 90 | 0 91 | 0 92 | 0 93 | 1 94 | 0 95 | 0 96 | 0 97 | 1 98 | 1 99 | 0 100 | 1 101 | 0 102 | 1 103 | 0 104 | 0 105 | 0 106 | 1 107 | 1 108 | 0 109 | 0 110 | 0 111 | 1 112 | 1 113 | 0 114 | 0 115 | 0 116 | 0 117 | 0 118 | 1 119 | 1 120 | 0 121 | 0 122 | 1 123 | 0 124 | 0 125 | 0 126 | 0 127 | 0 128 | 0 129 | 0 130 | 0 131 | 0 132 | 0 133 | 0 134 | 1 135 | 1 136 | 0 137 | 1 138 | 0 139 | 0 140 | 0 141 | 0 142 | 0 143 | 1 144 | 1 145 | 0 146 | 0 147 | 1 148 | 0 149 | 0 150 | 0 151 | 0 152 | 0 153 | 0 154 | 1 155 | 0 156 | 1 157 | 0 158 | 1 159 | 0 160 | 1 161 | 0 162 | 1 163 | 0 164 | 0 165 | 0 166 | 1 167 | 0 168 | 0 169 | 0 
170 | 1 171 | 1 172 | 0 173 | 0 174 | 1 175 | 0 176 | 0 177 | 0 178 | 0 179 | 0 180 | 1 181 | 0 182 | 1 183 | 0 184 | 0 185 | 1 186 | 0 187 | 1 188 | 1 189 | 1 190 | 1 191 | 1 192 | 0 193 | 0 194 | 1 195 | 0 196 | 0 197 | 1 198 | 0 199 | 0 200 | 0 201 | 0 202 | 0 203 | 0 204 | 0 205 | 0 206 | 0 207 | 0 208 | 0 209 | 0 210 | 1 211 | 1 212 | 0 213 | 0 214 | 0 215 | 0 216 | 0 217 | 0 218 | 1 219 | 1 220 | 0 221 | 1 222 | 0 223 | 1 224 | 0 225 | 0 226 | 0 227 | 0 228 | 1 229 | 0 230 | 1 231 | 0 232 | 0 233 | 0 234 | 1 235 | 0 236 | 1 237 | 0 238 | 0 239 | 0 240 | 0 241 | 1 242 | 0 243 | 1 244 | 0 245 | 0 246 | 1 247 | 0 248 | 0 249 | 0 250 | 0 251 | 0 252 | 1 253 | 0 254 | 0 255 | 0 256 | 1 257 | 0 258 | 0 259 | 0 260 | 1 261 | 0 262 | 1 263 | 0 264 | 0 265 | 0 266 | 0 267 | 0 268 | 0 269 | 0 270 | 0 271 | 0 272 | 1 273 | 1 274 | 0 275 | 0 276 | 0 277 | 1 278 | 0 279 | 0 280 | 0 281 | 0 282 | 0 283 | 1 284 | 0 285 | 0 286 | 1 287 | 0 288 | 0 289 | 1 290 | 1 291 | 0 292 | 0 293 | 0 294 | 0 295 | 0 296 | 0 297 | 0 298 | 0 299 | 0 300 | 0 301 | 0 302 | 0 303 | 1 304 | 0 305 | 0 306 | 1 307 | 0 308 | 1 309 | 0 310 | 1 311 | 0 312 | 1 313 | 1 314 | 0 315 | 1 316 | 0 317 | 0 318 | 0 319 | 0 320 | 1 321 | 0 322 | 0 323 | 0 324 | 1 325 | 0 326 | 0 327 | 1 328 | 0 329 | 0 330 | 0 331 | 0 332 | 0 333 | 0 334 | 0 335 | 0 336 | 0 337 | 0 338 | 0 339 | 0 340 | 0 341 | 0 342 | 0 343 | 0 344 | 0 345 | 0 346 | 0 347 | 0 348 | 0 349 | 0 350 | 1 351 | 0 352 | 1 353 | 1 354 | 0 355 | 0 356 | 1 357 | 0 358 | 0 359 | 1 360 | 1 361 | 0 362 | 0 363 | 0 364 | 0 365 | 0 366 | 0 367 | 1 368 | 0 369 | 1 370 | 0 371 | 0 372 | 0 373 | 0 374 | 0 375 | 0 376 | 0 377 | 0 378 | 1 379 | 0 380 | 1 381 | 0 382 | 1 383 | 0 384 | 1 385 | 0 386 | 0 387 | 0 388 | 1 389 | 0 390 | 0 391 | 1 392 | 0 393 | 0 394 | 0 395 | 0 396 | 0 397 | 0 398 | 0 399 | 0 400 | 1 401 | 0 402 | 0 403 | 0 404 | 0 405 | 1 406 | 0 407 | 0 408 | 0 409 | 0 410 | 0 411 | 0 412 | 1 413 | 0 414 | 0 415 | 0 416 | 0 417 | 0 418 | 1 419 | 0 420 | 1 421 | 1 422 | 0 423 | 1 424 | 0 425 | 0 426 | 0 427 | 0 428 | 0 429 | 0 430 | 0 431 | 0 432 | 1 433 | 1 434 | 0 435 | 0 436 | 1 437 | 0 438 | 0 439 | 0 440 | 0 441 | 0 442 | 0 443 | 0 444 | 1 445 | 0 446 | 0 447 | 1 448 | 0 449 | 0 450 | 0 451 | 0 452 | 0 453 | 0 454 | 1 455 | 0 456 | 0 457 | 0 458 | 0 459 | 0 460 | 1 461 | 1 462 | 0 463 | 0 464 | 0 465 | 1 466 | 0 467 | 0 468 | 1 469 | 0 470 | 0 471 | 1 472 | 0 473 | 0 474 | 0 475 | 1 476 | 0 477 | 0 478 | 0 479 | 0 480 | 0 481 | 0 482 | 0 483 | 0 484 | 0 485 | 1 486 | -------------------------------------------------------------------------------- /data/yeast_upstream/normalizer.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LevinRoman/tabular-transfer-learning/5a4e4cf4c7cebdf16c58157504ab6a639623c90d/data/yeast_upstream/normalizer.pkl -------------------------------------------------------------------------------- /deep_tabular/__init__.py: -------------------------------------------------------------------------------- 1 | from deep_tabular import models 2 | from deep_tabular import utils 3 | from deep_tabular.adjectives import adjectives 4 | from deep_tabular.names import names 5 | from deep_tabular.utils.testing import evaluate_model, evaluate_backbone, evaluate_backbone_one_dataset 6 | from deep_tabular.utils.training import TrainingSetup, default_training_loop 7 | 8 | __all__ = ["evaluate_model", 9 | "default_training_loop", 10 | "evaluate_backbone", 11 | "evaluate_backbone_one_dataset", 12 
| "models", 13 | "TrainingSetup", 14 | "utils"] 15 | -------------------------------------------------------------------------------- /deep_tabular/adjectives.py: -------------------------------------------------------------------------------- 1 | adjectives = ['abased', 'abject', 'ablaze', 'abler', 'ablest', 'abloom', 'ablush', 'about', 'abreast', 'abridged', 'abroach', 'abroad', 'abrupt', 'abscessed', 'absolved', 'absorbed', 'abstruse', 'absurd', 'abused', 'abuzz', 'accrete', 'accrued', 'accurst', 'acerb', 'aching', 'acock', 'acold', 'acorned', 'acred', 'acrid', 'across', 'acting', 'added', 'addle', 'addorsed', 'adept', 'adjunct', 'admired', 'adnate', 'adored', 'adrift', 'adroit', 'adscript', 'adult', 'adunc', 'adust', 'advised', 'aery', 'afeard', 'afeared', 'affine', 'affined', 'afire', 'aflame', 'afloat', 'afoot', 'afoul', 'afraid', 'after', 'aftmost', 'agape', 'agaze', 'aged', 'ageing', 'ageless', 'agelong', 'aggrieved', 'aghast', 'agile', 'aging', 'agleam', 'agley', 'aglow', 'agnate', 'ago', 'agog', 'agone', 'agreed', 'aground', 'ahead', 'ahorse', 'ahull', 'aidful', 'aidless', 'ailing', 'aimless', 'ain', 'air', 'airborne', 'airless', 'airsick', 'airtight', 'ajar', 'akin', 'alar', 'alate', 'alert', 'algal', 'algid', 'algoid', 'alien', 'alight', 'alike', 'alined', 'alive', 'alleged', 'allowed', 'alloyed', 'alone', 'aloof', 'alright', 'altered', 'altern', 'alvine', 'amazed', 'amber', 'amiss', 'amok', 'amort', 'ample', 'amuck', 'amused', 'android', 'angled', 'anguine', 'anguished', 'anile', 'announced', 'ansate', 'anti', 'antic', 'antique', 'antlered', 'antlike', 'antrorse', 'anxious', 'apart', 'apeak', 'apish', 'appalled', 'applied', 'appressed', 'arcane', 'arching', 'argent', 'arid', 'armchair', 'armless', 'armored', 'aroid', 'aroused', 'arranged', 'arrant', 'arrased', 'arrhythmic', 'artful', 'artless', 'arty', 'ashake', 'ashamed', 'ashen', 'ashy', 'askance', 'askant', 'askew', 'asking', 'aslant', 'asleep', 'aslope', 'asphalt', 'asprawl', 'asquint', 'assumed', 'assured', 'astir', 'astral', 'astute', 'aswarm', 'athirst', 'athrill', 'atilt', 'atrip', 'attached', 'attack', 'attent', 'attired', 'attrite', 'attuned', 'audile', 'aurous', 'austere', 'averse', 'avid', 'avowed', 'awake', 'aware', 'awash', 'away', 'aweless', 'awesome', 'awestruck', 'awful', 'awheel', 'awing', 'awkward', 'awnless', 'awry', 'axile', 'azure', 'babbling', 'baccate', 'backboned', 'backhand', 'backless', 'backmost', 'backstage', 'backstair', 'backstairs', 'backswept', 'backward', 'backwoods', 'baddish', 'baffling', 'baggy', 'bairnly', 'balanced', 'balding', 'baldish', 'baleful', 'balky', 'bally', 'balmy', 'banal', 'bandaged', 'banded', 'baneful', 'bangled', 'bankrupt', 'banner', 'bannered', 'baptist', 'bar', 'barbate', 'bardic', 'bardy', 'bareback', 'barebacked', 'barefaced', 'barefoot', 'barer', 'barest', 'baric', 'barish', 'barkless', 'barky', 'barmy', 'baroque', 'barrelled', 'baseless', 'baser', 'basest', 'bashful', 'basic', 'bassy', 'bastioned', 'bated', 'battered', 'battled', 'batty', 'bausond', 'bawdy', 'beaded', 'beady', 'beaky', 'beaming', 'beamish', 'beamless', 'beamy', 'beardless', 'bearish', 'bearlike', 'beastlike', 'beastly', 'beaten', 'beating', 'beauish', 'becalmed', 'bedded', 'bedfast', 'bedight', 'bedimmed', 'bedrid', 'beechen', 'beefy', 'beery', 'beetle', 'befogged', 'begrimed', 'beguiled', 'behind', 'bellied', 'belted', 'bemazed', 'bemused', 'bended', 'bending', 'bendwise', 'bendy', 'benign', 'benthic', 'benzal', 'bereft', 'berried', 'berserk', 'besieged', 'bespoke', 'besprent', 'bestead', 'bestial', 
'betrothed', 'beveled', 'biased', 'bifid', 'biform', 'bigger', 'biggest', 'biggish', 'bijou', 'bilgy', 'bilious', 'billion', 'billionth', 'bilobed', 'binate', 'biped', 'birchen', 'birdlike', 'birken', 'bistred', 'bitchy', 'bitless', 'bitten', 'bitty', 'bivalve', 'bizarre', 'blackish', 'blameful', 'blameless', 'blaring', 'blasted', 'blasting', 'blatant', 'bleary', 'blended', 'blending', 'blindfold', 'blinding', 'blinking', 'blissful', 'blissless', 'blithesome', 'bloated', 'blockish', 'blocky', 'blooded', 'bloodied', 'bloodshot', 'bloodstained', 'blooming', 'bloomless', 'bloomy', 'blotchy', 'blotto', 'blotty', 'blowhard', 'blowsy', 'blowy', 'blowzy', 'blubber', 'bluer', 'bluest', 'bluish', 'blurry', 'blushful', 'blushless', 'boarish', 'boastful', 'boastless', 'bobtail', 'bodger', 'bodied', 'boding', 'boggy', 'bogus', 'bomb', 'bombproof', 'boneless', 'bonism', 'bonkers', 'bony', 'bonzer', 'bookish', 'bookless', 'boorish', 'booted', 'bootleg', 'bootless', 'boozy', 'bordered', 'boring', 'bosker', 'bosky', 'bosom', 'bosomed', 'bossy', 'botchy', 'bouffant', 'boughten', 'bouilli', 'bouncy', 'bounded', 'bounden', 'boundless', 'bousy', 'bovid', 'bovine', 'bowing', 'boxlike', 'boyish', 'bracing', 'brackish', 'bractless', 'braggart', 'bragging', 'braided', 'brainless', 'brainsick', 'brainy', 'brakeless', 'brambly', 'branching', 'branchless', 'branchlike', 'branny', 'brashy', 'brassy', 'brattish', 'bratty', 'braver', 'bravest', 'braving', 'brawny', 'brazen', 'breaking', 'breakneck', 'breasted', 'breathless', 'breathy', 'breechless', 'breeding', 'breezeless', 'breezy', 'brickle', 'bricky', 'bridgeless', 'briefless', 'brilliant', 'brimful', 'brimless', 'brimming', 'brinded', 'brindle', 'brindled', 'brinish', 'briny', 'bristly', 'brittle', 'broadband', 'broadcast', 'broadish', 'broadloom', 'broadside', 'broch', 'broguish', 'bronzy', 'broody', 'broomy', 'browless', 'brownish', 'browny', 'bruising', 'brumal', 'brumous', 'brunet', 'brunette', 'brushless', 'brushy', 'brutal', 'brute', 'brutelike', 'brutish', 'bubbly', 'buccal', 'buckish', 'buckram', 'buckshee', 'buckskin', 'bucktooth', 'bucktoothed', 'budless', 'buggy', 'bughouse', 'buirdly', 'bulbar', 'bulbous', 'bulgy', 'bulky', 'bullate', 'bullied', 'bullish', 'bumbling', 'bumptious', 'bumpy', 'bunchy', 'bunted', 'buoyant', 'burdened', 'burghal', 'buried', 'burlesque', 'burly', 'burry', 'bursal', 'bursting', 'bushy', 'busied', 'buskined', 'bustled', 'busty', 'buttocked', 'buxom', 'bygone', 'byssal', 'caboched', 'caboshed', 'caddish', 'cadenced', 'cadent', 'cadgy', 'cagey', 'cagy', 'caitiff', 'calcic', 'calfless', 'caller', 'callous', 'callow', 'calmy', 'campy', 'cancelled', 'cancrine', 'cancroid', 'candent', 'candied', 'canine', 'cankered', 'canny', 'canty', 'cany', 'capeskin', 'caprine', 'captious', 'captive', 'cardboard', 'carefree', 'careful', 'careless', 'careworn', 'caring', 'carking', 'carlish', 'carmine', 'carnose', 'carpal', 'carping', 'carsick', 'carven', 'casebook', 'casteless', 'castled', 'catching', 'catchweight', 'catchy', 'cattish', 'catty', 'caudate', 'cauline', 'causal', 'causeless', 'cautious', 'cayenned', 'ceaseless', 'cecal', 'cedarn', 'ceilinged', 'censured', 'centered', 'centred', 'centric', 'centrist', 'centum', 'cercal', 'cerise', 'cerous', 'certain', 'cervid', 'cervine', 'cestoid', 'chaffless', 'chaffy', 'chainless', 'chairborne', 'chaliced', 'chalky', 'chambered', 'chanceful', 'chanceless', 'chancroid', 'chancrous', 'chancy', 'changeful', 'changeless', 'changing', 'chapeless', 'chargeful', 'chargeless', 'charming', 'charmless', 
'charry', 'chartered', 'chartless', 'chary', 'chasmal', 'chasmic', 'chasmy', 'chasseur', 'chaster', 'chastest', 'chastised', 'chatty', 'checkered', 'checky', 'cheeky', 'cheerful', 'cheerless', 'cheerly', 'cheery', 'cheesy', 'chelate', 'chemic', 'chequy', 'cherty', 'chestnut', 'chesty', 'chevroned', 'chewy', 'chichi', 'chiefless', 'chiefly', 'chiffon', 'childing', 'childish', 'childless', 'childlike', 'childly', 'chill', 'chilly', 'chin', 'chintzy', 'chipper', 'chippy', 'chirpy', 'chiseled', 'chiselled', 'chlorous', 'chocker', 'choicer', 'chokey', 'choking', 'choky', 'chondral', 'choosey', 'choosy', 'chopping', 'choppy', 'choral', 'chordal', 'chordate', 'choric', 'chrismal', 'chronic', 'chthonic', 'chubby', 'chuffy', 'chummy', 'chunky', 'churchless', 'churchly', 'churchward', 'churchy', 'churlish', 'churning', 'chymous', 'cichlid', 'cirrate', 'cirrose', 'cirsoid', 'cissoid', 'cissy', 'cisted', 'cistic', 'citrous', 'citrus', 'clamant', 'clammy', 'clankless', 'clannish', 'clasping', 'classless', 'classy', 'clastic', 'clathrate', 'clausal', 'claustral', 'clavate', 'clawless', 'clayey', 'clayish', 'cleanly', 'cleansing', 'clerkish', 'clerkly', 'cliffy', 'clingy', 'clinquant', 'clipping', 'cliquey', 'cliquish', 'cliquy', 'clithral', 'clitic', 'clockwise', 'cloddish', 'cloddy', 'clogging', 'cloggy', 'cloistered', 'cloistral', 'clonic', 'closer', 'closest', 'clotty', 'clouded', 'cloudless', 'cloudy', 'clovered', 'clownish', 'cloying', 'clubby', 'clucky', 'clueless', 'clumpy', 'clumsy', 'clustered', 'coaly', 'coarser', 'coarsest', 'coastal', 'coastward', 'coastwise', 'coated', 'coatless', 'coccal', 'coccoid', 'cockney', 'cocksure', 'cocky', 'coffered', 'cogent', 'cognate', 'coky', 'coldish', 'collapsed', 'collect', 'colloid', 'colly', 'coltish', 'columned', 'comal', 'comate', 'combined', 'combless', 'combust', 'comely', 'comfy', 'coming', 'commie', 'commo', 'comose', 'compact', 'compelled', 'compleat', 'complete', 'compo', 'composed', 'concave', 'conceived', 'concerned', 'conchal', 'conchate', 'concise', 'condemned', 'condign', 'conferred', 'confined', 'confirmed', 'confused', 'conjoined', 'conjoint', 'conjunct', 'connate', 'conoid', 'conscious', 'constrained', 'consumed', 'contained', 'contrate', 'contrite', 'contrived', 'controlled', 'contused', 'convex', 'convict', 'convinced', 'cooing', 'cooking', 'coolish', 'copied', 'coppiced', 'corbelled', 'cordate', 'corded', 'cordial', 'cordless', 'coreless', 'corking', 'corky', 'cormous', 'cornered', 'cornute', 'corny', 'correct', 'corrupt', 'corvine', 'cosher', 'costal', 'costate', 'costive', 'costly', 'costumed', 'cottaged', 'couchant', 'counter', 'countless', 'courant', 'couthie', 'couthy', 'coxal', 'coyish', 'cozy', 'crabbed', 'crabby', 'crablike', 'crabwise', 'crackbrained', 'crackers', 'cracking', 'crackjaw', 'crackle', 'crackling', 'crackly', 'crackpot', 'craftless', 'crafty', 'cragged', 'craggy', 'cranky', 'crannied', 'crashing', 'craven', 'crawling', 'crawly', 'creaky', 'creamlaid', 'creamy', 'creasy', 'credent', 'creedal', 'creepy', 'crenate', 'crescive', 'cressy', 'crestless', 'cricoid', 'crimeless', 'crimpy', 'crimson', 'crinal', 'cringing', 'crinite', 'crinkly', 'crinoid', 'crinose', 'crippling', 'crispate', 'crispy', 'crisscross', 'cristate', 'croaky', 'crookback', 'crooked', 'crosiered', 'crossbred', 'crosstown', 'crosswise', 'croupous', 'croupy', 'crowded', 'crowing', 'crowning', 'crownless', 'crucial', 'cruder', 'crudest', 'cruel', 'crumbly', 'crumby', 'crummy', 'crumpled', 'crunchy', 'crural', 'crushing', 'crustal', 'crusted', 
'crustless', 'crusty', 'crying', 'cryptal', 'cryptic', 'ctenoid', 'cubbish', 'cubist', 'cuboid', 'cultic', 'cultish', 'cultrate', 'cumbrous', 'cunning', 'cupric', 'cuprous', 'curbless', 'curdy', 'cureless', 'curly', 'currish', 'cursed', 'cursing', 'cursive', 'curtate', 'curving', 'curvy', 'cushy', 'cuspate', 'cussed', 'custom', 'cutcha', 'cuter', 'cutest', 'cyan', 'cycloid', 'cyclone', 'cymoid', 'cymose', 'cystoid', 'cytoid', 'czarist', 'daedal', 'daffy', 'daimen', 'dainty', 'daisied', 'dam', 'damaged', 'damfool', 'damning', 'dampish', 'dancing', 'dangling', 'dapper', 'dapple', 'dappled', 'daring', 'darkish', 'darkling', 'darksome', 'dashing', 'dastard', 'dated', 'dateless', 'dauby', 'dauntless', 'daylong', 'daytime', 'deathful', 'deathless', 'deathlike', 'deathly', 'deathy', 'debased', 'debauched', 'deceased', 'decent', 'declared', 'decreed', 'decurved', 'dedal', 'deedless', 'defaced', 'defiled', 'defined', 'deflexed', 'deformed', 'defunct', 'deictic', 'deism', 'deject', 'deltoid', 'demure', 'dendroid', 'denser', 'densest', 'dentate', 'dentoid', 'deposed', 'depraved', 'depressed', 'deprived', 'deranged', 'dermal', 'dermic', 'dermoid', 'dernier', 'descant', 'described', 'desert', 'deserved', 'designed', 'desired', 'desmoid', 'despised', 'destined', 'detached', 'detailed', 'deuced', 'deviled', 'devoid', 'devout', 'dewlapped', 'dewy', 'dextral', 'dextrorse', 'dextrous', 'diarch', 'dicey', 'dickey', 'dicky', 'diet', 'diffuse', 'diffused', 'dighted', 'diglot', 'dilute', 'dimmest', 'dimming', 'dimply', 'dingbats', 'dingy', 'dinkies', 'dinky', 'diplex', 'diploid', 'dippy', 'direful', 'direr', 'direst', 'dirty', 'discalced', 'disclosed', 'discoid', 'discreet', 'discrete', 'diseased', 'disgraced', 'disguised', 'dishy', 'disjoined', 'disjoint', 'disjunct', 'disliked', 'dispensed', 'disperse', 'dispersed', 'displayed', 'displeased', 'disposed', 'dissolved', 'distal', 'distent', 'distilled', 'distinct', 'distrait', 'distraught', 'distressed', 'disturbed', 'distyle', 'disused', 'divers', 'diverse', 'divorced', 'dizzied', 'dizzy', 'docile', 'dockside', 'doddered', 'dodgy', 'dogged', 'dogging', 'doggish', 'doggone', 'doggoned', 'doggy', 'doglike', 'doited', 'doleful', 'dolesome', 'dollish', 'doltish', 'donnard', 'donnered', 'donnish', 'donsie', 'dopey', 'dopy', 'dormant', 'dormie', 'dormy', 'dorty', 'dotal', 'doting', 'dotted', 'doty', 'doubling', 'doubtful', 'doubting', 'doubtless', 'doughy', 'dovelike', 'dovetailed', 'dovish', 'dowdy', 'dowie', 'downbeat', 'downhill', 'downrange', 'downright', 'downstage', 'downstair', 'downstairs', 'downstate', 'downstream', 'downwind', 'dozen', 'dozenth', 'dozing', 'dozy', 'draffy', 'drafty', 'dragging', 'draggy', 'draining', 'drastic', 'dratted', 'draughty', 'dreadful', 'dreamful', 'dreamless', 'dreamlike', 'dreamy', 'dreary', 'dreggy', 'dressy', 'drier', 'driest', 'driftless', 'drifty', 'drippy', 'driven', 'drizzly', 'droning', 'dronish', 'droopy', 'dropping', 'dropsied', 'drossy', 'droughty', 'drouthy', 'drowsing', 'drowsy', 'drudging', 'drumly', 'drunken', 'dryer', 'ducal', 'duckbill', 'duckie', 'ducky', 'ductile', 'duddy', 'dudish', 'dulcet', 'dullish', 'dumbstruck', 'dumpish', 'dun', 'dungy', 'dural', 'duskish', 'dusky', 'dustless', 'dustproof', 'dwarfish', 'dyeline', 'dying', 'earnest', 'earthborn', 'earthbound', 'earthen', 'earthly', 'earthquaked', 'earthward', 'earthy', 'easeful', 'eastbound', 'eastmost', 'eastward', 'eaten', 'eating', 'ebon', 'eccrine', 'ecru', 'edgeless', 'edging', 'edgy', 'eely', 'eerie', 'eery', 'effete', 'effluent', 'effuse', 'egal', 
'eighteen', 'eighteenth', 'eightfold', 'eighty', 'elapsed', 'elder', 'eldest', 'eldritch', 'elect', 'elfin', 'elfish', 'elite', 'elmy', 'elvish', 'embowed', 'emersed', 'emptied', 'enarched', 'enate', 'encased', 'enceinte', 'endarch', 'endless', 'endmost', 'endorsed', 'endways', 'enforced', 'engorged', 'engrailed', 'engrained', 'engraved', 'enhanced', 'enjambed', 'enlarged', 'enorm', 'enough', 'enow', 'enraged', 'enrapt', 'enrolled', 'enslaved', 'enthralled', 'entire', 'entranced', 'enured', 'enwrapped', 'equine', 'equipped', 'erased', 'erect', 'ermined', 'erose', 'errant', 'errhine', 'erring', 'ersatz', 'erstwhile', 'escaped', 'essive', 'estranged', 'estrous', 'eterne', 'ethic', 'ethmoid', 'ethnic', 'eustyle', 'evens', 'evoked', 'exact', 'exarch', 'exchanged', 'excused', 'exempt', 'exhaled', 'expert', 'expired', 'exposed', 'exsert', 'extant', 'extinct', 'extrorse', 'eyeless', 'fabled', 'faceless', 'facete', 'factious', 'faddish', 'faddy', 'faded', 'fadeless', 'fading', 'faecal', 'failing', 'faintish', 'fairish', 'faithful', 'faithless', 'falcate', 'falser', 'falsest', 'fameless', 'famished', 'famous', 'fancied', 'fanfold', 'fangled', 'fangless', 'farand', 'farci', 'farfetched', 'farming', 'farouche', 'farrow', 'farther', 'farthest', 'fatal', 'fated', 'fateful', 'fatigue', 'fatigued', 'fatless', 'fatter', 'fattest', 'fattish', 'faucal', 'faucial', 'faultless', 'faulty', 'faunal', 'favored', 'favoured', 'fearful', 'fearless', 'fearsome', 'feastful', 'feathered', 'featured', 'febrile', 'fecal', 'feckless', 'fecund', 'federalist', 'feeble', 'feeblish', 'feeling', 'feisty', 'feline', 'felon', 'felsic', 'fenny', 'feodal', 'feral', 'ferine', 'ferny', 'fervent', 'fervid', 'fesswise', 'festal', 'festive', 'fetching', 'fetial', 'fetid', 'feudal', 'fewer', 'fibered', 'fibroid', 'fibrous', 'fickle', 'fictile', 'fictive', 'fiddling', 'fiddly', 'fiendish', 'fiercer', 'fiercest', 'fifteen', 'fifteenth', 'fifty', 'filar', 'filial', 'filose', 'filthy', 'filtrable', 'financed', 'fineable', 'finer', 'finest', 'fingered', 'finished', 'finite', 'finless', 'finny', 'fireproof', 'firry', 'fishy', 'fissile', 'fistic', 'fitchy', 'fitful', 'fitted', 'fitter', 'fitting', 'fivefold', 'fizzy', 'flabby', 'flaccid', 'flagging', 'flaggy', 'flagrant', 'flameproof', 'flaming', 'flamy', 'flappy', 'flaring', 'flashy', 'flatling', 'flattest', 'flattish', 'flaunty', 'flawless', 'flawy', 'flaxen', 'fleckless', 'fledgeling', 'fledgling', 'fledgy', 'fleeceless', 'fleecy', 'fleeing', 'fleeting', 'fleshless', 'fleshly', 'fleshy', 'flexile', 'flightless', 'flighty', 'flimsy', 'flinty', 'flippant', 'flipping', 'flitting', 'floaty', 'floccose', 'floccus', 'flooded', 'floodlit', 'floppy', 'florid', 'flory', 'flossy', 'floury', 'flowered', 'flowing', 'fluent', 'fluffy', 'flukey', 'fluky', 'flurried', 'fluted', 'fluty', 'flyweight', 'foamless', 'foamy', 'focused', 'focussed', 'foetal', 'foetid', 'fogbound', 'foggy', 'fogless', 'folded', 'folkish', 'folklore', 'folksy', 'fontal', 'foodless', 'foolish', 'foolproof', 'footed', 'footless', 'footling', 'footsore', 'footworn', 'foppish', 'forceful', 'forceless', 'forehand', 'foremost', 'forenamed', 'foresaid', 'foreseen', 'forespent', 'foretold', 'forfeit', 'forky', 'former', 'formless', 'fornent', 'forspent', 'forte', 'forthright', 'fortis', 'forworn', 'foughten', 'fourfold', 'fourscore', 'foursquare', 'fourteenth', 'foxy', 'fozy', 'fractious', 'fractured', 'fragile', 'fragrant', 'frantic', 'fratchy', 'fraudful', 'frazzled', 'freakish', 'freaky', 'freckly', 'freebie', 'freeborn', 'freeing', 
'freer', 'freest', 'frenzied', 'frequent', 'freshman', 'fretful', 'fretted', 'fretty', 'fribble', 'friended', 'friendless', 'frightened', 'frightful', 'frilly', 'fringeless', 'frisky', 'frizzly', 'frizzy', 'frockless', 'frolic', 'fronded', 'frontal', 'frontier', 'frontless', 'frosted', 'frostless', 'frostlike', 'frosty', 'frothy', 'froward', 'frowsty', 'frowsy', 'frowzy', 'frozen', 'fructed', 'frugal', 'fruited', 'fruitful', 'fruitless', 'fruity', 'frumpish', 'frumpy', 'frustrate', 'fubsy', 'fucoid', 'fugal', 'fulfilled', 'fulgent', 'fulgid', 'fulsome', 'fulvous', 'fumy', 'funded', 'funest', 'fungal', 'fungoid', 'fungous', 'funky', 'furcate', 'furry', 'furthest', 'furtive', 'furzy', 'fuscous', 'fusil', 'fusile', 'fussy', 'fustian', 'fusty', 'futile', 'fuzzy', 'gabbroid', 'gabled', 'gadoid', 'gadrooned', 'gaga', 'gainful', 'gainless', 'gainly', 'gaited', 'galliard', 'galling', 'gallooned', 'galore', 'gamer', 'gamesome', 'gamest', 'gamey', 'gamic', 'gammy', 'gamy', 'gangling', 'gangly', 'ganoid', 'gaping', 'gardant', 'garish', 'garni', 'gassy', 'gated', 'gateless', 'gaudy', 'gaumless', 'gauzy', 'gawky', 'gawsy', 'gearless', 'geegaw', 'gelded', 'gelid', 'gemel', 'gemmate', 'gemmy', 'genal', 'genial', 'genic', 'genteel', 'genty', 'georgic', 'germane', 'gestic', 'gewgaw', 'ghastful', 'ghastly', 'ghostly', 'ghoulish', 'gibbose', 'gibbous', 'giddied', 'giddy', 'gifted', 'giggly', 'gilded', 'gimcrack', 'gimlet', 'gimpy', 'girlish', 'girly', 'giving', 'glabrate', 'glabrous', 'glacial', 'gladsome', 'glaikit', 'glairy', 'glandered', 'glaring', 'glary', 'glasslike', 'glassy', 'gleeful', 'gleesome', 'gleety', 'glenoid', 'glial', 'glibber', 'glibbest', 'globate', 'globoid', 'globose', 'gloomful', 'glooming', 'gloomy', 'glossies', 'glossy', 'glottic', 'glowing', 'gluey', 'glummer', 'glummest', 'glumpy', 'glutted', 'glyphic', 'glyptic', 'gnarly', 'gnathic', 'gneissic', 'gneissoid', 'gnomic', 'gnomish', 'goalless', 'goateed', 'goatish', 'goddam', 'goddamn', 'goddamned', 'godless', 'godlike', 'godly', 'goitrous', 'goodish', 'goodly', 'gooey', 'goofy', 'goosey', 'goosy', 'gorgeous', 'gormless', 'gorsy', 'gory', 'gouty', 'gowaned', 'goyish', 'graceful', 'graceless', 'gracile', 'gracious', 'gradely', 'grainy', 'grapey', 'grapy', 'grasping', 'graspless', 'grassy', 'grateful', 'grating', 'gratis', 'grave', 'gravel', 'graveless', 'gravest', 'gravid', 'grayish', 'greening', 'greenish', 'greensick', 'greyish', 'griefless', 'grieving', 'grimmer', 'grimmest', 'grimy', 'gripping', 'gripple', 'grippy', 'grisly', 'gristly', 'gritty', 'grizzled', 'groggy', 'groovy', 'groping', 'grotesque', 'grotty', 'grouchy', 'groundless', 'grouty', 'grubby', 'grudging', 'gruesome', 'gruffish', 'grumbly', 'grummer', 'grummest', 'grumose', 'grumous', 'grumpy', 'gruntled', 'guardant', 'guarded', 'guardless', 'guideless', 'guiding', 'guileful', 'guileless', 'guiltless', 'guilty', 'gular', 'gulfy', 'gummous', 'gummy', 'gumptious', 'gunless', 'gushy', 'gusty', 'gutless', 'gutsy', 'gutta', 'guttate', 'gyral', 'gyrate', 'gyrose', 'habile', 'hackly', 'hackneyed', 'hadal', 'haemal', 'haemic', 'haggish', 'hairless', 'hairlike', 'halest', 'halftone', 'hallowed', 'haloid', 'halting', 'hamate', 'hammered', 'hammy', 'handed', 'handled', 'handless', 'handmade', 'handsome', 'handworked', 'handwrought', 'handy', 'hangdog', 'hapless', 'haploid', 'haptic', 'harassed', 'hardback', 'hardened', 'hardwood', 'harlot', 'harmful', 'harmless', 'harnessed', 'harried', 'hastate', 'hasty', 'hatching', 'hated', 'hateful', 'hatless', 'hatted', 'haughty', 'haunted', 
'haunting', 'hawkish', 'hawklike', 'haywire', 'hazy', 'headed', 'headfirst', 'headless', 'headlong', 'headmost', 'headstrong', 'heady', 'healing', 'healthful', 'healthy', 'heaping', 'heapy', 'hearted', 'heartfelt', 'hearties', 'heartless', 'heartsome', 'hearty', 'heated', 'heathen', 'heathy', 'heating', 'heavies', 'heaving', 'hectic', 'hedgy', 'heedful', 'heedless', 'heelless', 'hefty', 'heinous', 'heirless', 'hellish', 'helmless', 'helpful', 'helpless', 'hemal', 'hempen', 'hempy', 'hennaed', 'herbaged', 'herbal', 'herbless', 'herby', 'here', 'hidden', 'highbrow', 'highest', 'hilding', 'hilly', 'hinder', 'hindmost', 'hindward', 'hipper', 'hippest', 'hippy', 'hircine', 'hirsute', 'hispid', 'hissing', 'histie', 'histoid', 'hitchy', 'hither', 'hiveless', 'hivelike', 'hobnail', 'hobnailed', 'hoggish', 'hoiden', 'holey', 'hollow', 'holmic', 'holstered', 'homebound', 'homeless', 'homelike', 'homely', 'homesick', 'homespun', 'homeward', 'homey', 'homy', 'honest', 'honeyed', 'honied', 'hoodless', 'hoofless', 'hooly', 'hopeful', 'hopeless', 'hopping', 'horal', 'hornish', 'hornless', 'hornlike', 'horny', 'horrent', 'horrid', 'horsey', 'horsy', 'hotfoot', 'hotshot', 'hotter', 'hottest', 'hotting', 'hottish', 'hourlong', 'hourly', 'housebound', 'houseless', 'hoven', 'howling', 'hoyden', 'hueless', 'huffish', 'huffy', 'huger', 'hugest', 'hulking', 'hulky', 'humbler', 'humdrum', 'humic', 'humid', 'hummel', 'humpbacked', 'humpy', 'hunchback', 'hunchbacked', 'hundredth', 'hungry', 'hunky', 'hunted', 'hurling', 'hurried', 'hurtful', 'hurtless', 'hurtling', 'husky', 'hydric', 'hydro', 'hydroid', 'hydrous', 'hymnal', 'hyoid', 'hyphal', 'hypnoid', 'icky', 'ictic', 'idem', 'idled', 'idlest', 'idling', 'iffy', 'ignored', 'imbued', 'immane', 'immense', 'immersed', 'immune', 'impel', 'impelled', 'impish', 'implied', 'imposed', 'improved', 'impure', 'inane', 'inapt', 'inboard', 'inborn', 'inbound', 'inbred', 'inbreed', 'inby', 'incased', 'incensed', 'incised', 'incog', 'increased', 'incrust', 'incult', 'incurved', 'incuse', 'indign', 'indoor', 'indrawn', 'inept', 'infect', 'infelt', 'infirm', 'inflamed', 'inflexed', 'inform', 'informed', 'ingrain', 'ingrained', 'ingrate', 'ingrown', 'inhaled', 'inhumed', 'injured', 'inky', 'inlaid', 'inmost', 'innate', 'inphase', 'inrush', 'insane', 'inscribed', 'inshore', 'insides', 'inspired', 'instinct', 'insured', 'intact', 'intense', 'intent', 'intern', 'interred', 'intime', 'intoed', 'intoned', 'intown', 'introrse', 'inured', 'involved', 'inward', 'inwrought', 'irate', 'ireful', 'irksome', 'itching', 'itchy', 'ivied', 'jaded', 'jadish', 'jagged', 'jaggy', 'jammy', 'jangly', 'jannock', 'japan', 'jarring', 'jasp', 'jaundiced', 'jazzy', 'jealous', 'jejune', 'jellied', 'jerky', 'jessant', 'jestful', 'jesting', 'jet', 'jetting', 'jetty', 'jeweled', 'jewelled', 'jiggered', 'jiggish', 'jiggly', 'jingly', 'jobless', 'jocose', 'jocund', 'jointed', 'jointless', 'jointured', 'joking', 'jolty', 'jouncing', 'jowly', 'joyful', 'joyless', 'joyous', 'jubate', 'jugal', 'jugate', 'juiceless', 'juicy', 'jumbled', 'jumpy', 'jungly', 'jural', 'jurant', 'jussive', 'jutting', 'kacha', 'kaput', 'karmic', 'karstic', 'kayoed', 'kerchiefed', 'keyless', 'khaki', 'kidnapped', 'killing', 'kilted', 'kindless', 'kindly', 'kindred', 'kingless', 'kinglike', 'kingly', 'kinky', 'kinless', 'kirtled', 'kittle', 'klephtic', 'klutzy', 'knaggy', 'knavish', 'kneeling', 'knickered', 'knifeless', 'knightless', 'knightly', 'knitted', 'knobby', 'knotless', 'knotted', 'knotty', 'knowing', 'knuckly', 'knurly', 'kookie', 
'kooky', 'kosher', 'kutcha', 'labelled', 'labile', 'labored', 'laboured', 'labrid', 'labroid', 'lacking', 'lacy', 'laddish', 'laden', 'laggard', 'laic', 'lairy', 'laky', 'lambdoid', 'lambent', 'lamblike', 'lamer', 'lamest', 'laming', 'lanate', 'landed', 'landless', 'landscaped', 'landward', 'languid', 'lanky', 'lanose', 'lapelled', 'lapstrake', 'larboard', 'larger', 'largest', 'largish', 'larine', 'larkish', 'larky', 'larval', 'lashing', 'lasting', 'lated', 'lateen', 'later', 'latest', 'lathlike', 'lathy', 'latish', 'latter', 'latticed', 'laurelled', 'lavish', 'lawful', 'lawless', 'lawny', 'leachy', 'leaden', 'leadless', 'leady', 'leafless', 'leafy', 'leaky', 'leaning', 'leaping', 'learned', 'leary', 'leathern', 'ledgy', 'leery', 'leftward', 'legged', 'leggy', 'legit', 'legless', 'leisure', 'leisured', 'lengthways', 'lengthwise', 'lengthy', 'lenis', 'lenten', 'lentic', 'lento', 'lentoid', 'leprose', 'leprous', 'lettered', 'licenced', 'licensed', 'licit', 'lidded', 'lidless', 'liege', 'lifeful', 'lifeless', 'lifelike', 'lifelong', 'lighted', 'lightfast', 'lightful', 'lightish', 'lightless', 'lightsome', 'lightweight', 'lignite', 'likely', 'lilied', 'limbate', 'limbless', 'limey', 'limpid', 'limy', 'liney', 'lingual', 'linty', 'liny', 'lipless', 'lipoid', 'lippy', 'lissom', 'lissome', 'listless', 'lither', 'lithesome', 'lithest', 'lithic', 'litho', 'lithoid', 'litten', 'littler', 'littlest', 'livelong', 'lively', 'livid', 'loaded', 'loamy', 'loathful', 'loathly', 'loathsome', 'lobar', 'lobate', 'lobose', 'lofty', 'logy', 'lonesome', 'longer', 'longhand', 'longing', 'longish', 'longsome', 'longwall', 'longwise', 'looking', 'loonies', 'loopy', 'looser', 'loosest', 'lordless', 'lordly', 'losel', 'losing', 'lossy', 'lotic', 'loudish', 'lounging', 'louring', 'loury', 'lousy', 'loutish', 'louvered', 'louvred', 'loveless', 'lovelorn', 'lovely', 'lovesick', 'lovesome', 'lowly', 'loyal', 'lozenged', 'lubric', 'lucent', 'lucid', 'luckless', 'lukewarm', 'lumpen', 'lumpish', 'lunate', 'lupine', 'lurdan', 'lurid', 'luscious', 'lushy', 'lustful', 'lustral', 'lustred', 'lustrous', 'lusty', 'lying', 'lymphoid', 'lyrate', 'lyric', 'macled', 'madcap', 'maddest', 'madding', 'maigre', 'mainstream', 'maintained', 'makeless', 'makeshift', 'malar', 'male', 'malign', 'malty', 'mammoth', 'man', 'maneless', 'manful', 'mangey', 'mangy', 'manic', 'manky', 'manlike', 'mannered', 'mannish', 'mansard', 'mantic', 'many', 'marching', 'mardy', 'marish', 'maroon', 'married', 'marshy', 'masking', 'massive', 'massy', 'mastless', 'mastoid', 'matchless', 'mated', 'matey', 'matin', 'matted', 'mature', 'maudlin', 'maungy', 'mawkish', 'maxi', 'mazy', 'meager', 'meagre', 'meaning', 'measled', 'measly', 'measured', 'meaty', 'medley', 'melic', 'mellow', 'mensal', 'menseful', 'menseless', 'mere', 'merest', 'merging', 'mesarch', 'meshed', 'mesic', 'messier', 'messy', 'metalled', 'mettled', 'mickle', 'middling', 'midget', 'midi', 'midmost', 'midship', 'midships', 'miffy', 'mighty', 'migrant', 'milkless', 'million', 'millionth', 'millrun', 'mimic', 'mincing', 'minded', 'mindful', 'mindless', 'mingy', 'mini', 'minim', 'minion', 'minute', 'mirky', 'mirthful', 'mirthless', 'miry', 'mis', 'misformed', 'mislaid', 'misproud', 'missive', 'misty', 'mistyped', 'misused', 'mitered', 'mizzen', 'mnemic', 'moanful', 'mobbish', 'model', 'modeled', 'modest', 'modish', 'molal', 'molar', 'moldy', 'molten', 'monarch', 'moneyed', 'monger', 'mongrel', 'monied', 'monism', 'monkish', 'mono', 'monstrous', 'montane', 'monthly', 'mony', 'moody', 'moonish', 
'moonless', 'moonlit', 'moonstruck', 'moony', 'moory', 'mopey', 'mopy', 'mordant', 'moreish', 'morish', 'morose', 'mossy', 'motey', 'mothy', 'motile', 'motored', 'mottled', 'mounted', 'mournful', 'mousey', 'mousy', 'mouthless', 'mouthy', 'moveless', 'mowburnt', 'mucid', 'mucking', 'muckle', 'mucky', 'mucoid', 'muddy', 'muggy', 'muley', 'mulish', 'mulley', 'mumchance', 'mundane', 'mural', 'murine', 'murky', 'murrey', 'muscid', 'muscly', 'museful', 'mushy', 'musing', 'musky', 'mussy', 'mustached', 'musty', 'mutant', 'muted', 'muzzy', 'mythic', 'nacred', 'nagging', 'naggy', 'naiant', 'naif', 'nailless', 'naissant', 'naive', 'nameless', 'naming', 'napless', 'napping', 'nappy', 'nary', 'nascent', 'nasty', 'natant', 'natty', 'naughty', 'nauseous', 'needful', 'needless', 'needy', 'negroid', 'neighbor', 'neighbour', 'nephric', 'nerval', 'nervate', 'nerveless', 'nervine', 'nervy', 'nescient', 'nested', 'nestlike', 'netted', 'nettly', 'neural', 'neuron', 'neuter', 'newborn', 'newish', 'newsless', 'newsy', 'nicer', 'nicest', 'nifty', 'niggard', 'niggling', 'nightless', 'nightlong', 'nightly', 'nimble', 'nimbused', 'ninefold', 'nineteen', 'ninety', 'nipping', 'nippy', 'nitid', 'nitty', 'nival', 'nobby', 'nocent', 'nodal', 'nodding', 'nodose', 'nodous', 'noiseless', 'noisette', 'noisome', 'noisy', 'nonplused', 'nonplussed', 'nonstick', 'northmost', 'northward', 'nosey', 'notal', 'notchy', 'noted', 'noteless', 'noticed', 'notour', 'novel', 'novice', 'noxious', 'nubbly', 'nubile', 'nudist', 'numbing', 'nuptial', 'nutant', 'nutlike', 'nutmegged', 'nutty', 'nymphal', 'oafish', 'oaken', 'oarless', 'oaten', 'obese', 'oblate', 'obliged', 'oblique', 'oblong', 'obscene', 'obscure', 'observed', 'obtect', 'obtuse', 'obverse', 'occult', 'ocher', 'ochre', 'ocker', 'oddball', 'offbeat', 'offhand', 'offish', 'offscreen', 'offshore', 'offside', 'often', 'oily', 'okay', 'olden', 'older', 'oldest', 'olid', 'only', 'onshore', 'onside', 'onstage', 'onward', 'oozing', 'oozy', 'ornate', 'orphan', 'ortho', 'oscine', 'osiered', 'osmic', 'osmous', 'otic', 'outback', 'outboard', 'outbound', 'outbred', 'outcast', 'outcaste', 'outdone', 'outdoor', 'outland', 'outlaw', 'outlined', 'outmost', 'outraged', 'outright', 'outsize', 'outsized', 'outspread', 'outworn', 'ovate', 'over', 'overt', 'ovine', 'ovoid', 'owing', 'owlish', 'owllike', 'packaged', 'padded', 'pagan', 'painful', 'painless', 'paler', 'palest', 'paling', 'palish', 'pallid', 'pally', 'palmar', 'palmate', 'palmy', 'palpate', 'palsied', 'paltry', 'paly', 'pan', 'paneled', 'panniered', 'panzer', 'papist', 'pappose', 'pappy', 'par', 'pardine', 'parklike', 'parky', 'parlous', 'parol', 'parotid', 'parted', 'partite', 'pass', 'passant', 'passless', 'pasteboard', 'pasted', 'pastel', 'pasties', 'pasty', 'patchy', 'patent', 'pathic', 'pathless', 'patient', 'paunchy', 'pausal', 'pauseful', 'pauseless', 'pavid', 'pawky', 'payoff', 'peaceful', 'peaceless', 'peachy', 'peaked', 'peaky', 'pearlized', 'peaty', 'pebbly', 'peccant', 'peckish', 'pedal', 'pedate', 'peddling', 'peeling', 'peerless', 'peevish', 'peewee', 'peltate', 'pelting', 'pencilled', 'pendant', 'pendent', 'pending', 'penile', 'pennate', 'pennied', 'pennoned', 'pensile', 'pensive', 'peppy', 'perceived', 'percent', 'percoid', 'perished', 'perjured', 'perky', 'perplexed', 'perverse', 'pesky', 'petalled', 'petite', 'petrous', 'pettish', 'pewter', 'phaseless', 'phasic', 'phasmid', 'phatic', 'phlegmy', 'phocine', 'phonal', 'phoney', 'phonic', 'phony', 'photic', 'phrenic', 'phthisic', 'phylloid', 'physic', 'piano', 'picked', 
'pickled', 'picky', 'pictured', 'piddling', 'piebald', 'piecemeal', 'piercing', 'piggie', 'piggish', 'pillaged', 'pillared', 'pilose', 'pimpled', 'pimply', 'pinchbeck', 'piney', 'pinguid', 'pinkish', 'pinnate', 'pinpoint', 'piny', 'pious', 'pipeless', 'pipelike', 'piping', 'pipy', 'piquant', 'piscine', 'pitchy', 'pithy', 'pitted', 'placeless', 'placid', 'placoid', 'plagal', 'plaguey', 'plaguy', 'plaided', 'plaintive', 'plangent', 'plantar', 'plantless', 'plashy', 'plastered', 'plastics', 'plated', 'platy', 'plausive', 'playful', 'pleading', 'pleasing', 'plebby', 'pleural', 'pliant', 'plical', 'plicate', 'plodding', 'plosive', 'plotful', 'plotless', 'plucky', 'plumaged', 'plumate', 'plumbic', 'plumbless', 'plumbous', 'plummy', 'plumose', 'plumy', 'plusher', 'plushest', 'poachy', 'pockmarked', 'pocky', 'podgy', 'poignant', 'pointing', 'pointless', 'pokey', 'pokies', 'poky', 'polished', 'polite', 'pollened', 'poltroon', 'pompous', 'ponceau', 'pongid', 'poorly', 'poppied', 'porcine', 'porky', 'porous', 'porrect', 'portly', 'possessed', 'postern', 'postiche', 'postponed', 'potent', 'potted', 'potty', 'powered', 'practic', 'practiced', 'practised', 'praising', 'prayerful', 'prayerless', 'preachy', 'preborn', 'precast', 'precise', 'prefab', 'preggers', 'pregnant', 'premed', 'premier', 'premiere', 'premorse', 'prepared', 'prepense', 'preschool', 'prescribed', 'prescript', 'present', 'preserved', 'preset', 'pressing', 'pressor', 'presto', 'presumed', 'pretend', 'pretty', 'prewar', 'priceless', 'pricey', 'pricy', 'prideful', 'prideless', 'priestly', 'priggish', 'primal', 'primate', 'primsie', 'princely', 'printed', 'printless', 'prissy', 'pristine', 'privies', 'probing', 'produced', 'profane', 'profaned', 'professed', 'profound', 'profuse', 'prolate', 'prolix', 'pronounced', 'proposed', 'proscribed', 'prostate', 'prostrate', 'prostyle', 'prosy', 'proven', 'provoked', 'prowessed', 'proxy', 'prudent', 'prudish', 'prunted', 'prying', 'pseudo', 'psycho', 'pubic', 'pucka', 'puddly', 'pudgy', 'puffy', 'puggish', 'puggy', 'puisne', 'pukka', 'puling', 'pulpy', 'pulsing', 'punchy', 'punctate', 'punctured', 'pungent', 'punkah', 'puny', 'pupal', 'purblind', 'purer', 'purest', 'purging', 'purplish', 'purpure', 'pursued', 'pursy', 'pushing', 'pushy', 'pussy', 'putrid', 'pygmoid', 'pyknic', 'pyoid', 'quadrate', 'quadric', 'quaggy', 'quaky', 'qualmish', 'quantal', 'quartan', 'quartered', 'quartic', 'quartile', 'queasy', 'queenless', 'queenly', 'quenchless', 'quibbling', 'quickset', 'quiet', 'quilted', 'quinate', 'quinoid', 'quinsied', 'quintan', 'quintic', 'quippish', 'quirky', 'quondam', 'rabic', 'rabid', 'racemed', 'racing', 'racist', 'racy', 'raddled', 'raffish', 'raging', 'rainier', 'rainless', 'rainproof', 'raising', 'rakehell', 'rakish', 'ralline', 'ramal', 'rambling', 'rammish', 'ramose', 'rampant', 'ramstam', 'rancid', 'randie', 'randy', 'rangy', 'ranking', 'raploch', 'rarer', 'rarest', 'raring', 'rascal', 'rasping', 'raspy', 'ratite', 'ratlike', 'rattish', 'rattling', 'rattly', 'ratty', 'raucous', 'raunchy', 'ravaged', 'raving', 'rawboned', 'rawish', 'rayless', 'rearmost', 'rearward', 'reasoned', 'rebel', 'reborn', 'rebuked', 'reckless', 'recluse', 'record', 'rectal', 'recurved', 'redder', 'reddest', 'reddish', 'reedy', 'reeky', 'refer', 'refined', 'regal', 'regent', 'regnal', 'regnant', 'released', 'relieved', 'remiss', 'remnant', 'removed', 'rending', 'renowned', 'rental', 'repand', 'repent', 'replete', 'reproved', 'reptant', 'reptile', 'required', 'rescued', 'resigned', 'resolved', 'restful', 
'resting', 'restive', 'restless', 'restored', 'retail', 'retained', 'retired', 'retral', 'retrorse', 'retuse', 'revealed', 'revered', 'reviled', 'revived', 'revolved', 'rheumy', 'rhinal', 'rhodic', 'rhomboid', 'rhotic', 'rhythmic', 'riant', 'ribald', 'ribless', 'riblike', 'ridden', 'rident', 'ridgy', 'riftless', 'righteous', 'rightful', 'rightish', 'rightist', 'rightward', 'rigid', 'riming', 'rimless', 'rimose', 'rimy', 'rindless', 'rindy', 'ringent', 'ringless', 'ripping', 'ripply', 'risen', 'risky', 'riteless', 'ritzy', 'rival', 'riven', 'roadless', 'roasting', 'robust', 'rodded', 'rodless', 'rodlike', 'roguish', 'roily', 'rollneck', 'rompish', 'roofless', 'rooky', 'roomy', 'rooted', 'rootless', 'rootlike', 'ropy', 'roseless', 'roselike', 'rostral', 'rosy', 'rotate', 'rotted', 'rotting', 'rotund', 'roughcast', 'roughish', 'rounded', 'rounding', 'roundish', 'roupy', 'rousing', 'routed', 'routine', 'rowdy', 'rubbly', 'rubied', 'rubric', 'rudish', 'rueful', 'ruffled', 'rufous', 'rugged', 'rugose', 'ruling', 'rumbly', 'rummy', 'rumpless', 'runic', 'runny', 'runtish', 'runty', 'rushing', 'rushy', 'russet', 'rustic', 'rustred', 'rusty', 'ruthful', 'ruthless', 'rutted', 'ruttish', 'rutty', 'saclike', 'sacral', 'sadist', 'sagging', 'said', 'sainted', 'saintly', 'saline', 'sallow', 'saltant', 'salted', 'saltier', 'saltish', 'saltless', 'salty', 'salving', 'sandalled', 'sanded', 'sandy', 'saner', 'sanest', 'sanguine', 'sapid', 'sapless', 'sappy', 'sarcoid', 'sarcous', 'sarky', 'sassy', 'sated', 'satem', 'saucy', 'saut', 'saving', 'savvy', 'scabby', 'scabrous', 'scaldic', 'scalelike', 'scalene', 'scalpless', 'scampish', 'scandent', 'scanty', 'scaphoid', 'scarcer', 'scarcest', 'scarless', 'scary', 'scatheless', 'scathing', 'scatty', 'scentless', 'sceptral', 'scheming', 'schistose', 'schizo', 'schizoid', 'schmaltzy', 'schmalzy', 'scientific', 'scincoid', 'scirrhoid', 'scirrhous', 'scissile', 'scleroid', 'sclerosed', 'sclerous', 'scombrid', 'scombroid', 'scopate', 'scornful', 'scraggly', 'scraggy', 'scrambled', 'scrannel', 'scrappy', 'scratchless', 'scratchy', 'scrawly', 'scrawny', 'screaky', 'screeching', 'screwy', 'scribal', 'scrimpy', 'scroddled', 'scroggy', 'scrotal', 'scrubbed', 'scrubby', 'scruffy', 'scrumptious', 'sculptured', 'scummy', 'scungy', 'scurrile', 'scurry', 'scurvy', 'scutate', 'seaboard', 'seaborne', 'seamless', 'seamy', 'searching', 'seasick', 'seatless', 'seaward', 'second', 'sectile', 'secund', 'secure', 'sedate', 'sedgy', 'seduced', 'seedless', 'seedy', 'seeing', 'seeking', 'seely', 'seeming', 'seemly', 'seismal', 'seismic', 'sejant', 'select', 'selfish', 'selfless', 'selfsame', 'semi', 'senile', 'sensate', 'senseless', 'septal', 'septate', 'sequent', 'sequined', 'seral', 'serene', 'serfish', 'serflike', 'serrate', 'serried', 'serviced', 'servo', 'setose', 'severe', 'sexism', 'sexist', 'sexless', 'sextan', 'sexy', 'shabby', 'shaded', 'shadeless', 'shadowed', 'shady', 'shaftless', 'shaken', 'shaky', 'shallow', 'shalwar', 'shamefaced', 'shameful', 'shameless', 'shapeless', 'shapely', 'shaping', 'shaven', 'shawlless', 'sheathy', 'sheepish', 'shellproof', 'shelly', 'shickered', 'shieldless', 'shieldlike', 'shier', 'shiest', 'shiftless', 'shifty', 'shingly', 'shining', 'shiny', 'shipboard', 'shipless', 'shipshape', 'shirtless', 'shirty', 'shocking', 'shoddy', 'shoeless', 'shopworn', 'shoreless', 'shoreward', 'shortcut', 'shortish', 'shorty', 'shotten', 'showy', 'shredded', 'shredless', 'shrewish', 'shrieval', 'shrinelike', 'shrouding', 'shroudless', 'shrubby', 'shrunken', 'shyer', 
'shyest', 'sicker', 'sicklied', 'sickly', 'sideling', 'sidelong', 'sideward', 'sideways', 'sighful', 'sighted', 'sightless', 'sightly', 'sigmate', 'silenced', 'silken', 'silty', 'silvan', 'silvern', 'simplex', 'sincere', 'sinful', 'singing', 'singsong', 'sinless', 'sinning', 'sissy', 'sister', 'sixfold', 'sixteen', 'sixty', 'sizy', 'skaldic', 'sketchy', 'skewbald', 'skidproof', 'skilful', 'skillful', 'skimpy', 'skinking', 'skinless', 'skinny', 'skirtless', 'skittish', 'skyward', 'slaggy', 'slakeless', 'slangy', 'slantwise', 'slapstick', 'slashing', 'slaty', 'slavish', 'sleazy', 'sleekit', 'sleeky', 'sleepless', 'sleepwalk', 'sleepy', 'sleety', 'sleeveless', 'slender', 'slickered', 'slier', 'sliest', 'slighting', 'slimline', 'slimmer', 'slimmest', 'slimming', 'slimsy', 'slimy', 'slinky', 'slippy', 'slipshod', 'sloping', 'sloshy', 'slothful', 'slouchy', 'sloughy', 'sludgy', 'sluggard', 'sluggish', 'sluicing', 'slumbrous', 'slummy', 'slushy', 'sluttish', 'smacking', 'smallish', 'smarmy', 'smartish', 'smarty', 'smashing', 'smeary', 'smectic', 'smelly', 'smileless', 'smiling', 'smitten', 'smokeproof', 'smoking', 'smothered', 'smugger', 'smuggest', 'smutty', 'snafu', 'snaggy', 'snakelike', 'snaky', 'snappish', 'snappy', 'snarly', 'snatchy', 'snazzy', 'sneaking', 'sneaky', 'snider', 'snidest', 'sniffy', 'snippy', 'snobbish', 'snoopy', 'snooty', 'snoozy', 'snoring', 'snotty', 'snouted', 'snowless', 'snowlike', 'snubby', 'snuffly', 'snuffy', 'snugger', 'snuggest', 'snugging', 'soapless', 'soapy', 'soaring', 'sober', 'socko', 'sodden', 'softish', 'softwood', 'soggy', 'sola', 'solemn', 'soli', 'sollar', 'solus', 'solute', 'solvent', 'somber', 'sombre', 'sombrous', 'sometime', 'sonant', 'songful', 'songless', 'sonless', 'sonsie', 'sonsy', 'soothfast', 'soothing', 'sopping', 'soppy', 'sordid', 'sorer', 'sorest', 'sorry', 'sotted', 'sottish', 'soulful', 'soulless', 'soundless', 'soundproof', 'soupy', 'sourish', 'southmost', 'southpaw', 'southward', 'sovran', 'sozzled', 'spaceless', 'spacial', 'spacious', 'spadelike', 'spangly', 'spanking', 'sparid', 'sparing', 'sparkless', 'sparkling', 'sparoid', 'sparry', 'sparser', 'sparsest', 'spastic', 'spathic', 'spathose', 'spatial', 'spavined', 'specious', 'speckled', 'speckless', 'speechless', 'speedful', 'speeding', 'speedless', 'speedy', 'spellbound', 'spendthrift', 'spermic', 'spermous', 'sphagnous', 'sphenic', 'spheral', 'sphereless', 'spherelike', 'spheric', 'sphery', 'sphygmic', 'sphygmoid', 'spicate', 'spicy', 'spiffing', 'spiffy', 'spiky', 'spindling', 'spindly', 'spineless', 'spinose', 'spinous', 'spiral', 'spirant', 'spireless', 'spiroid', 'spiry', 'spiteful', 'splanchnic', 'splashy', 'spleenful', 'spleenish', 'spleeny', 'splendent', 'splendid', 'splendrous', 'splenic', 'splitting', 'splurgy', 'spoken', 'spokewise', 'spongy', 'spooky', 'spoony', 'sportful', 'sportive', 'sportless', 'sporty', 'spotless', 'spotty', 'spousal', 'spouseless', 'spouted', 'spoutless', 'spriggy', 'sprightful', 'sprightly', 'springing', 'springless', 'springlike', 'springtime', 'springy', 'sprucer', 'sprucest', 'sprucing', 'spryer', 'spryest', 'spunky', 'spurless', 'squabby', 'squalid', 'squally', 'squamate', 'squamous', 'squarish', 'squarrose', 'squashy', 'squeaky', 'squeamish', 'squiffy', 'squiggly', 'squirmy', 'squirting', 'squishy', 'stabbing', 'stabile', 'stagey', 'stagnant', 'stagy', 'stalkless', 'stalky', 'stalwart', 'stalworth', 'stannous', 'staple', 'starboard', 'starchy', 'staring', 'starless', 'starlight', 'starlike', 'starring', 'starry', 'starveling', 'starving', 
'statant', 'stated', 'stateless', 'stateside', 'statewide', 'statist', 'stative', 'statued', 'steadfast', 'stealthy', 'steamtight', 'steamy', 'stedfast', 'steepled', 'stelar', 'stellar', 'stellate', 'stemless', 'stenosed', 'stepwise', 'steric', 'sterile', 'sternal', 'sternmost', 'sthenic', 'stickit', 'stiffish', 'stifling', 'stilly', 'stilted', 'stingless', 'stingy', 'stinko', 'stintless', 'stirless', 'stirring', 'stockinged', 'stockish', 'stockless', 'stocky', 'stodgy', 'stolen', 'stolid', 'stoneground', 'stoneless', 'stoneware', 'stonkered', 'stopless', 'stopping', 'store', 'storeyed', 'storied', 'stormbound', 'stormless', 'stormproof', 'stotious', 'stoutish', 'straining', 'strangest', 'strapless', 'strapping', 'stratous', 'strawless', 'strawlike', 'streaky', 'streaming', 'streamless', 'streamlined', 'streamy', 'stressful', 'stretchy', 'striate', 'stricken', 'strident', 'strifeful', 'strifeless', 'strigose', 'stringent', 'stringless', 'stringy', 'stripeless', 'stripy', 'strobic', 'strongish', 'strophic', 'stroppy', 'structured', 'strutting', 'strychnic', 'stubbled', 'stubbly', 'stubborn', 'stubby', 'studied', 'stuffy', 'stumbling', 'stumpy', 'stunning', 'stupid', 'sturdied', 'sturdy', 'stutter', 'stylar', 'styleless', 'stylised', 'stylish', 'stylized', 'styloid', 'subdued', 'subfusc', 'subgrade', 'sublimed', 'submerged', 'submersed', 'submiss', 'subscribed', 'subscript', 'subtile', 'subtle', 'succinct', 'suchlike', 'suffused', 'sugared', 'suited', 'sulcate', 'sulfa', 'sulkies', 'sulky', 'sullen', 'sullied', 'sultry', 'sunbaked', 'sunbeamed', 'sunburnt', 'sunfast', 'sunken', 'sunless', 'sunlike', 'sunlit', 'sunproof', 'sunrise', 'sunset', 'sunward', 'super', 'superb', 'supine', 'supple', 'supposed', 'sural', 'surbased', 'surer', 'surest', 'surfy', 'surgeless', 'surging', 'surgy', 'surly', 'surpliced', 'surplus', 'surprised', 'suspect', 'svelter', 'sveltest', 'swainish', 'swampy', 'swanky', 'swaraj', 'swarthy', 'sweated', 'sweaty', 'sweeping', 'sweetmeal', 'swelling', 'sweptwing', 'swindled', 'swingeing', 'swinish', 'swirly', 'swishy', 'swordless', 'swordlike', 'sylphic', 'sylphid', 'sylphish', 'sylphy', 'sylvan', 'systemless', 'taboo', 'tabu', 'tacit', 'tacky', 'tactful', 'tactile', 'tactless', 'tailing', 'tailless', 'taillike', 'tailored', 'taintless', 'taken', 'taking', 'talcose', 'talking', 'talky', 'taloned', 'tameless', 'tamer', 'tamest', 'taming', 'tandem', 'tangier', 'tangled', 'tangy', 'tannic', 'tapeless', 'tapelike', 'tardy', 'tarmac', 'tarnal', 'tarot', 'tarry', 'tarsal', 'tartish', 'tasseled', 'tasselled', 'tasteful', 'tasteless', 'tasty', 'tattered', 'tatty', 'taurine', 'tawdry', 'tawie', 'tearful', 'tearing', 'tearless', 'teary', 'teasing', 'techy', 'teeming', 'teenage', 'teensy', 'teeny', 'telic', 'telling', 'telltale', 'tempered', 'templed', 'tempting', 'tender', 'tenfold', 'tenor', 'tenseless', 'tenser', 'tensest', 'tensing', 'tensive', 'tented', 'tentie', 'tentless', 'tenty', 'tepid', 'terbic', 'terete', 'tergal', 'termless', 'ternate', 'terrene', 'tertial', 'tertian', 'testate', 'testy', 'tetchy', 'textbook', 'textile', 'textless', 'textured', 'thallic', 'thalloid', 'thallous', 'thankful', 'thankless', 'thatchless', 'thecal', 'thecate', 'theism', 'theist', 'themeless', 'thenar', 'thermic', 'theroid', 'thetic', 'thickset', 'thievish', 'thinking', 'thinnish', 'thirdstream', 'thirstless', 'thirsty', 'thirteen', 'thistly', 'thornless', 'thorny', 'thoughtful', 'thoughtless', 'thousandth', 'thowless', 'thrashing', 'threadbare', 'threadlike', 'thready', 'threatful', 
'threefold', 'threescore', 'thriftless', 'thrifty', 'thrilling', 'throaty', 'throbbing', 'throbless', 'throneless', 'throwback', 'thudding', 'thuggish', 'thumbless', 'thumblike', 'thumping', 'thymic', 'thymy', 'thyrsoid', 'ticklish', 'tiddly', 'tideless', 'tidied', 'tightknit', 'timbered', 'timeless', 'timely', 'timeous', 'timid', 'tingly', 'tinhorn', 'tinkling', 'tinkly', 'tinny', 'tinsel', 'tintless', 'tiny', 'tippy', 'tiptoe', 'tiptop', 'tireless', 'tiresome', 'titled', 'toeless', 'toey', 'togaed', 'togate', 'toilful', 'toilsome', 'tombless', 'tonal', 'toneless', 'tongueless', 'tonguelike', 'tonish', 'tonnish', 'tony', 'toothless', 'toothlike', 'toothsome', 'toothy', 'topfull', 'topless', 'topmost', 'torose', 'torpid', 'torquate', 'torrent', 'tortile', 'tortious', 'tortured', 'tother', 'touching', 'touchy', 'toughish', 'touring', 'tourist', 'toward', 'towered', 'townish', 'townless', 'towy', 'toxic', 'toyless', 'toylike', 'traceless', 'trackless', 'tractile', 'tractrix', 'trainless', 'tranquil', 'transcribed', 'transient', 'transposed', 'traplike', 'trappy', 'trashy', 'traveled', 'travelled', 'traverse', 'treacly', 'treasured', 'treen', 'trembling', 'trembly', 'trenchant', 'trendy', 'tressured', 'tressy', 'tribal', 'tribeless', 'trichoid', 'trickish', 'trickless', 'tricksome', 'tricksy', 'tricky', 'tricorn', 'trident', 'trifid', 'trifling', 'triform', 'trillion', 'trillionth', 'trilobed', 'trinal', 'triploid', 'trippant', 'tripping', 'tristful', 'triter', 'tritest', 'triune', 'trivalve', 'trochal', 'trochoid', 'trodden', 'trophic', 'trophied', 'tropic', 'troppo', 'trothless', 'troublous', 'truant', 'truceless', 'truer', 'truffled', 'truncate', 'trunnioned', 'trustful', 'trusting', 'trustless', 'trusty', 'truthful', 'truthless', 'tryptic', 'tsarism', 'tsarist', 'tubal', 'tubate', 'tubby', 'tubeless', 'tumbling', 'tumid', 'tuneful', 'tuneless', 'turbaned', 'turbid', 'turdine', 'turfy', 'turgent', 'turgid', 'tuskless', 'tussal', 'tussive', 'tutti', 'twaddly', 'tweedy', 'twelvefold', 'twenty', 'twiggy', 'twinkling', 'twinning', 'twofold', 'typal', 'typhous', 'typic', 'ugsome', 'ullaged', 'umber', 'umbral', 'umbrose', 'umpteen', 'umpteenth', 'unaimed', 'unaired', 'unapt', 'unarmed', 'unasked', 'unawed', 'unbacked', 'unbagged', 'unbaked', 'unbarbed', 'unbarred', 'unbathed', 'unbegged', 'unbent', 'unbid', 'unblamed', 'unbleached', 'unblenched', 'unblent', 'unblessed', 'unblocked', 'unblown', 'unboned', 'unborn', 'unborne', 'unbought', 'unbound', 'unbowed', 'unbraced', 'unbranched', 'unbreached', 'unbreathed', 'unbred', 'unbreeched', 'unbridged', 'unbroke', 'unbruised', 'unbrushed', 'unburned', 'unburnt', 'uncaged', 'uncalled', 'uncapped', 'uncashed', 'uncaught', 'uncaused', 'unchained', 'unchanged', 'uncharge', 'uncharged', 'uncharmed', 'unchaste', 'unchecked', 'uncheered', 'unchewed', 'unclad', 'unclaimed', 'unclassed', 'unclean', 'uncleaned', 'uncleansed', 'unclear', 'uncleared', 'unclimbed', 'unclipped', 'unclogged', 'unclutched', 'uncocked', 'uncoined', 'uncombed', 'uncooked', 'uncouth', 'uncropped', 'uncross', 'uncrowned', 'unculled', 'uncurbed', 'uncured', 'uncursed', 'uncurved', 'uncut', 'undamped', 'undeaf', 'undealt', 'undecked', 'undimmed', 'undipped', 'undocked', 'undone', 'undrained', 'undraped', 'undrawn', 'undreamed', 'undreamt', 'undress', 'undressed', 'undried', 'undrilled', 'undrowned', 'undrunk', 'undubbed', 'undue', 'undug', 'undulled', 'undyed', 'unfair', 'unfanned', 'unfeared', 'unfed', 'unfelled', 'unfelt', 'unfenced', 'unfiled', 'unfilled', 'unfilmed', 'unfine', 
'unfired', 'unfirm', 'unfished', 'unfit', 'unflawed', 'unfledged', 'unflushed', 'unfooled', 'unforced', 'unforged', 'unformed', 'unfought', 'unfound', 'unframed', 'unfraught', 'unfree', 'unfunded', 'unfurred', 'ungalled', 'ungauged', 'ungeared', 'ungilt', 'ungirthed', 'unglad', 'unglazed', 'unglossed', 'ungloved', 'ungored', 'ungorged', 'ungowned', 'ungraced', 'ungrassed', 'ungrazed', 'ungroomed', 'unground', 'ungrown', 'ungrudged', 'ungual', 'unguessed', 'unguled', 'ungummed', 'ungyved', 'unhacked', 'unhailed', 'unhanged', 'unharmed', 'unhatched', 'unhealed', 'unheard', 'unhelped', 'unhewn', 'unhinged', 'unhired', 'unhooped', 'unhorsed', 'unhung', 'unhurt', 'unhusked', 'unique', 'unjust', 'unkempt', 'unkenned', 'unkept', 'unkind', 'unkinged', 'unkissed', 'unknelled', 'unlaid', 'unlearned', 'unlearnt', 'unleased', 'unled', 'unlet', 'unlike', 'unlimed', 'unlined', 'unlit', 'unlooked', 'unlopped', 'unlost', 'unloved', 'unmade', 'unmailed', 'unmaimed', 'unmanned', 'unmarked', 'unmarred', 'unmasked', 'unmatched', 'unmeant', 'unmeet', 'unmet', 'unmilked', 'unmilled', 'unmissed', 'unmixed', 'unmoaned', 'unmourned', 'unmoved', 'unmown', 'unnamed', 'unoiled', 'unowned', 'unpaced', 'unpaged', 'unpaid', 'unpained', 'unpaired', 'unpared', 'unpaved', 'unpeeled', 'unpent', 'unperched', 'unpicked', 'unpierced', 'unplaced', 'unplagued', 'unplanked', 'unplayed', 'unpleased', 'unpledged', 'unploughed', 'unplucked', 'unplumb', 'unplumbed', 'unplumed', 'unpoised', 'unpolled', 'unposed', 'unpraised', 'unpreached', 'unpressed', 'unpriced', 'unprimed', 'unprized', 'unpropped', 'unproved', 'unpruned', 'unpurged', 'unquelled', 'unquenched', 'unraised', 'unraked', 'unreached', 'unread', 'unreaped', 'unreined', 'unrent', 'unrhymed', 'unribbed', 'unrigged', 'unrimed', 'unripe', 'unroped', 'unrouged', 'unroused', 'unrubbed', 'unrude', 'unruled', 'unsafe', 'unsaid', 'unsailed', 'unsapped', 'unsashed', 'unsaved', 'unscaled', 'unscanned', 'unscarred', 'unscathed', 'unschooled', 'unscorched', 'unscoured', 'unscratched', 'unscreened', 'unsealed', 'unsearched', 'unseen', 'unseized', 'unsensed', 'unsent', 'unset', 'unshamed', 'unshaped', 'unshared', 'unshaved', 'unsheathed', 'unshed', 'unshipped', 'unshocked', 'unshod', 'unshoed', 'unshorn', 'unshown', 'unshrived', 'unshunned', 'unshut', 'unsight', 'unsigned', 'unsized', 'unskilled', 'unskimmed', 'unskinned', 'unslain', 'unsliced', 'unsluiced', 'unslung', 'unsmirched', 'unsmooth', 'unsmoothed', 'unsnuffed', 'unsoaped', 'unsoft', 'unsoiled', 'unsold', 'unsolved', 'unsought', 'unsound', 'unsown', 'unspared', 'unsparred', 'unspelled', 'unspent', 'unspied', 'unspilled', 'unspilt', 'unspoiled', 'unspoilt', 'unsprung', 'unspun', 'unsquared', 'unstack', 'unstacked', 'unstaid', 'unstained', 'unstamped', 'unstarched', 'unstilled', 'unstirred', 'unstitched', 'unstocked', 'unstopped', 'unstrained', 'unstreamed', 'unstressed', 'unstringed', 'unstriped', 'unstripped', 'unstrung', 'unstuck', 'unstuffed', 'unsucked', 'unsung', 'unsure', 'unswayed', 'unswept', 'unsworn', 'untailed', 'untame', 'untamed', 'untanned', 'untapped', 'untarred', 'untaught', 'unteamed', 'unthanked', 'unthawed', 'unthought', 'untied', 'untiled', 'untilled', 'untinged', 'untinned', 'untired', 'untold', 'untombed', 'untoned', 'untorn', 'untouched', 'untraced', 'untracked', 'untrained', 'untrenched', 'untressed', 'untried', 'untrimmed', 'untrod', 'untrue', 'unturfed', 'unturned', 'unurged', 'unused', 'unversed', 'unvexed', 'unviewed', 'unvoiced', 'unwaked', 'unwarmed', 'unwarned', 'unwarped', 'unwashed', 'unwatched', 
'unweaned', 'unwebbed', 'unwed', 'unweened', 'unweighed', 'unwell', 'unwept', 'unwet', 'unwhipped', 'unwilled', 'unwinged', 'unwiped', 'unwired', 'unwise', 'unwished', 'unwitched', 'unwon', 'unwooed', 'unworked', 'unworn', 'unwound', 'unwrapped', 'unwrought', 'unwrung', 'upbeat', 'upbound', 'upcast', 'upgrade', 'uphill', 'upmost', 'uppish', 'upraised', 'upset', 'upstage', 'upstaged', 'upstair', 'upstairs', 'upstart', 'upstate', 'upstream', 'uptight', 'uptown', 'upturned', 'upward', 'upwind', 'urbane', 'urdy', 'urgent', 'urnfield', 'useful', 'useless', 'utile', 'utmost', 'vadose', 'vagal', 'vagrant', 'vagrom', 'vaguer', 'vaguest', 'valanced', 'valgus', 'valiant', 'valid', 'valval', 'valvar', 'valvate', 'vambraced', 'vaneless', 'vanward', 'vapid', 'varied', 'varus', 'vassal', 'vasty', 'vatic', 'vaulted', 'vaulting', 'vaunted', 'vaunting', 'vaunty', 'veilless', 'veiny', 'velar', 'velate', 'vellum', 'venal', 'vengeful', 'venose', 'venous', 'ventose', 'verbless', 'verbose', 'verdant', 'verism', 'verist', 'vespine', 'vestral', 'vibrant', 'viceless', 'viewless', 'viewy', 'villose', 'villous', 'vinous', 'viral', 'virgate', 'virile', 'visaged', 'viscid', 'viscose', 'viscous', 'vitric', 'vivid', 'vivo', 'vixen', 'voetstoots', 'vogie', 'voiceful', 'voiceless', 'voided', 'volant', 'volar', 'volumed', 'volvate', 'vorant', 'voteless', 'votive', 'vulpine', 'vying', 'wacky', 'wageless', 'waggish', 'waggly', 'wailful', 'wailing', 'waisted', 'wakeful', 'wakeless', 'wakerife', 'waking', 'walnut', 'wambly', 'wandle', 'waney', 'waning', 'wanner', 'wannest', 'wanning', 'wannish', 'wanting', 'wanton', 'warded', 'warlike', 'warming', 'warmish', 'warning', 'warring', 'wartless', 'wartlike', 'warty', 'wary', 'washy', 'waspish', 'waspy', 'wasted', 'wasteful', 'watchful', 'waveless', 'wavelike', 'waving', 'wavy', 'waxen', 'waxing', 'waxy', 'wayless', 'wayward', 'wayworn', 'weakly', 'weaponed', 'wearied', 'wearing', 'wearish', 'weary', 'weathered', 'webby', 'wedded', 'wedgy', 'weedy', 'weekday', 'weekly', 'weeny', 'weepy', 'weer', 'weest', 'weighted', 'weighty', 'welcome', 'weldless', 'westbound', 'western', 'wetter', 'wettish', 'whacking', 'whacky', 'whapping', "whate'er", 'wheaten', 'wheezing', 'wheezy', 'wheyey', 'whilom', 'whining', 'whinny', 'whiny', 'whiplike', 'whirring', 'whiskered', 'whitish', 'whittling', 'whity', 'wholesale', 'wholesome', 'whopping', 'whoreson', 'whorish', 'wicked', 'wicker', 'wider', 'widespread', 'widest', 'widish', 'wieldy', 'wifeless', 'wifely', 'wiggly', 'wigless', 'wiglike', 'wilful', 'willful', 'willing', 'willyard', 'wily', 'wimpy', 'windburned', 'winded', 'windproof', 'windswept', 'windy', 'wingless', 'winglike', 'wintry', 'winy', 'wiretap', 'wiring', 'wiry', 'wiser', 'wisest', 'wising', 'wispy', 'wistful', 'witchy', 'withdrawn', 'withy', 'witless', 'witted', 'witting', 'witty', 'wizard', 'wizen', 'wizened', 'woaded', 'wobbling', 'woeful', 'woesome', 'wolfish', 'wonky', 'wonted', 'wooded', 'woodless', 'woodsy', 'woodwind', 'woolen', 'woollen', 'woozier', 'woozy', 'wordless', 'wordy', 'workless', 'worldly', 'worldwide', 'wormy', 'worried', 'worser', 'worshipped', 'worthless', 'worthwhile', 'worthy', 'wounded', 'woundless', 'woven', 'wrapround', 'wrathful', 'wrathless', 'wreathless', 'wreckful', 'wretched', 'wrier', 'wriest', 'wriggly', 'wrinkly', 'writhen', 'writhing', 'written', 'wrongful', 'xanthous', 'xerarch', 'xeric', 'xiphoid', 'xylic', 'xyloid', 'yarer', 'yarest', 'yawning', 'yclept', 'yearling', 'yearlong', 'yearly', 'yearning', 'yeastlike', 'yeasty', 'yester', 'yestern', 
'yielding', 'yogic', 'yolky', 'yonder', 'younger', 'youthful', 'yttric', 'yuletide', 'zany', 'zealous', 'zebrine', 'zeroth', 'zestful', 'zesty', 'zigzag', 'zillion', 'zincky', 'zincoid', 'zincous', 'zincy', 'zingy', 'zinky', 'zippy', 'zonate', 'zoning'] -------------------------------------------------------------------------------- /deep_tabular/models/__init__.py: -------------------------------------------------------------------------------- 1 | """Model package""" 2 | #from .boosting import catboost, xgboost 3 | from .ft_transformer import ft_transformer, ft_tokenizer, ft_backbone 4 | from .mlp import mlp 5 | from .resnet import resnet 6 | 7 | __all__ = ["ft_transformer", 8 | "ft_tokenizer", 9 | "ft_backbone", 10 | "mlp", 11 | "resnet" 12 | ] 13 | -------------------------------------------------------------------------------- /deep_tabular/models/ft_transformer.py: -------------------------------------------------------------------------------- 1 | """ ft_transformer.py 2 | FT Transformer model class 3 | Adopted from https://github.com/Yura52/rtdl 4 | March 2022 5 | """ 6 | 7 | import math 8 | import typing as ty 9 | 10 | # from icecream import ic 11 | import torch 12 | import torch.nn as nn 13 | import torch.nn.functional as F 14 | import torch.nn.init as nn_init 15 | from torch import Tensor 16 | 17 | 18 | def reglu(x: Tensor) -> Tensor: 19 | a, b = x.chunk(2, dim=-1) 20 | return a * F.relu(b) 21 | 22 | 23 | class Tokenizer(nn.Module): 24 | category_offsets: ty.Optional[Tensor] 25 | 26 | def __init__(self, d_numerical: int, categories: ty.Optional[ty.List[int]], d_token: int, bias: bool) -> None: 27 | super().__init__() 28 | if categories is None: 29 | d_bias = d_numerical 30 | self.category_offsets = None 31 | self.category_embeddings = None 32 | else: 33 | d_bias = d_numerical + len(categories) 34 | category_offsets = torch.tensor([0] + categories[:-1]).cumsum(0) 35 | self.register_buffer("category_offsets", category_offsets) 36 | self.category_embeddings = nn.Embedding(sum(categories), d_token) 37 | nn_init.kaiming_uniform_(self.category_embeddings.weight, a=math.sqrt(5)) 38 | 39 | # take [CLS] token into account 40 | self.weight = nn.Parameter(Tensor(d_numerical + 1, d_token)) 41 | self.bias = nn.Parameter(Tensor(d_bias, d_token)) if bias else None 42 | # The initialization is inspired by nn.Linear 43 | nn_init.kaiming_uniform_(self.weight, a=math.sqrt(5)) 44 | if self.bias is not None: 45 | nn_init.kaiming_uniform_(self.bias, a=math.sqrt(5)) 46 | 47 | @property 48 | def n_tokens(self) -> int: 49 | return len(self.weight) + ( 50 | 0 if self.category_offsets is None else len(self.category_offsets) 51 | ) 52 | 53 | def forward(self, x_num: Tensor, x_cat: ty.Optional[Tensor]) -> Tensor: 54 | x_some = x_num if x_cat is None else x_cat 55 | assert x_some is not None 56 | x_num = torch.cat( 57 | [torch.ones(len(x_some), 1, device=x_some.device)] # [CLS] 58 | + ([] if x_num is None else [x_num]), 59 | dim=1, 60 | ) 61 | x = self.weight[None] * x_num[:, :, None] 62 | if x_cat is not None: 63 | x = torch.cat( 64 | [x, self.category_embeddings(x_cat + self.category_offsets[None])], 65 | dim=1, 66 | ) 67 | if self.bias is not None: 68 | bias = torch.cat( 69 | [ 70 | torch.zeros(1, self.bias.shape[1], device=x.device), 71 | self.bias, 72 | ] 73 | ) 74 | x = x + bias[None] 75 | return x 76 | 77 | 78 | class MultiheadAttention(nn.Module): 79 | def __init__( 80 | self, d: int, n_heads: int, dropout: float, initialization: str 81 | ) -> None: 82 | if n_heads > 1: 83 | assert d % n_heads == 0 
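# each attention head operates on a d // n_heads slice of the embedding (see _reshape below), which is why d must divide evenly across the heads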
84 | assert initialization in ["xavier", "kaiming"] 85 | 86 | super().__init__() 87 | self.W_q = nn.Linear(d, d) 88 | self.W_k = nn.Linear(d, d) 89 | self.W_v = nn.Linear(d, d) 90 | self.W_out = nn.Linear(d, d) if n_heads > 1 else None 91 | self.n_heads = n_heads 92 | self.dropout = nn.Dropout(dropout) if dropout else None 93 | 94 | for m in [self.W_q, self.W_k, self.W_v]: 95 | if initialization == "xavier" and (n_heads > 1 or m is not self.W_v): 96 | # gain is needed since W_qkv is represented with 3 separate layers 97 | nn_init.xavier_uniform_(m.weight, gain=1 / math.sqrt(2)) 98 | nn_init.zeros_(m.bias) 99 | if self.W_out is not None: 100 | nn_init.zeros_(self.W_out.bias) 101 | 102 | def _reshape(self, x: Tensor) -> Tensor: 103 | batch_size, n_tokens, d = x.shape 104 | d_head = d // self.n_heads 105 | return ( 106 | x.reshape(batch_size, n_tokens, self.n_heads, d_head) 107 | .transpose(1, 2) 108 | .reshape(batch_size * self.n_heads, n_tokens, d_head) 109 | ) 110 | 111 | def forward( 112 | self, 113 | x_q: Tensor, 114 | x_kv: Tensor, 115 | key_compression: ty.Optional[nn.Linear], 116 | value_compression: ty.Optional[nn.Linear], 117 | ) -> Tensor: 118 | q, k, v = self.W_q(x_q), self.W_k(x_kv), self.W_v(x_kv) 119 | for tensor in [q, k, v]: 120 | assert tensor.shape[-1] % self.n_heads == 0 121 | if key_compression is not None: 122 | assert value_compression is not None 123 | k = key_compression(k.transpose(1, 2)).transpose(1, 2) 124 | v = value_compression(v.transpose(1, 2)).transpose(1, 2) 125 | else: 126 | assert value_compression is None 127 | 128 | batch_size = len(q) 129 | d_head_key = k.shape[-1] // self.n_heads 130 | d_head_value = v.shape[-1] // self.n_heads 131 | n_q_tokens = q.shape[1] 132 | 133 | q = self._reshape(q) 134 | k = self._reshape(k) 135 | attention = F.softmax(q @ k.transpose(1, 2) / math.sqrt(d_head_key), dim=-1) 136 | if self.dropout is not None: 137 | attention = self.dropout(attention) 138 | x = attention @ self._reshape(v) 139 | x = ( 140 | x.reshape(batch_size, self.n_heads, n_q_tokens, d_head_value) 141 | .transpose(1, 2) 142 | .reshape(batch_size, n_q_tokens, self.n_heads * d_head_value) 143 | ) 144 | if self.W_out is not None: 145 | x = self.W_out(x) 146 | return x 147 | 148 | 149 | class FTTransformer(nn.Module): 150 | """Transformer. 
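    Numerical and categorical features are embedded into a sequence of tokens (with a prepended [CLS] token), processed by a stack of self-attention blocks, and the final [CLS] representation is fed to a linear prediction head.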
151 | 152 | References: 153 | - https://pytorch.org/docs/stable/generated/torch.nn.Transformer.html 154 | - https://github.com/facebookresearch/pytext/tree/master/pytext/models/representations/transformer 155 | - https://github.com/pytorch/fairseq/blob/1bba712622b8ae4efb3eb793a8a40da386fe11d0/examples/linformer/linformer_src/modules/multihead_linear_attention.py#L19 156 | """ 157 | 158 | def __init__(self, 159 | d_numerical, 160 | d_out, 161 | categories, 162 | d_embedding, 163 | token_bias, 164 | n_layers, 165 | n_heads, 166 | d_ffn_factor, 167 | attention_dropout, 168 | ffn_dropout, 169 | residual_dropout, 170 | activation, 171 | prenormalization, 172 | initialization, 173 | kv_compression, 174 | kv_compression_sharing): 175 | assert (kv_compression is None) ^ (kv_compression_sharing is not None) 176 | 177 | super().__init__() 178 | self.tokenizer = Tokenizer(d_numerical, categories, d_embedding, token_bias) 179 | n_tokens = self.tokenizer.n_tokens 180 | 181 | def make_kv_compression(): 182 | assert kv_compression 183 | compression = nn.Linear( 184 | n_tokens, int(n_tokens * kv_compression), bias=False 185 | ) 186 | if initialization == "xavier": 187 | nn_init.xavier_uniform_(compression.weight) 188 | return compression 189 | 190 | self.shared_kv_compression = ( 191 | make_kv_compression() 192 | if kv_compression and kv_compression_sharing == "layerwise" 193 | else None 194 | ) 195 | 196 | def make_normalization(): 197 | return nn.LayerNorm(d_embedding) 198 | 199 | d_hidden = int(d_embedding * d_ffn_factor) 200 | self.layers = nn.ModuleList([]) 201 | for layer_idx in range(n_layers): 202 | layer = nn.ModuleDict( 203 | { 204 | "attention": MultiheadAttention( 205 | d_embedding, n_heads, attention_dropout, initialization 206 | ), 207 | "linear0": nn.Linear( 208 | d_embedding, d_hidden * (2 if activation.endswith("glu") else 1) 209 | ), 210 | "linear1": nn.Linear(d_hidden, d_embedding), 211 | "norm1": make_normalization(), 212 | } 213 | ) 214 | if not prenormalization or layer_idx: 215 | layer["norm0"] = make_normalization() 216 | if kv_compression and self.shared_kv_compression is None: 217 | layer["key_compression"] = make_kv_compression() 218 | if kv_compression_sharing == "headwise": 219 | layer["value_compression"] = make_kv_compression() 220 | else: 221 | assert kv_compression_sharing == "key-value" 222 | self.layers.append(layer) 223 | 224 | self.activation = reglu 225 | self.last_activation = F.relu 226 | self.prenormalization = prenormalization 227 | self.last_normalization = make_normalization() if prenormalization else None 228 | self.ffn_dropout = ffn_dropout 229 | self.residual_dropout = residual_dropout 230 | self.head = nn.Linear(d_embedding, d_out) 231 | 232 | def _get_kv_compressions(self, layer): 233 | return ( 234 | (self.shared_kv_compression, self.shared_kv_compression) 235 | if self.shared_kv_compression is not None 236 | else (layer["key_compression"], layer["value_compression"]) 237 | if "key_compression" in layer and "value_compression" in layer 238 | else (layer["key_compression"], layer["key_compression"]) 239 | if "key_compression" in layer 240 | else (None, None) 241 | ) 242 | 243 | def _start_residual(self, x, layer, norm_idx): 244 | x_residual = x 245 | if self.prenormalization: 246 | norm_key = f"norm{norm_idx}" 247 | if norm_key in layer: 248 | x_residual = layer[norm_key](x_residual) 249 | return x_residual 250 | 251 | def _end_residual(self, x, x_residual, layer, norm_idx): 252 | if self.residual_dropout: 253 | x_residual = F.dropout(x_residual, 
self.residual_dropout, self.training) 254 | x = x + x_residual 255 | if not self.prenormalization: 256 | x = layer[f"norm{norm_idx}"](x) 257 | return x 258 | 259 | def forward(self, x_num, x_cat): 260 | x = self.tokenizer(x_num, x_cat) 261 | 262 | for layer_idx, layer in enumerate(self.layers): 263 | is_last_layer = layer_idx + 1 == len(self.layers) 264 | layer = ty.cast(ty.Dict[str, nn.Module], layer) 265 | 266 | x_residual = self._start_residual(x, layer, 0) 267 | x_residual = layer["attention"]( 268 | # for the last attention, it is enough to process only [CLS] 269 | (x_residual[:, :1] if is_last_layer else x_residual), 270 | x_residual, 271 | *self._get_kv_compressions(layer), 272 | ) 273 | if is_last_layer: 274 | x = x[:, : x_residual.shape[1]] 275 | x = self._end_residual(x, x_residual, layer, 0) 276 | 277 | x_residual = self._start_residual(x, layer, 1) 278 | x_residual = layer["linear0"](x_residual) 279 | x_residual = self.activation(x_residual) 280 | if self.ffn_dropout: 281 | x_residual = F.dropout(x_residual, self.ffn_dropout, self.training) 282 | x_residual = layer["linear1"](x_residual) 283 | x = self._end_residual(x, x_residual, layer, 1) 284 | 285 | assert x.shape[1] == 1 286 | x = x[:, 0] 287 | if self.last_normalization is not None: 288 | x = self.last_normalization(x) 289 | x = self.last_activation(x) 290 | x = self.head(x) 291 | x = x.squeeze(-1) 292 | return x 293 | 294 | 295 | class FTBackbone(nn.Module): 296 | 297 | def __init__(self, d_embedding, n_layers, n_heads, d_ffn_factor, attention_dropout, ffn_dropout, 298 | residual_dropout, activation, prenormalization, initialization): 299 | super().__init__() 300 | 301 | d_hidden = int(d_embedding * d_ffn_factor) 302 | self.layers = nn.ModuleList([]) 303 | for layer_idx in range(n_layers): 304 | layer = nn.ModuleDict( 305 | { 306 | "attention": MultiheadAttention( 307 | d_embedding, n_heads, attention_dropout, initialization 308 | ), 309 | "linear0": nn.Linear( 310 | d_embedding, d_hidden * (2 if activation.endswith("glu") else 1) 311 | ), 312 | "linear1": nn.Linear(d_hidden, d_embedding), 313 | "norm1": nn.LayerNorm(d_embedding), 314 | } 315 | ) 316 | if not prenormalization or layer_idx: 317 | layer["norm0"] = nn.LayerNorm(d_embedding) 318 | self.layers.append(layer) 319 | 320 | self.activation = reglu 321 | self.last_activation = F.relu 322 | self.prenormalization = prenormalization 323 | self.last_normalization = nn.LayerNorm(d_embedding) if prenormalization else None 324 | self.ffn_dropout = ffn_dropout 325 | self.residual_dropout = residual_dropout 326 | 327 | def _start_residual(self, x, layer, norm_idx): 328 | x_residual = x 329 | if self.prenormalization: 330 | norm_key = f"norm{norm_idx}" 331 | if norm_key in layer: 332 | x_residual = layer[norm_key](x_residual) 333 | return x_residual 334 | 335 | def _end_residual(self, x, x_residual, layer, norm_idx): 336 | if self.residual_dropout: 337 | x_residual = F.dropout(x_residual, self.residual_dropout, self.training) 338 | x = x + x_residual 339 | if not self.prenormalization: 340 | x = layer[f"norm{norm_idx}"](x) 341 | return x 342 | 343 | def forward(self, x): 344 | 345 | for layer_idx, layer in enumerate(self.layers): 346 | is_last_layer = layer_idx + 1 == len(self.layers) 347 | layer = ty.cast(ty.Dict[str, nn.Module], layer) 348 | 349 | x_residual = self._start_residual(x, layer, 0) 350 | x_residual = layer["attention"]( 351 | # for the last attention, it is enough to process only [CLS] 352 | (x_residual[:, :1] if is_last_layer else x_residual), 353 | x_residual, 
354 | None, 355 | None, 356 | ) 357 | if is_last_layer: 358 | x = x[:, : x_residual.shape[1]] 359 | x = self._end_residual(x, x_residual, layer, 0) 360 | 361 | x_residual = self._start_residual(x, layer, 1) 362 | x_residual = layer["linear0"](x_residual) 363 | x_residual = self.activation(x_residual) 364 | if self.ffn_dropout: 365 | x_residual = F.dropout(x_residual, self.ffn_dropout, self.training) 366 | x_residual = layer["linear1"](x_residual) 367 | x = self._end_residual(x, x_residual, layer, 1) 368 | 369 | assert x.shape[1] == 1 370 | x = x[:, 0] 371 | if self.last_normalization is not None: 372 | x = self.last_normalization(x) 373 | x = self.last_activation(x) 374 | return x 375 | 376 | 377 | def ft_transformer(num_numerical, unique_categories, num_outputs, d_embedding, model_params): 378 | return FTTransformer(num_numerical, num_outputs, unique_categories, d_embedding, 379 | model_params.token_bias, 380 | model_params.n_layers, 381 | model_params.n_heads, 382 | model_params.d_ffn_factor, 383 | model_params.attention_dropout, 384 | model_params.ffn_dropout, 385 | model_params.residual_dropout, 386 | model_params.activation, 387 | model_params.prenormalization, 388 | model_params.initialization, 389 | model_params.kv_compression, 390 | model_params.kv_compression_sharing) 391 | 392 | 393 | def ft_tokenizer(num_numerical, unique_categories, d_embedding, token_bias): 394 | return Tokenizer(num_numerical, unique_categories, d_embedding, token_bias) 395 | 396 | 397 | def ft_backbone(model_params): 398 | return FTBackbone(model_params.d_embedding, 399 | model_params.n_layers, 400 | model_params.n_heads, 401 | model_params.d_ffn_factor, 402 | model_params.attention_dropout, 403 | model_params.ffn_dropout, 404 | model_params.residual_dropout, 405 | model_params.activation, 406 | model_params.prenormalization, 407 | model_params.initialization) 408 | -------------------------------------------------------------------------------- /deep_tabular/models/mlp.py: -------------------------------------------------------------------------------- 1 | """ mlp.py 2 | MLP model class 3 | Adopted from https://github.com/Yura52/rtdl 4 | March 2022 5 | """ 6 | 7 | import math 8 | 9 | import torch 10 | import torch.nn as nn 11 | import torch.nn.functional as F 12 | 13 | 14 | class MLP(nn.Module): 15 | def __init__(self, d_in, d_out, categories, d_embedding, d_layers, dropout): 16 | super().__init__() 17 | 18 | # if we have categorical data 19 | if categories is not None: 20 | # update d_in to account for the embedded categorical features 21 | # TODO Why isn't d_in correct to begin with? Does this mean it is just dimension of numerical data?
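# d_in as passed in counts only the numerical features (the mlp() factory below passes num_numerical); each categorical column is embedded into a d_embedding-dimensional vector and concatenated in forward(), hence the adjustment below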
22 | d_in += len(categories) * d_embedding 23 | 24 | # compute offsets so that categorical features do not overlap 25 | category_offsets = torch.tensor([0] + categories[:-1]).cumsum(0) 26 | self.register_buffer("category_offsets", category_offsets) 27 | self.category_embeddings = nn.Embedding(sum(categories), d_embedding) 28 | nn.init.kaiming_uniform_(self.category_embeddings.weight, a=math.sqrt(5)) 29 | 30 | self.layers = nn.ModuleList([nn.Linear(d_layers[i - 1] if i else d_in, x) for i, x in enumerate(d_layers)]) 31 | self.dropout = dropout 32 | self.head = nn.Linear(d_layers[-1] if d_layers else d_in, d_out) 33 | 34 | def forward(self, x_num, x_cat): 35 | x = [] 36 | if x_num is not None: 37 | x.append(x_num) 38 | if x_cat is not None: 39 | x.append(self.category_embeddings(x_cat + self.category_offsets[None]).view(x_cat.size(0), -1)) 40 | x = torch.cat(x, dim=-1) 41 | 42 | for layer in self.layers: 43 | x = layer(x) 44 | x = F.relu(x) 45 | if self.dropout: 46 | x = F.dropout(x, self.dropout, self.training) 47 | x = self.head(x) 48 | x = x.squeeze(-1) 49 | return x 50 | 51 | 52 | def mlp(num_numerical, unique_categories, num_outputs, d_embedding, model_params): 53 | return MLP(num_numerical, num_outputs, unique_categories, d_embedding, model_params.d_layers, model_params.dropout) 54 | -------------------------------------------------------------------------------- /deep_tabular/models/resnet.py: -------------------------------------------------------------------------------- 1 | """ resnet.py 2 | ResNet model class 3 | Adopted from https://github.com/Yura52/rtdl 4 | March 2022 5 | """ 6 | 7 | import math 8 | import typing as ty 9 | import torch 10 | import torch.nn as nn 11 | import torch.nn.functional as F 12 | 13 | 14 | class ResNet(nn.Module): 15 | def __init__(self, 16 | d_numerical, 17 | d_out, 18 | categories, 19 | d_embedding, 20 | d, 21 | d_hidden_factor, 22 | n_layers, 23 | activation, 24 | normalization, 25 | hidden_dropout, 26 | residual_dropout): 27 | super().__init__() 28 | 29 | def make_normalization(): 30 | return {'batchnorm': nn.BatchNorm1d, 'layernorm': nn.LayerNorm}[ 31 | normalization 32 | ](d) 33 | 34 | self.main_activation = F.relu 35 | self.last_activation = F.relu 36 | self.residual_dropout = residual_dropout 37 | self.hidden_dropout = hidden_dropout 38 | 39 | d_in = d_numerical 40 | d_hidden = int(d * d_hidden_factor) 41 | 42 | if categories is not None: 43 | d_in += len(categories) * d_embedding 44 | category_offsets = torch.tensor([0] + categories[:-1]).cumsum(0) 45 | self.register_buffer('category_offsets', category_offsets) 46 | self.category_embeddings = nn.Embedding(sum(categories), d_embedding) 47 | nn.init.kaiming_uniform_(self.category_embeddings.weight, a=math.sqrt(5)) 48 | 49 | self.first_layer = nn.Linear(d_in, d) 50 | self.layers = nn.ModuleList( 51 | [ 52 | nn.ModuleDict( 53 | { 54 | 'norm': make_normalization(), 55 | 'linear0': nn.Linear( 56 | d, d_hidden * (2 if activation.endswith('glu') else 1) 57 | ), 58 | 'linear1': nn.Linear(d_hidden, d), 59 | } 60 | ) 61 | for _ in range(n_layers) 62 | ] 63 | ) 64 | self.last_normalization = make_normalization() 65 | self.head = nn.Linear(d, d_out) 66 | 67 | def forward(self, x_num, x_cat): 68 | x = [] 69 | if x_num is not None: 70 | x.append(x_num) 71 | if x_cat is not None: 72 | x.append( 73 | self.category_embeddings(x_cat + self.category_offsets[None]).view( 74 | x_cat.size(0), -1 75 | ) 76 | ) 77 | x = torch.cat(x, dim=-1) 78 | 79 | x = self.first_layer(x) 80 | for layer in self.layers: 81 | layer =
ty.cast(ty.Dict[str, nn.Module], layer) 82 | z = x 83 | z = layer['norm'](z) 84 | z = layer['linear0'](z) 85 | z = self.main_activation(z) 86 | if self.hidden_dropout: 87 | z = F.dropout(z, self.hidden_dropout, self.training) 88 | z = layer['linear1'](z) 89 | if self.residual_dropout: 90 | z = F.dropout(z, self.residual_dropout, self.training) 91 | x = x + z 92 | x = self.last_normalization(x) 93 | x = self.last_activation(x) 94 | x = self.head(x) 95 | x = x.squeeze(-1) 96 | return x 97 | 98 | 99 | def resnet(num_numerical, unique_categories, num_outputs, d_embedding, model_params): 100 | return ResNet(num_numerical, num_outputs, unique_categories, d_embedding, 101 | model_params.d, 102 | model_params.d_hidden_factor, 103 | model_params.n_layers, 104 | model_params.activation, 105 | model_params.normalization, 106 | model_params.hidden_dropout, 107 | model_params.residual_dropout) 108 | -------------------------------------------------------------------------------- /deep_tabular/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .data_tools import get_data_openml, get_categories_full_cat_data, TabularDataset 2 | from .tools import generate_run_id 3 | from .tools import get_backbone 4 | from .tools import get_criterion 5 | from .tools import get_dataloaders 6 | from .tools import get_embedder 7 | from .tools import get_head 8 | from .tools import get_optimizer_for_backbone, get_optimizer_for_single_net 9 | from .tools import load_transfer_model_from_checkpoint, load_model_from_checkpoint 10 | from .tools import write_to_tb 11 | 12 | __all__ = ["generate_run_id", 13 | "get_backbone", 14 | "get_categories_full_cat_data", 15 | "get_data_openml", 16 | "get_dataloaders", 17 | "get_embedder", 18 | "get_head", 19 | "get_optimizer_for_backbone", 20 | "get_optimizer_for_single_net", 21 | "load_transfer_model_from_checkpoint", 22 | "load_model_from_checkpoint", 23 | "TabularDataset", 24 | "write_to_tb"] 25 | -------------------------------------------------------------------------------- /deep_tabular/utils/data_tools.py: -------------------------------------------------------------------------------- 1 | """ data_tools.py 2 | Tools for building tabular datasets 3 | Developed for Tabular Transfer Learning project 4 | April 2022 5 | Some functionality adopted from https://github.com/Yura52/rtdl 6 | """ 7 | 8 | import logging 9 | import os 10 | import warnings 11 | from copy import deepcopy 12 | from dataclasses import dataclass 13 | from typing import Optional, Dict, Any 14 | 15 | # from icecream import ic 16 | import numpy as np 17 | import openml 18 | import pandas as pd 19 | import sklearn.preprocessing 20 | import torch 21 | from sklearn.preprocessing import LabelEncoder 22 | from sklearn.model_selection import train_test_split 23 | import pickle 24 | 25 | # Ignore statements for pylint: 26 | # Too many branches (R0912), Too many statements (R0915), No member (E1101), 27 | # Not callable (E1102), Invalid name (C0103), No exception (W0702), 28 | # Too many local variables (R0914), Missing docstring (C0116, C0115). 
29 | # pylint: disable=R0912, R0915, E1101, E1102, C0103, W0702, R0914, C0116, C0115 30 | 31 | 32 | def get_categories_full_cat_data(full_cat_data_for_encoder): 33 | return ( 34 | None 35 | if full_cat_data_for_encoder is None 36 | else [ 37 | len(set(full_cat_data_for_encoder.values[:, i])) 38 | for i in range(full_cat_data_for_encoder.shape[1]) 39 | ] 40 | ) 41 | 42 | 43 | def get_data_openml(dataset_id): 44 | dataset = openml.datasets.get_dataset(dataset_id) 45 | data, targets, categorical_indicator, attribute_names = dataset.get_data(dataset_format="dataframe", 46 | target=dataset.default_target_attribute) 47 | categorical_columns = list(data.columns[np.array(categorical_indicator)]) 48 | numerical_columns = list(data.columns[~np.array(categorical_indicator)]) 49 | return data, targets, categorical_columns, numerical_columns 50 | 51 | 52 | def get_data_locally(ds_id): 53 | if os.path.exists(f'../../../data/{ds_id}/N.csv'): 54 | X_full_num = pd.read_csv(f'../../../data/{ds_id}/N.csv') 55 | numerical_columns = list(X_full_num.columns) 56 | else: 57 | X_full_num = pd.DataFrame() 58 | numerical_columns = [] 59 | if os.path.exists(f'../../../data/{ds_id}/C.csv'): 60 | X_full_cat = pd.read_csv(f'../../../data/{ds_id}/C.csv') 61 | categorical_columns = list(X_full_cat.columns) 62 | else: 63 | X_full_cat = pd.DataFrame() 64 | categorical_columns = [] 65 | 66 | X_full = pd.concat([X_full_num, X_full_cat], axis = 1) 67 | y_full = pd.read_csv(f'../../../data/{ds_id}/y.csv') 68 | 69 | if y_full.shape[1] == 1: 70 | y_full = y_full.iloc[:, 0] 71 | else: 72 | raise ValueError('Targets have more than one column and the task is not multilabel') 73 | 74 | 75 | return X_full, y_full, categorical_columns, numerical_columns 76 | 77 | def get_data(dataset_id, source, task, datasplit=[.65, .15, .2]): 78 | """ 79 | Function to read and prepare a multiclass/binclass/regression dataset 80 | """ 81 | seed = 0 82 | np.random.seed(seed) 83 | 84 | if source == 'openml': 85 | data, targets, categorical_columns, numerical_columns = get_data_openml(dataset_id) 86 | elif source == 'local': 87 | data, targets, categorical_columns, numerical_columns = get_data_locally(dataset_id) 88 | np.random.seed(seed) 89 | # Fixes some bugs in openml datasets 90 | if targets.dtype.name == "category": 91 | targets = targets.apply(str).astype('object') 92 | 93 | for col in categorical_columns: 94 | data[col] = data[col].apply(str).astype("object") 95 | 96 | # reindex and find NaNs/Missing values in categorical columns 97 | data, targets = data.reset_index(drop=True), targets.reset_index(drop=True) 98 | data[categorical_columns] = data[categorical_columns].fillna("___null___") 99 | 100 | if task != 'regression': 101 | l_enc = LabelEncoder() 102 | targets = l_enc.fit_transform(targets) 103 | else: 104 | targets = targets.to_numpy() 105 | 106 | # split data into train/val/test 107 | train_size, test_size, valid_size = datasplit[0], datasplit[2], datasplit[1]/(1-datasplit[2]) 108 | if task != 'regression': 109 | data_train, data_test, targets_train, targets_test = train_test_split(data, targets, test_size=test_size, random_state=seed, stratify = targets) 110 | data_train, data_val, targets_train, targets_val = train_test_split(data_train, targets_train, test_size=valid_size, random_state=seed, stratify = targets_train) 111 | else: 112 | data_train, data_test, targets_train, targets_test = train_test_split(data, targets, test_size=test_size, random_state=seed) 113 | data_train, data_val, targets_train, targets_val = 
train_test_split(data_train, targets_train, test_size=valid_size, random_state=seed) 114 | 115 | 116 | 117 | data_cat_train = data_train[categorical_columns].values 118 | data_num_train = data_train[numerical_columns].values 119 | 120 | data_cat_val = data_val[categorical_columns].values 121 | data_num_val = data_val[numerical_columns].values 122 | 123 | data_cat_test = data_test[categorical_columns].values 124 | data_num_test = data_test[numerical_columns].values 125 | 126 | info = {"name": dataset_id, 127 | "task_type": task, 128 | "n_num_features": len(numerical_columns), 129 | "n_cat_features": len(categorical_columns), 130 | "train_size": data_train.shape[0], 131 | "val_size": data_val.shape[0], 132 | "test_size": data_test.shape[0]} 133 | 134 | if task == "multiclass": 135 | info["n_classes"] = len(set(targets)) 136 | if task == "binclass": 137 | info["n_classes"] = 1 138 | if task == "regression": 139 | info["n_classes"] = 1 140 | 141 | if len(numerical_columns) > 0: 142 | numerical_data = {"train": data_num_train, "val": data_num_val, "test": data_num_test} 143 | else: 144 | numerical_data = None 145 | 146 | if len(categorical_columns) > 0: 147 | categorical_data = {"train": data_cat_train, "val": data_cat_val, "test": data_cat_test} 148 | else: 149 | categorical_data = None 150 | 151 | targets = {"train": targets_train, "val": targets_val, "test": targets_test} 152 | 153 | if len(categorical_columns) > 0: 154 | full_cat_data_for_encoder = data[categorical_columns] 155 | else: 156 | full_cat_data_for_encoder = None 157 | 158 | return numerical_data, categorical_data, targets, info, full_cat_data_for_encoder 159 | 160 | 161 | def get_multilabel_data(ds_id, source, task): 162 | """ 163 | Function to read and prepare a multi-label dataset -- handling of multiple targets is slightly different from the other cases 164 | """ 165 | if source != 'local': 166 | raise ValueError("Only locally stored multilabel datasets are accepted. 
If it is local, double check 'source: local' in dataset config") 167 | seed = 0 168 | np.random.seed(seed) 169 | if os.path.exists(f'../../../data/{ds_id}/N.csv'): 170 | X_full_num = pd.read_csv(f'../../../data/{ds_id}/N.csv') 171 | numerical_columns = list(X_full_num.columns) 172 | else: 173 | X_full_num = pd.DataFrame() 174 | numerical_columns = [] 175 | if os.path.exists(f'../../../data/{ds_id}/C.csv'): 176 | X_full_cat = pd.read_csv(f'../../../data/{ds_id}/C.csv') 177 | categorical_columns = list(X_full_cat.columns) 178 | else: 179 | X_full_cat = pd.DataFrame() 180 | categorical_columns = [] 181 | 182 | X_full = pd.concat([X_full_num, X_full_cat], axis = 1) 183 | y_full = pd.read_csv(f'../../../data/{ds_id}/y.csv') 184 | 185 | X_train, X_test, y_train, y_test = train_test_split(X_full, y_full, test_size=0.2, random_state=1) 186 | X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.1875, random_state=1) 187 | 188 | X_train[categorical_columns] = X_train[categorical_columns].fillna("MissingValue") 189 | X_val[categorical_columns] = X_val[categorical_columns].fillna("MissingValue") 190 | X_test[categorical_columns] = X_test[categorical_columns].fillna("MissingValue") 191 | # print(numerical_columns) 192 | # print(categorical_columns) 193 | 194 | X_cat_train = X_train[categorical_columns].values 195 | X_num_train = X_train[numerical_columns].values.astype('float') 196 | y_train = y_train.values.astype('float') 197 | 198 | X_cat_val = X_val[categorical_columns].values 199 | X_num_val = X_val[numerical_columns].values.astype('float') 200 | y_val = y_val.values.astype('float') 201 | 202 | X_cat_test = X_test[categorical_columns].values 203 | X_num_test = X_test[numerical_columns].values.astype('float') 204 | y_test = y_test.values.astype('float') 205 | 206 | info = {} 207 | info['name'] = ds_id 208 | info['task_type'] = task 209 | info['n_num_features'] = len(numerical_columns) 210 | info['n_cat_features'] = len(categorical_columns) 211 | info['train_size'] = X_train.shape[0] 212 | info['val_size'] = X_val.shape[0] 213 | info['test_size'] = X_test.shape[0] 214 | 215 | 216 | if len(y_train.shape) > 1: 217 | info['n_classes'] = y_train.shape[1] 218 | else: 219 | info['n_classes'] = 1 220 | 221 | if len(numerical_columns) > 0: 222 | numerical_data = {'train': X_num_train, 'val': X_num_val, 'test': X_num_test} 223 | else: 224 | numerical_data = None 225 | 226 | if len(categorical_columns) > 0: 227 | categorical_data = {'train': X_cat_train, 'val': X_cat_val, 'test': X_cat_test} 228 | else: 229 | categorical_data = None 230 | 231 | targets = {'train': y_train, 'val': y_val, 'test': y_test} 232 | print('\n Train size:{} Val size:{} Test size:{}'.format(len(y_train), len(y_val), len(y_test))) 233 | 234 | if len(categorical_columns) > 0: 235 | full_cat_data_for_encoder = X_full[categorical_columns] 236 | else: 237 | full_cat_data_for_encoder = None 238 | return numerical_data, categorical_data, targets, info, full_cat_data_for_encoder 239 | 240 | @dataclass 241 | class TabularDataset: 242 | x_num: Optional[Dict[str, np.ndarray]] 243 | x_cat: Optional[Dict[str, np.ndarray]] 244 | y: Dict[str, np.ndarray] 245 | info: Dict[str, Any] 246 | normalization: Optional[str] 247 | cat_policy: str 248 | seed: int 249 | full_cat_data_for_encoder: Optional[pd.DataFrame] 250 | y_policy: Optional[str] = None 251 | normalizer_path: Optional[str] = None 252 | stage: Optional[str] = None 253 | 254 | @property 255 | def is_binclass(self): 256 | return self.info['task_type'] == "binclass" 257 
| 258 | @property 259 | def is_multiclass(self): 260 | return self.info['task_type'] == "multiclass" 261 | 262 | @property 263 | def is_regression(self): 264 | return self.info['task_type'] == "regression" 265 | 266 | @property 267 | def n_num_features(self): 268 | return self.info["n_num_features"] 269 | 270 | @property 271 | def n_cat_features(self): 272 | return self.info["n_cat_features"] 273 | 274 | @property 275 | def n_features(self): 276 | return self.n_num_features + self.n_cat_features 277 | 278 | @property 279 | def n_classes(self): 280 | return self.info["n_classes"] 281 | 282 | @property 283 | def parts(self): 284 | return self.x_num.keys() if self.x_num is not None else self.x_cat.keys() 285 | 286 | def size(self, part: str): 287 | x = self.x_num if self.x_num is not None else self.x_cat 288 | assert x is not None 289 | return len(x[part]) 290 | 291 | def normalize(self, x_num, noise=1e-3): 292 | x_num_train = x_num['train'].copy() 293 | if self.normalization == 'standard': 294 | normalizer = sklearn.preprocessing.StandardScaler() 295 | elif self.normalization == 'quantile': 296 | normalizer = sklearn.preprocessing.QuantileTransformer( 297 | output_distribution='normal', 298 | n_quantiles=max(min(x_num['train'].shape[0] // 30, 1000), 10), 299 | subsample=1e9, 300 | random_state=self.seed, 301 | ) 302 | if noise: 303 | stds = np.std(x_num_train, axis=0, keepdims=True) 304 | noise_std = noise / np.maximum(stds, noise) 305 | x_num_train += noise_std * np.random.default_rng(self.seed).standard_normal(x_num_train.shape) 306 | else: 307 | raise ValueError('Unknown Normalization') 308 | normalizer.fit(x_num_train) 309 | if self.normalizer_path is not None: 310 | if self.stage is None: 311 | raise ValueError('stage is None, only pretrain or downstream are accepted if normalizer_path is not None') 312 | if self.stage == 'pretrain': 313 | pickle.dump(normalizer, open(self.normalizer_path, 'wb')) 314 | print(f'Normalizer saved to {self.normalizer_path}') 315 | if self.stage == 'downstream': 316 | normalizer = pickle.load(open(self.normalizer_path, 'rb')) 317 | print(f'Normalizer loaded from {self.normalizer_path}') 318 | return {k: normalizer.transform(v) for k, v in x_num.items()} 319 | 320 | def handle_missing_values_numerical_features(self, x_num): 321 | # TODO: handle num_nan_masks for SAINT 322 | # num_nan_masks_int = {k: (~np.isnan(v)).astype(int) for k, v in x_num.items()} 323 | num_nan_masks = {k: np.isnan(v) for k, v in x_num.items()} 324 | if any(x.any() for x in num_nan_masks.values()): 325 | 326 | # TODO check if we need self.x_num here 327 | num_new_values = np.nanmean(self.x_num['train'], axis=0) 328 | for k, v in x_num.items(): 329 | num_nan_indices = np.where(num_nan_masks[k]) 330 | v[num_nan_indices] = np.take(num_new_values, num_nan_indices[1]) 331 | return x_num 332 | 333 | def encode_categorical_features(self, x_cat): 334 | encoder = sklearn.preprocessing.OrdinalEncoder(handle_unknown='error', dtype='int64') 335 | encoder.fit(self.full_cat_data_for_encoder.values) 336 | x_cat = {k: encoder.transform(v) for k, v in x_cat.items()} 337 | return x_cat 338 | 339 | def transform_categorical_features_to_ohe(self, x_cat): 340 | ohe = sklearn.preprocessing.OneHotEncoder(handle_unknown='ignore', sparse=False, dtype='float32') 341 | ohe.fit(self.full_cat_data_for_encoder.astype('str')) 342 | x_cat = {k: ohe.transform(v.astype('str')) for k, v in x_cat.items()} 343 | return x_cat 344 | 345 | def concatenate_data(self, x_cat, x_num): 346 | if self.cat_policy == 'indices': 347 | 
result = [x_num, x_cat] 348 | 349 | elif self.cat_policy == 'ohe': 350 | # TODO: handle output for models that need ohe 351 | raise ValueError('Not implemented') 352 | return result 353 | 354 | def preprocess_data(self): 355 | # TODO: seed (?) 356 | logging.info('Building Dataset') 357 | # TODO: figure out if we really need a copy of data or if we can preprocess it in place 358 | if self.x_num: 359 | x_num = deepcopy(self.x_num) 360 | x_num = self.handle_missing_values_numerical_features(x_num) 361 | if self.normalization: 362 | x_num = self.normalize(x_num) 363 | else: 364 | # if x_num is None replace with empty tensor for dataloader 365 | x_num = {part: torch.empty(self.size(part), 0) for part in self.parts} 366 | 367 | # if there are no categorical features, return only numerical features 368 | if self.cat_policy == 'drop' or not self.x_cat: 369 | assert x_num is not None 370 | x_num = to_tensors(x_num) 371 | # if x_cat is None replace with empty tensor for dataloader 372 | x_cat = {part: torch.empty(self.size(part), 0) for part in self.parts} 373 | return [x_num, x_cat] 374 | 375 | x_cat = deepcopy(self.x_cat) 376 | # x_cat_nan_masks = {k: v == '___null___' for k, v in x_cat.items()} 377 | x_cat = self.encode_categorical_features(x_cat) 378 | 379 | x_cat, x_num = to_tensors(x_cat), to_tensors(x_num) 380 | result = self.concatenate_data(x_cat, x_num) 381 | 382 | return result 383 | 384 | def build_y(self): 385 | if self.is_regression: 386 | assert self.y_policy == 'mean_std' 387 | y = deepcopy(self.y) 388 | if self.y_policy: 389 | if not self.is_regression: 390 | warnings.warn('y_policy is not None, but the task is NOT regression') 391 | info = None 392 | elif self.y_policy == 'mean_std': 393 | mean, std = self.y['train'].mean(), self.y['train'].std() 394 | y = {k: (v - mean) / std for k, v in y.items()} 395 | info = {'policy': self.y_policy, 'mean': mean, 'std': std} 396 | else: 397 | raise ValueError('Unknown y policy') 398 | else: 399 | info = None 400 | 401 | y = to_tensors(y) 402 | if self.is_regression or self.is_binclass: 403 | y = {part: y[part].float() for part in self.parts} 404 | return y, info 405 | 406 | 407 | def to_tensors(data): 408 | return {k: torch.as_tensor(v) for k, v in data.items()} 409 | -------------------------------------------------------------------------------- /deep_tabular/utils/get_demo_dataset.py: -------------------------------------------------------------------------------- 1 | """ get_demo_dataset.py 2 | Utilities for splitting yeast data into upstream and downstream tasks 3 | Developed for Tabular-Transfer-Learning project 4 | March 2022 5 | Data link: http://mulan.sourceforge.net/datasets-mlc.html 6 | """ 7 | import numpy as np 8 | import scipy 9 | import pandas as pd 10 | from scipy.io import arff 11 | from sklearn import preprocessing 12 | from sklearn.model_selection import train_test_split 13 | data, meta = scipy.io.arff.loadarff('data/yeast/yeast.arff') 14 | df = pd.DataFrame(data) 15 | 16 | target_columns = [col for col in df.columns if 'Class' in col] 17 | non_target_columns = [col for col in df.columns if 'Class' not in col] 18 | 19 | le = preprocessing.LabelEncoder() 20 | Y = df[target_columns].apply(le.fit_transform) 21 | print(Y) 22 | 23 | X = df[non_target_columns] 24 | 25 | downstream_target_index = 5 26 | downstream_target = target_columns[downstream_target_index] 27 | target_columns.pop(downstream_target_index) 28 | 29 | X_upstream, X_downstream, Y_upstream, Y_downstream = train_test_split(X, Y, test_size=0.2, random_state=0) 30 | 
Y_downstream = Y_downstream[downstream_target] 31 | Y_upstream = Y_upstream[target_columns] 32 | 33 | X_upstream.to_csv('data/yeast_upstream/N.csv', index = False) 34 | Y_upstream.to_csv('data/yeast_upstream/y.csv', index = False) 35 | 36 | X_downstream.to_csv('data/yeast_downstream/N.csv', index = False) 37 | Y_downstream.to_csv('data/yeast_downstream/y.csv', index = False) 38 | 39 | #0.628 with TL 40 | #0.578 with no TL 41 | -------------------------------------------------------------------------------- /deep_tabular/utils/mimic_tools.py: -------------------------------------------------------------------------------- 1 | """ mimic_tools.py 2 | Utilities for splitting MetaMIMIC data into upstream and downstream tasks 3 | Developed for Tabular-Transfer-Learning project 4 | March 2022 5 | """ 6 | 7 | import numpy as np 8 | from sklearn.preprocessing import LabelEncoder 9 | import pickle 10 | import os 11 | import random 12 | import pandas as pd 13 | import torch 14 | import sklearn 15 | from sklearn.model_selection import train_test_split 16 | 17 | def split_mimic(): 18 | mimic = pd.read_csv('../../../data/mimic/metaMIMIC.csv', delimiter = ',') 19 | mimic_target_columns = ['diabetes_diagnosed', 'hypertensive_diagnosed', 'ischematic_diagnosed', 20 | 'heart_diagnosed', 'overweight_diagnosed', 'anemia_diagnosed', 'respiratory_diagnosed', 21 | 'hypotension_diagnosed', 'lipoid_diagnosed', 'atrial_diagnosed', 'purpura_diagnosed', 'alcohol_diagnosed'] 22 | y_full = mimic[mimic_target_columns] 23 | mimic.drop(columns = ['subject_id'], inplace = True) 24 | mimic.drop(columns = mimic_target_columns, inplace = True) 25 | X_full = mimic.astype('float') 26 | categorical_columns = ['gender'] 27 | numerical_columns = list(X_full.columns[X_full.columns != 'gender']) 28 | X_full.loc[X_full['gender'] == 1, 'gender'] = 'male' 29 | X_full.loc[X_full['gender'] == 0, 'gender'] = 'female' 30 | 31 | 32 | X_train, X_test, y_train, y_test = train_test_split(X_full, y_full, test_size=0.2, random_state=1) 33 | X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.1875, random_state=1) # 0.1875 x 0.8 = 0.15 34 | 35 | X_train.to_csv('../../../data/mimic/mimic_train_X.csv', index = False) 36 | X_val.to_csv('../../../data/mimic/mimic_val_X.csv', index = False) 37 | X_test.to_csv('../../../data/mimic/mimic_test_X.csv', index = False) 38 | y_train.to_csv('../../../data/mimic/mimic_train_y.csv', index = False) 39 | y_val.to_csv('../../../data/mimic/mimic_val_y.csv', index = False) 40 | y_test.to_csv('../../../data/mimic/mimic_test_y.csv', index = False) 41 | return 42 | 43 | 44 | 45 | def data_prep_transfer_mimic(ds_id, task, stage='pretrain', downstream_target=0, downstream_samples_per_class = 2): 46 | """ 47 | Function to create a transfer learning task based on the metaMIMIC data 48 | """ 49 | seed = 0 50 | np.random.seed(seed) 51 | 52 | mimic_target_columns = ['diabetes_diagnosed', 'hypertensive_diagnosed', 'ischematic_diagnosed', 53 | 'heart_diagnosed', 'overweight_diagnosed', 'anemia_diagnosed', 'respiratory_diagnosed', 54 | 'hypotension_diagnosed', 'lipoid_diagnosed', 'atrial_diagnosed', 'purpura_diagnosed', 55 | 'alcohol_diagnosed'] 56 | X_train = pd.read_csv('../../../data/mimic/mimic_train_X.csv') 57 | X_val = pd.read_csv('../../../data/mimic/mimic_val_X.csv') 58 | X_test = pd.read_csv('../../../data/mimic/mimic_test_X.csv') 59 | y_train_full = pd.read_csv('../../../data/mimic/mimic_train_y.csv') 60 | y_val_full = pd.read_csv('../../../data/mimic/mimic_val_y.csv') 61 | y_test_full = 
pd.read_csv('../../../data/mimic/mimic_test_y.csv') 62 | categorical_columns = ['gender'] 63 | numerical_columns = list(X_train.columns[X_train.columns != 'gender']) 64 | X_train[categorical_columns] = X_train[categorical_columns].fillna("MissingValue") 65 | X_val[categorical_columns] = X_val[categorical_columns].fillna("MissingValue") 66 | X_test[categorical_columns] = X_test[categorical_columns].fillna("MissingValue") 67 | print(numerical_columns) 68 | print(categorical_columns) 69 | 70 | if task == 'binclass': 71 | if 'downstream' in stage: 72 | #Merge validation set into train, keep the dummy validation set for the code not to fail 73 | y_train_full = pd.concat([y_train_full, y_val_full], ignore_index=True) 74 | X_train = pd.concat([X_train, X_val], ignore_index=True) 75 | print('Using downstream target:', mimic_target_columns[downstream_target]) 76 | y_train = y_train_full[mimic_target_columns[downstream_target]] 77 | y_val = y_val_full[mimic_target_columns[downstream_target]] 78 | y_test = y_test_full[mimic_target_columns[downstream_target]] 79 | elif 'pretrain' in stage: 80 | #Do multitarget in regular pretrain 81 | print('Dropping downstream target:', mimic_target_columns[downstream_target]) 82 | y_train = y_train_full.drop(columns=[mimic_target_columns[downstream_target]]) 83 | y_val = y_val_full.drop(columns=[mimic_target_columns[downstream_target]]) 84 | y_test = y_test_full.drop(columns=[mimic_target_columns[downstream_target]]) 85 | else: 86 | raise ValueError('Stage is incorrect!') 87 | else: 88 | raise NotImplementedError('Mimic only accepts binclass tasks: binclass with multiple targets for pretraining and binclass with a single target for downstream') 89 | 90 | X_train_full = X_train.copy() 91 | y_train_full = y_train.copy() 92 | if ('downstream' in stage): 93 | #switching to downstream_samples_per_class 94 | print('Total num classes:', len(set(y_train))) 95 | total_num_of_classes = len(set(y_train)) 96 | X_train, _, y_train, _ = train_test_split(X_train, y_train, 97 | train_size=downstream_samples_per_class * len(set(y_train)), 98 | stratify=y_train, random_state = seed) 99 | print('Sample num classes:', len(set(y_train))) 100 | sample_num_classes = len(set(y_train)) 101 | if sample_num_classes < total_num_of_classes: 102 | print('Resampling and guaranteeing at least one sample per class') 103 | X_train, y_train = stratified_sample_at_least_one_per_class(X_train_full, y_train_full, downstream_samples_per_class, seed) 104 | sample_num_classes = len(set(y_train)) 105 | print('New sample num classes:', len(set(y_train))) 106 | assert total_num_of_classes == sample_num_classes 107 | 108 | 109 | X_cat_train = X_train[categorical_columns].values 110 | X_num_train = X_train[numerical_columns].values 111 | y_train = y_train.values.astype('float') 112 | 113 | X_cat_val = X_val[categorical_columns].values 114 | X_num_val = X_val[numerical_columns].values 115 | y_val = y_val.values.astype('float') 116 | 117 | X_cat_test = X_test[categorical_columns].values 118 | X_num_test = X_test[numerical_columns].values 119 | y_test = y_test.values.astype('float') 120 | 121 | info = {} 122 | info['name'] = ds_id 123 | info['stage'] = stage 124 | info['split'] = seed 125 | info['task_type'] = task 126 | info['n_num_features'] = len(numerical_columns) 127 | info['n_cat_features'] = len(categorical_columns) 128 | info['train_size'] = X_train.shape[0] 129 | info['val_size'] = X_val.shape[0] 130 | info['test_size'] = X_test.shape[0] 131 | 132 | 133 | if len(y_train.shape) > 1: 134 | info['n_classes'] 
= y_train.shape[1] 135 | else: 136 | info['n_classes'] = 1 137 | 138 | if len(numerical_columns) > 0: 139 | #We should not have access to a validation set in the limited data regime, replace it with train to make sure 140 | if ('downstream' in stage): 141 | X_num_val = X_num_train 142 | numerical_data = {'train': X_num_train, 'val': X_num_val, 'test': X_num_test} 143 | else: 144 | numerical_data = None 145 | 146 | if len(categorical_columns) > 0: 147 | #We should not have access to a validation set in the limited data regime, replace it with train to make sure 148 | if ('downstream' in stage): 149 | X_cat_val = X_cat_train 150 | categorical_data = {'train': X_cat_train, 'val': X_cat_val, 'test': X_cat_test} 151 | else: 152 | categorical_data = None 153 | 154 | #We should not have access to a validation set in the limited data regime, replace it with train to make sure 155 | if ('downstream' in stage): 156 | y_val = y_train 157 | targets = {'train': y_train, 'val': y_val, 'test': y_test} 158 | print('\n Train size:{} Val size:{} Test size:{}'.format(len(y_train), len(y_val), len(y_test))) 159 | 160 | if len(categorical_columns) > 0: 161 | #this only works with mimic since the only categorical feature is gender 162 | full_cat_data_for_encoder = X_train_full[categorical_columns] 163 | else: 164 | full_cat_data_for_encoder = None 165 | 166 | return numerical_data, categorical_data, targets, info, full_cat_data_for_encoder 167 | 168 | 169 | def stratified_sample_at_least_one_per_class(X_train, y_train, downstream_samples_per_class, seed): 170 | # Sample 1 element per class 171 | X_train['y'] = y_train 172 | X_one_sample = X_train.groupby(by='y').sample(n=1) 173 | y_one_sample = X_one_sample['y'] 174 | X_one_sample = X_one_sample.drop(columns=['y']) 175 | # Add a stratified sample from the rest of the data 176 | X_train = X_train[~X_train.index.isin(X_one_sample.index)] 177 | y_train = X_train['y'] 178 | X_train = X_train.drop(columns=['y']) 179 | X_train, _, y_train, _ = train_test_split(X_train, y_train, 180 | train_size=downstream_samples_per_class * len(set(y_train)) - len( 181 | X_one_sample), 182 | stratify=y_train, random_state=seed) 183 | X_train = pd.concat([X_train, X_one_sample], axis=0) 184 | y_train = pd.concat([y_train, y_one_sample], axis=0) 185 | return X_train, y_train 186 | -------------------------------------------------------------------------------- /deep_tabular/utils/testing.py: -------------------------------------------------------------------------------- 1 | """ testing.py 2 | Utilities for testing models 3 | Developed for Tabular-Transfer-Learning project 4 | March 2022 5 | Some functionality adopted from https://github.com/Yura52/rtdl 6 | """ 7 | 8 | import torch 9 | from sklearn.metrics import accuracy_score, mean_squared_error, balanced_accuracy_score, roc_auc_score 10 | from tqdm import tqdm 11 | 12 | 13 | # Ignore statements for pylint: 14 | # Too many branches (R0912), Too many statements (R0915), No member (E1101), 15 | # Not callable (E1102), Invalid name (C0103), No exception (W0702), 16 | # Too many local variables (R0914), Missing docstring (C0116, C0115, C0114). 
17 | # pylint: disable=R0912, R0915, E1101, E1102, C0103, W0702, R0914, C0116, C0115, C0114 18 | 19 | 20 | def evaluate_model(net, loaders, task, device): 21 | scores = [] 22 | for loader in loaders: 23 | score = test_default(net, loader, task, device) 24 | scores.append(score) 25 | return scores 26 | 27 | 28 | def test_default(net, testloader, task, device): 29 | net.eval() 30 | targets_all = [] 31 | predictions_all = [] 32 | with torch.no_grad(): 33 | for batch_idx, (inputs_num, inputs_cat, targets) in enumerate(tqdm(testloader, leave=False)): 34 | inputs_num, inputs_cat, targets = inputs_num.to(device).float(), inputs_cat.to(device), targets.to(device) 35 | inputs_num, inputs_cat = inputs_num if inputs_num.nelement() != 0 else None, \ 36 | inputs_cat if inputs_cat.nelement() != 0 else None 37 | 38 | outputs = net(inputs_num, inputs_cat) 39 | if task == "multiclass": 40 | predicted = torch.argmax(outputs, dim=1) 41 | elif task == "binclass": 42 | predicted = outputs 43 | elif task == "regression": 44 | predicted = outputs 45 | targets_all.extend(targets.cpu().tolist()) 46 | predictions_all.extend(predicted.cpu().tolist()) 47 | 48 | if task == "multiclass": 49 | accuracy = accuracy_score(targets_all, predictions_all) 50 | balanced_accuracy = balanced_accuracy_score(targets_all, predictions_all, adjusted=False) 51 | balanced_accuracy_adjusted = balanced_accuracy_score(targets_all, predictions_all, adjusted=True) 52 | scores = {"score": accuracy, 53 | "accuracy": accuracy, 54 | "balanced_accuracy": balanced_accuracy, 55 | "balanced_accuracy_adjusted": balanced_accuracy_adjusted} 56 | elif task == "regression": 57 | rmse = mean_squared_error(targets_all, predictions_all, squared=False) 58 | scores = {"score": -rmse, 59 | "rmse": -rmse} 60 | elif task == "binclass": 61 | roc_auc = roc_auc_score(targets_all, predictions_all) 62 | scores = {"score": roc_auc, 63 | "roc_auc": roc_auc} 64 | return scores 65 | 66 | 67 | def evaluate_backbone(embedders, backbone, heads, loaders, tasks, device): 68 | scores = {} 69 | for k in loaders.keys(): 70 | score = evaluate_backbone_one_dataset(embedders[k], backbone, heads[k], loaders[k], tasks[k], device) 71 | scores[k] = score 72 | return scores 73 | 74 | 75 | def evaluate_backbone_one_dataset(embedder, backbone, head, testloader, task, device): 76 | embedder.eval() 77 | backbone.eval() 78 | head.eval() 79 | targets_all = [] 80 | predictions_all = [] 81 | with torch.no_grad(): 82 | for batch_idx, (inputs_num, inputs_cat, targets) in enumerate(tqdm(testloader, leave=False)): 83 | inputs_num, inputs_cat, targets = inputs_num.to(device).float(), inputs_cat.to(device), targets.to(device) 84 | inputs_num, inputs_cat = inputs_num if inputs_num.nelement() != 0 else None, \ 85 | inputs_cat if inputs_cat.nelement() != 0 else None 86 | 87 | embedding = embedder(inputs_num, inputs_cat) 88 | features = backbone(embedding) 89 | outputs = head(features) 90 | 91 | if task == "multiclass": 92 | predicted = torch.argmax(outputs, dim=1) 93 | elif task == "binclass": 94 | predicted = outputs 95 | elif task == "regression": 96 | predicted = outputs 97 | targets_all.extend(targets.cpu().tolist()) 98 | predictions_all.extend(predicted.cpu().tolist()) 99 | 100 | if task == "multiclass": 101 | accuracy = accuracy_score(targets_all, predictions_all) 102 | balanced_accuracy = balanced_accuracy_score(targets_all, predictions_all, adjusted=False) 103 | balanced_accuracy_adjusted = balanced_accuracy_score(targets_all, predictions_all, adjusted=True) 104 | scores = {"score": accuracy, 
105 | "accuracy": accuracy, 106 | "balanced_accuracy": balanced_accuracy, 107 | "balanced_accuracy_adjusted": balanced_accuracy_adjusted} 108 | elif task == "regression": 109 | rmse = mean_squared_error(targets_all, predictions_all, squared=False) 110 | scores = {"score": -rmse, 111 | "rmse": -rmse} 112 | elif task == "binclass": 113 | roc_auc = roc_auc_score(targets_all, predictions_all) 114 | scores = {"score": roc_auc, 115 | "roc_auc": roc_auc} 116 | return scores 117 | -------------------------------------------------------------------------------- /deep_tabular/utils/tools.py: -------------------------------------------------------------------------------- 1 | """ tools.py 2 | Utility functions that are common to all tasks 3 | Developed for Tabular-Transfer-Learning project 4 | March 2022 5 | """ 6 | 7 | import logging 8 | import os 9 | import random 10 | from collections import OrderedDict 11 | import torch 12 | from icecream import ic 13 | from torch.optim import SGD, Adam, AdamW 14 | from torch.optim.lr_scheduler import MultiStepLR, CosineAnnealingLR, LambdaLR, ChainedScheduler 15 | from torch.utils.data import TensorDataset, DataLoader 16 | 17 | import deep_tabular.models as models 18 | from .data_tools import get_data, get_categories_full_cat_data, TabularDataset, get_multilabel_data 19 | from .warmup import ExponentialWarmup, LinearWarmup 20 | from ..adjectives import adjectives 21 | from ..names import names 22 | from .mimic_tools import data_prep_transfer_mimic 23 | 24 | 25 | # Ignore statements for pylint: 26 | # Too many branches (R0912), Too many statements (R0915), No member (E1101), 27 | # Not callable (E1102), Invalid name (C0103), No exception (W0702), 28 | # Too many local variables (R0914), Missing docstring (C0116, C0115). 29 | # pylint: disable=R0912, R0915, E1101, E1102, C0103, W0702, R0914, C0116, C0115 30 | 31 | def generate_run_id(): 32 | hashstr = f"{adjectives[random.randint(0, len(adjectives))]}-{names[random.randint(0, len(names))]}" 33 | return hashstr 34 | 35 | 36 | def write_to_tb(stats, stat_names, epoch, writer): 37 | for name, stat in zip(stat_names, stats): 38 | stat_name = os.path.join("val", name) 39 | writer.add_scalar(stat_name, stat, epoch) 40 | 41 | 42 | def get_dataloaders(cfg, which_dataset=None): 43 | """ 44 | cfg: OmegaConf, dictionary of configurations 45 | which_dataset: int of None, indicates which dataset to use if multiple are specified in the config 46 | """ 47 | 48 | if which_dataset is not None: 49 | cfg_dataset = cfg.dataset[which_dataset] 50 | else: 51 | cfg_dataset = cfg.dataset 52 | 53 | if cfg_dataset.task == 'multilabel': 54 | #Changing the task to binclass because multilabel is just binary cross-entropy over multilabel logits 55 | cfg_dataset.task = 'binclass' 56 | x_numerical, x_categorical, y, info, full_cat_data_for_encoder = get_multilabel_data(ds_id=cfg_dataset.name, 57 | source=cfg_dataset.source, 58 | task=cfg_dataset.task) 59 | elif cfg_dataset.name == 'mimic': 60 | x_numerical, x_categorical, y, info, full_cat_data_for_encoder = data_prep_transfer_mimic(ds_id=cfg_dataset.name, 61 | task=cfg_dataset.task, 62 | stage=cfg_dataset.stage, 63 | downstream_target=cfg_dataset.downstream_target, 64 | downstream_samples_per_class=cfg_dataset.downstream_sample_num//2) 65 | else: 66 | x_numerical, x_categorical, y, info, full_cat_data_for_encoder = get_data(dataset_id=cfg_dataset.name, 67 | source=cfg_dataset.source, 68 | task=cfg_dataset.task, 69 | datasplit=[.65, .15, .2]) 70 | 71 | dataset = TabularDataset(x_numerical, 
x_categorical, y, info, normalization=cfg_dataset.normalization, 72 | cat_policy="indices", 73 | seed=0, 74 | full_cat_data_for_encoder=full_cat_data_for_encoder, 75 | y_policy=cfg_dataset.y_policy, 76 | normalizer_path=cfg_dataset.normalizer_path, 77 | stage=cfg_dataset.stage) 78 | 79 | X = dataset.preprocess_data() 80 | Y, y_info = dataset.build_y() 81 | unique_categories = get_categories_full_cat_data(full_cat_data_for_encoder) 82 | n_numerical = dataset.n_num_features 83 | n_categorical = dataset.n_cat_features 84 | n_classes = dataset.n_classes 85 | logging.info(f"Task: {cfg_dataset.task}, Dataset: {cfg_dataset.name}, n_numerical: {n_numerical}, " 86 | f"n_categorical: {n_categorical}, n_classes: {n_classes}, n_train_samples: {dataset.size('train')}, " 87 | f"n_val_samples: {dataset.size('val')}, n_test_samples: {dataset.size('test')}") 88 | 89 | trainset = TensorDataset(X[0]["train"], X[1]["train"], Y["train"]) 90 | valset = TensorDataset(X[0]["val"], X[1]["val"], Y["val"]) 91 | testset = TensorDataset(X[0]["test"], X[1]["test"], Y["test"]) 92 | 93 | trainloader = DataLoader(trainset, batch_size=cfg.hyp.train_batch_size, shuffle=True, drop_last=True) 94 | valloader = DataLoader(valset, batch_size=cfg.hyp.test_batch_size, shuffle=False, drop_last=False) 95 | testloader = DataLoader(testset, batch_size=cfg.hyp.test_batch_size, shuffle=False, drop_last=False) 96 | 97 | 98 | loaders = {"train": trainloader, "val": valloader, "test": testloader} 99 | return loaders, unique_categories, n_numerical, n_classes 100 | 101 | 102 | def get_model(model, num_numerical, unique_categories, num_outputs, d_embedding, model_params): 103 | model = model.lower() 104 | net = getattr(models, model)(num_numerical, unique_categories, num_outputs, d_embedding, model_params) 105 | return net 106 | 107 | 108 | def get_embedder(cfg, num_numerical, unique_categories): 109 | model_name = cfg.model.name.lower() 110 | if model_name == "ft_transformer": 111 | embedder = models.ft_tokenizer(num_numerical, unique_categories, cfg.model.d_embedding, cfg.model.token_bias) 112 | else: 113 | raise NotImplementedError(f"Model name is {model_name}, but this is not yet implemented.") 114 | return embedder 115 | 116 | 117 | class Squeeze(torch.nn.Module): 118 | def forward(self, x): 119 | return torch.squeeze(x) 120 | 121 | 122 | 123 | def get_backbone(cfg, device): 124 | model_name = cfg.model.name.lower() 125 | if model_name == "ft_transformer": 126 | net = models.ft_backbone(cfg.model) 127 | else: 128 | raise NotImplementedError(f"Model name is {model_name}, but this is not yet implemented.") 129 | if cfg.model.model_path is not None: 130 | logging.info(f"Loading backbone from checkpoint {cfg.model.model_path}...") 131 | state_dict = torch.load(cfg.model.model_path, map_location=device) 132 | net.load_state_dict(state_dict["backbone"]) 133 | net = net.to(device) 134 | return net 135 | 136 | 137 | def get_optimizer_for_single_net(optim_args, net, state_dict): 138 | warmup = ExponentialWarmup if optim_args.warmup_type == "exponential" else LinearWarmup 139 | 140 | if optim_args.head_lr is not None: 141 | head_name, head_module = list(net.named_modules())[-1] 142 | head_parameters = [v for k, v in net.named_parameters() if head_name in k] 143 | feature_extractor_parameters = [v for k, v in net.named_parameters() if head_name not in k] 144 | all_params = [{'params': feature_extractor_parameters}, 145 | {'params': head_parameters, 'lr': optim_args.head_lr}] 146 | else: 147 | all_params = [{"params": [p for n, p in 
net.named_parameters()]}] 148 | 149 | if optim_args.optimizer.lower() == "sgd": 150 | optimizer = SGD(all_params, lr=optim_args.lr, weight_decay=optim_args.weight_decay, 151 | momentum=optim_args.momentum) 152 | elif optim_args.optimizer.lower() == "adam": 153 | optimizer = Adam(all_params, lr=optim_args.lr, weight_decay=optim_args.weight_decay) 154 | elif optim_args.optimizer.lower() == "adamw": 155 | optimizer = AdamW(all_params, lr=optim_args.lr, weight_decay=optim_args.weight_decay) 156 | else: 157 | raise ValueError(f"{ic.format()}: Optimizer choice of {optim_args.optimizer.lower()} not yet implmented. " 158 | f"Should be one of ['sgd', 'adam', 'adamw'].") 159 | 160 | if state_dict is not None: 161 | optimizer.load_state_dict(state_dict) 162 | warmup_scheduler = warmup(optimizer, warmup_period=0) 163 | else: 164 | warmup_scheduler = warmup(optimizer, warmup_period=optim_args.warmup_period) 165 | 166 | if optim_args.lr_decay.lower() == "step": 167 | lr_scheduler = MultiStepLR(optimizer, milestones=optim_args.lr_schedule, 168 | gamma=optim_args.lr_factor, last_epoch=-1) 169 | elif optim_args.lr_decay.lower() == "cosine": 170 | lr_scheduler = CosineAnnealingLR(optimizer, optim_args.epochs, eta_min=0, last_epoch=-1, verbose=False) 171 | else: 172 | raise ValueError(f"{ic.format()}: Learning rate decay style {optim_args.lr_decay} not yet implemented.") 173 | 174 | #Freeze feature extractor and warm the head for some period 175 | if optim_args.head_warmup_period is not None: 176 | #Multiply the feature extractor lr by 0 during the head warmup period 177 | lambda_feature_extractor = lambda epoch: 0 if epoch < optim_args.head_warmup_period else 1 178 | lambda_head = lambda epoch: 1 179 | head_warmup_scheduler = LambdaLR(optimizer, lr_lambda = [lambda_feature_extractor, lambda_head]) 180 | lr_scheduler = ChainedScheduler([head_warmup_scheduler, lr_scheduler]) 181 | return optimizer, warmup_scheduler, lr_scheduler 182 | 183 | 184 | def get_optimizer_for_backbone(optim_args, embedders, backbone, heads, state_dict=None): 185 | warmup = ExponentialWarmup if optim_args.warmup_type == "exponential" else LinearWarmup 186 | 187 | all_params = [{"params": [p for p in backbone.parameters()], "lr": optim_args.lr}] 188 | all_params.extend([{f"params": [p for p in v.parameters()], 189 | "lr": optim_args.lr_for_embedders} for v in embedders.values()]) 190 | all_params.extend([{f"params": [p for p in v.parameters()], 191 | "lr": optim_args.lr_for_heads} for v in heads.values()]) 192 | 193 | if optim_args.optimizer.lower() == "adamw": 194 | optimizer = AdamW(all_params, weight_decay=optim_args.weight_decay) 195 | elif optim_args.optimizer.lower() == "sgd": 196 | optimizer = SGD(all_params, momentum=0.9, weight_decay=optim_args.weight_decay) 197 | else: 198 | raise ValueError(f"{ic.format()}: Optimizer choice of {optim_args.optimizer.lower()} not yet implmented. 
" 199 | f"Should be one of ['adamw'].") 200 | 201 | if state_dict is not None: 202 | optimizer.load_state_dict(state_dict) 203 | warmup_scheduler = warmup(optimizer, warmup_period=0) 204 | else: 205 | warmup_scheduler = warmup(optimizer, warmup_period=optim_args.warmup_period) 206 | 207 | if optim_args.lr_decay.lower() == "step": 208 | lr_scheduler = MultiStepLR(optimizer, milestones=optim_args.lr_schedule, 209 | gamma=optim_args.lr_factor, last_epoch=-1) 210 | elif optim_args.lr_decay.lower() == "cosine": 211 | lr_scheduler = CosineAnnealingLR(optimizer, optim_args.epochs, eta_min=0, last_epoch=-1, verbose=False) 212 | else: 213 | raise ValueError(f"{ic.format()}: Learning rate decay style {optim_args.lr_decay} not yet implemented.") 214 | 215 | return optimizer, warmup_scheduler, lr_scheduler 216 | 217 | 218 | def get_criterion(task): 219 | if task == "multiclass": 220 | criterion = torch.nn.CrossEntropyLoss() 221 | elif task == "binclass": 222 | criterion = torch.nn.BCEWithLogitsLoss() 223 | elif task == "regression": 224 | criterion = torch.nn.MSELoss() 225 | else: 226 | raise ValueError(f"No loss function implemented for task {task}.") 227 | return criterion 228 | 229 | def get_head(model_name, net): 230 | if model_name in ['ft_transformer', 'resnet', 'mlp']: 231 | head_name = 'head' 232 | head_module = net.head 233 | else: 234 | head_name, head_module = list(net.named_modules())[-1] 235 | print(f'Original head: {head_name}, {head_module}\n') 236 | return head_name, head_module 237 | 238 | def remove_parallel(state_dict): 239 | ''' state_dict: state_dict of model saved with DataParallel() 240 | returns state_dict without extra module level ''' 241 | new_state_dict = OrderedDict() 242 | for k, v in state_dict.items(): 243 | name = k[7:] # remove module. 244 | new_state_dict[name] = v 245 | return new_state_dict 246 | 247 | def load_transfer_model_from_checkpoint(model_args, num_numerical, unique_categories, num_outputs, device): 248 | model = model_args.name 249 | model_path = model_args.model_path 250 | d_embedding = model_args.d_embedding 251 | use_mlp_head = model_args.use_mlp_head 252 | freeze_feature_extractor = model_args.freeze_feature_extractor 253 | epoch = 0 254 | optimizer = None 255 | 256 | net = get_model(model, num_numerical, unique_categories, num_outputs, d_embedding, model_args) 257 | net = net.to(device) 258 | head_name, head_module = get_head(model_args.name, net) 259 | if model_path is not None: 260 | logging.info(f"Loading model from checkpoint {model_path}...") 261 | state_dict = torch.load(model_path, map_location=device) 262 | if device == "cuda": 263 | state_dict["net"] = remove_parallel(state_dict["net"]) 264 | pretrained_feature_extractor_dict = {k: v for k, v in state_dict["net"].items() if head_name not in k} 265 | missing_keys, unexpected_keys = net.load_state_dict(pretrained_feature_extractor_dict, strict = False) 266 | print('State dict successfully loaded from pretrained checkpoint. Original head reinitialized.') 267 | print('Missing keys:{}\nUnexpected keys:{}\n'.format(missing_keys, unexpected_keys)) 268 | # epoch = state_dict["epoch"] + 1 269 | # optimizer = state_dict["optimizer"] 270 | if freeze_feature_extractor: 271 | trainable_params = [] 272 | for name, param in net.named_parameters(): 273 | if not any(x in name for x in [head_name]): 274 | # if head_name not in name: 275 | param.requires_grad = False 276 | else: 277 | trainable_params.append(name) 278 | print(f'Feature extractor frozen. 
Trainable params: {trainable_params}') 279 | 280 | if use_mlp_head: 281 | emb_dim = head_module.in_features 282 | out_dim = head_module.out_features 283 | head_module = torch.nn.Sequential( 284 | torch.nn.Linear(emb_dim, 200), 285 | torch.nn.ReLU(), 286 | torch.nn.Linear(200, 200), 287 | torch.nn.ReLU(), 288 | torch.nn.Linear(200, out_dim)).to(device) 289 | setattr(net, head_name, head_module) 290 | print('New head set to:', net.head) 291 | if device == "cuda": 292 | net = torch.nn.DataParallel(net) 293 | return net, epoch, optimizer 294 | 295 | 296 | def load_model_from_checkpoint(model_args, num_numerical, unique_categories, num_outputs, device): 297 | model = model_args.name 298 | model_path = model_args.model_path 299 | d_embedding = model_args.d_embedding 300 | epoch = 0 301 | optimizer = None 302 | 303 | net = get_model(model, num_numerical, unique_categories, num_outputs, d_embedding, model_args) 304 | net = net.to(device) 305 | if device == "cuda": 306 | net = torch.nn.DataParallel(net) 307 | if model_path is not None: 308 | logging.info(f"Loading model from checkpoint {model_path}...") 309 | state_dict = torch.load(model_path, map_location=device) 310 | net.load_state_dict(state_dict["net"]) 311 | epoch = state_dict["epoch"] + 1 312 | optimizer = state_dict["optimizer"] 313 | 314 | return net, epoch, optimizer 315 | 316 | -------------------------------------------------------------------------------- /deep_tabular/utils/training.py: -------------------------------------------------------------------------------- 1 | """ training.py 2 | Utilities for training models 3 | Developed for Tabular-Transfer-Learning project 4 | March 2022 5 | """ 6 | 7 | import random 8 | from dataclasses import dataclass 9 | from typing import Any 10 | 11 | # from icecream import ic 12 | from tqdm import tqdm 13 | 14 | 15 | # Ignore statemenst for pylint: 16 | # Too many branches (R0912), Too many statements (R0915), No member (E1101), 17 | # Not callable (E1102), Invalid name (C0103), No exception (W0702), 18 | # Too many local variables (R0914), Missing docstring (C0116, C0115, C0114), 19 | # Unused import (W0611). 
20 | # pylint: disable=R0912, R0915, E1101, E1102, C0103, W0702, R0914, C0116, C0115, C0114, W0611 21 | 22 | 23 | @dataclass 24 | class TrainingSetup: 25 | """Attributes to describe the training precedure""" 26 | criterions: Any 27 | optimizer: Any 28 | scheduler: Any 29 | warmup: Any 30 | num_datasets_in_batch: Any = None 31 | 32 | 33 | def default_training_loop(net, trainloader, train_setup, device): 34 | net.train() 35 | optimizer = train_setup.optimizer 36 | lr_scheduler = train_setup.scheduler 37 | warmup_scheduler = train_setup.warmup 38 | criterion = train_setup.criterions 39 | 40 | train_loss = 0 41 | total = 0 42 | 43 | for batch_idx, (inputs_num, inputs_cat, targets) in enumerate(tqdm(trainloader, leave=False)): 44 | inputs_num, inputs_cat, targets = inputs_num.to(device).float(), inputs_cat.to(device), targets.to(device) 45 | inputs_num, inputs_cat = inputs_num if inputs_num.nelement() != 0 else None, \ 46 | inputs_cat if inputs_cat.nelement() != 0 else None 47 | 48 | optimizer.zero_grad() 49 | outputs = net(inputs_num, inputs_cat) 50 | loss = criterion(outputs, targets) 51 | loss.backward() 52 | optimizer.step() 53 | 54 | train_loss += loss.item() 55 | total += targets.size(0) 56 | 57 | train_loss = train_loss / (batch_idx + 1) 58 | 59 | lr_scheduler.step() 60 | warmup_scheduler.dampen() 61 | 62 | return train_loss 63 | 64 | -------------------------------------------------------------------------------- /deep_tabular/utils/warmup.py: -------------------------------------------------------------------------------- 1 | """ warmup.py 2 | code for warmup learning rate scheduler 3 | borrowed from https://github.com/ArneNx/pytorch_warmup/tree/warmup_fix 4 | and modified July 2020 5 | """ 6 | 7 | import math 8 | 9 | from torch.optim import Optimizer 10 | 11 | 12 | # Ignore statemenst for pylint: 13 | # Too many branches (R0912), Too many statements (R0915), No member (E1101), 14 | # Not callable (E1102), Invalid name (C0103), No exception (W0702), 15 | # Too many local variables (R0914). 16 | # pylint: disable=R0912, R0915, E1101, E1102, C0103, W0702, R0914 17 | 18 | 19 | class BaseWarmup: 20 | """Base class for all warmup schedules 21 | 22 | Arguments: 23 | optimizer (Optimizer): an instance of a subclass of Optimizer 24 | warmup_params (list): warmup paramters 25 | last_step (int): The index of last step. (Default: -1) 26 | warmup_period (int or list): Warmup period 27 | """ 28 | 29 | def __init__(self, optimizer, warmup_params, last_step=-1, warmup_period=0): 30 | if not isinstance(optimizer, Optimizer): 31 | raise TypeError('{} is not an Optimizer'.format( 32 | type(optimizer).__name__)) 33 | self.optimizer = optimizer 34 | self.warmup_params = warmup_params 35 | self.last_step = last_step 36 | self.base_lrs = [group['lr'] for group in self.optimizer.param_groups] 37 | self.warmup_period = warmup_period 38 | self.dampen() 39 | 40 | def state_dict(self): 41 | """Returns the state of the warmup scheduler as a :class:`dict`. 42 | 43 | It contains an entry for every variable in self.__dict__ which 44 | is not the optimizer. 45 | """ 46 | return {key: value for key, value in self.__dict__.items() if key != 'optimizer'} 47 | 48 | def load_state_dict(self, state_dict): 49 | """Loads the warmup scheduler's state. 50 | 51 | Arguments: 52 | state_dict (dict): warmup scheduler state. Should be an object returned 53 | from a call to :meth:`state_dict`. 54 | """ 55 | self.__dict__.update(state_dict) 56 | 57 | def dampen(self, step=None): 58 | """Dampen the learning rates. 
59 | 60 | Arguments: 61 | step (int): The index of current step. (Default: None) 62 | """ 63 | if step is None: 64 | step = self.last_step + 1 65 | self.last_step = step 66 | if isinstance(self.warmup_period, int) and step < self.warmup_period: 67 | for i, (group, params) in enumerate(zip(self.optimizer.param_groups, 68 | self.warmup_params)): 69 | if isinstance(self.warmup_period, list) and step >= self.warmup_period[i]: 70 | continue 71 | omega = self.warmup_factor(step, **params) 72 | group['lr'] = omega * self.base_lrs[i] 73 | 74 | def warmup_factor(self, step, warmup_period): 75 | """Place holder for objects that inherit BaseWarmup.""" 76 | raise NotImplementedError 77 | 78 | 79 | def get_warmup_params(warmup_period, group_count): 80 | if type(warmup_period) == list: 81 | if len(warmup_period) != group_count: 82 | raise ValueError( 83 | 'size of warmup_period does not equal {}.'.format(group_count)) 84 | for x in warmup_period: 85 | if type(x) != int: 86 | raise ValueError( 87 | 'An element in warmup_period, {}, is not an int.'.format( 88 | type(x).__name__)) 89 | warmup_params = [dict(warmup_period=x) for x in warmup_period] 90 | elif type(warmup_period) == int: 91 | warmup_params = [dict(warmup_period=warmup_period) 92 | for _ in range(group_count)] 93 | else: 94 | raise TypeError('{} is not a list nor an int.'.format( 95 | type(warmup_period).__name__)) 96 | return warmup_params 97 | 98 | 99 | class LinearWarmup(BaseWarmup): 100 | """Linear warmup schedule. 101 | 102 | Arguments: 103 | optimizer (Optimizer): an instance of a subclass of Optimizer 104 | warmup_period (int or list): Warmup period 105 | last_step (int): The index of last step. (Default: -1) 106 | """ 107 | 108 | def __init__(self, optimizer, warmup_period, last_step=-1): 109 | group_count = len(optimizer.param_groups) 110 | warmup_params = get_warmup_params(warmup_period, group_count) 111 | super().__init__(optimizer, warmup_params, last_step, warmup_period) 112 | 113 | def warmup_factor(self, step, warmup_period): 114 | return min(1.0, (step+1) / warmup_period) 115 | 116 | 117 | class ExponentialWarmup(BaseWarmup): 118 | """Exponential warmup schedule. 119 | 120 | Arguments: 121 | optimizer (Optimizer): an instance of a subclass of Optimizer 122 | warmup_period (int or list): Effective warmup period 123 | last_step (int): The index of last step. 
(Default: -1) 124 | """ 125 | 126 | def __init__(self, optimizer, warmup_period, last_step=-1): 127 | group_count = len(optimizer.param_groups) 128 | warmup_params = get_warmup_params(warmup_period, group_count) 129 | super().__init__(optimizer, warmup_params, last_step, warmup_period) 130 | 131 | def warmup_factor(self, step, warmup_period): 132 | if step + 1 >= warmup_period: 133 | return 1.0 134 | else: 135 | return 1.0 - math.exp(-(step+1) / warmup_period) 136 | -------------------------------------------------------------------------------- /optune_from_scratch.py: -------------------------------------------------------------------------------- 1 | """ optune_from_scratch.py 2 | Tune neural networks using Optuna 3 | Developed for Tabular Transfer Learning project 4 | March 2022 5 | """ 6 | 7 | import train_net_from_scratch 8 | import hydra 9 | import optuna 10 | import sys 11 | import deep_tabular as dt 12 | import os 13 | import copy 14 | from omegaconf import DictConfig, OmegaConf 15 | import json 16 | 17 | 18 | def sample_value_with_default(trial, name, distr, min, max, default): 19 | # chooses suggested or default value with 50/50 chance 20 | if distr == 'uniform': 21 | value_suggested = trial.suggest_uniform(name, min, max) 22 | elif distr == 'loguniform': 23 | value_suggested = trial.suggest_loguniform(name, min, max) 24 | value = value_suggested if trial.suggest_categorical(f'optional_{name}', [False, True]) else default 25 | return value 26 | # 27 | 28 | def get_parameters(model, trial): 29 | if model=='ft_transformer': 30 | model_params = { 31 | 'd_embedding': trial.suggest_int('d_embedding', 32, 512, step=8), #using n_heads = 8 by default 32 | 'n_layers': trial.suggest_int('n_layers', 1, 4), 33 | 'd_ffn_factor': trial.suggest_uniform('d_ffn_factor', 2/3, 8/3), 34 | 'attention_dropout': trial.suggest_uniform('attention_dropout', 0.0, 0.5), 35 | 'ffn_dropout' : trial.suggest_uniform('ffn_dropout', 0.0, 0.5), 36 | 'residual_dropout': sample_value_with_default(trial, 'residual_dropout', 'uniform', 0.0, 0.2, 0.0), 37 | } 38 | training_params = { 39 | 'lr': trial.suggest_loguniform('lr', 1e-5, 1e-3), 40 | 'weight_decay': trial.suggest_loguniform('weight_decay', 1e-6, 1e-3), 41 | } 42 | 43 | if model=='resnet': 44 | model_params = { 45 | 'd_embedding': trial.suggest_int('d_embedding', 32, 512, step=8), 46 | 'd_hidden_factor': trial.suggest_uniform('d_hidden_factor', 1.0, 4.0), 47 | 'n_layers': trial.suggest_int('n_layers', 1, 8,), 48 | 'hidden_dropout': trial.suggest_uniform('hidden_dropout', 0.0, 0.5), 49 | 'residual_dropout': sample_value_with_default(trial, 'residual_dropout', 'uniform', 0.0, 0.5, 0.0), 50 | } 51 | training_params = { 52 | 'lr': trial.suggest_loguniform('lr', 1e-5, 1e-3), 53 | 'weight_decay': sample_value_with_default(trial, 'weight_decay', 'loguniform', 1e-6, 1e-3, 0.0), 54 | } 55 | 56 | if model=='mlp': 57 | n_layers = trial.suggest_int('n_layers', 1, 8) 58 | suggest_dim = lambda name: trial.suggest_int(name, 1, 512) 59 | d_first = [suggest_dim('d_first')] if n_layers else [] 60 | d_middle = ([suggest_dim('d_middle')] * (n_layers - 2) if n_layers > 2 else []) 61 | d_last = [suggest_dim('d_last')] if n_layers > 1 else [] 62 | layers = d_first + d_middle + d_last 63 | 64 | model_params = { 65 | 'd_embedding': trial.suggest_int('d_embedding', 32, 512, step=8), 66 | 'd_layers': layers, 67 | 'dropout': sample_value_with_default(trial, 'dropout', 'uniform', 0.0, 0.5, 0.0), 68 | } 69 | training_params = { 70 | 'lr': trial.suggest_loguniform('lr', 1e-5, 1e-3),
71 | 'weight_decay': sample_value_with_default(trial, 'weight_decay', 'loguniform', 1e-6, 1e-3, 0.0), 72 | } 73 | 74 | return model_params, training_params 75 | 76 | 77 | 78 | def objective(trial, cfg: DictConfig, trial_configs, trial_stats): 79 | 80 | model_params, training_params = get_parameters(cfg.model.name, trial) # need to suggest parameters for optuna here, probably writing a function for suggesting parameters is the optimal way 81 | 82 | config = copy.deepcopy(cfg) # create config for train_model with suggested parameters 83 | for par, value in model_params.items(): 84 | config.model[par] = value 85 | for par, value in training_params.items(): 86 | config.hyp[par] = value 87 | 88 | 89 | stats = train_net_from_scratch.main(config) 90 | 91 | trial_configs.append(config) 92 | trial_stats.append(stats) 93 | print(stats) 94 | 95 | return stats['val_stats']['score'] 96 | 97 | 98 | @hydra.main(config_path="config", config_name="optune_config") 99 | def main(cfg): 100 | n_optuna_trials = 50 101 | 102 | trial_stats = [] 103 | trial_configs = [] 104 | study = optuna.create_study(direction="maximize", sampler=optuna.samplers.TPESampler(), pruner=optuna.pruners.MedianPruner()) 105 | func = lambda trial: objective(trial, cfg, trial_configs, trial_stats) 106 | study.optimize(func, n_trials=n_optuna_trials) 107 | 108 | best_trial = study.best_trial 109 | 110 | for key, value in best_trial.params.items(): 111 | print("{}: {}".format(key, value)) 112 | 113 | best_stats = trial_stats[best_trial.number] 114 | 115 | with open(os.path.join("best_stats.json"), "w") as fp: 116 | json.dump(best_stats, fp, indent = 4) 117 | with open(os.path.join("best_config.json"), "w") as fp: 118 | json.dump(best_trial.params, fp, indent = 4) 119 | 120 | 121 | 122 | 123 | 124 | 125 | if __name__ == "__main__": 126 | run_id = dt.utils.generate_run_id() 127 | sys.argv.append(f"+run_id={run_id}") 128 | main() 129 | 130 | 131 | 132 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | hydra-core==1.1.1 2 | icecream~=2.1.1 3 | matplotlib~=3.5.0 4 | numpy~=1.21.4 5 | omegaconf~=2.1.1 6 | pandas~=1.3.4 7 | Pillow==8.2.0 8 | scipy==1.7.0 9 | seaborn~=0.11.2 10 | svglib==1.1.0 11 | tensorboard==2.2.2 12 | tensorboard-plugin-wit==1.8.0 13 | torch~=1.10.0 14 | torchvision==0.8.2 15 | tqdm~=4.62.3 16 | -------------------------------------------------------------------------------- /train_net_from_scratch.py: -------------------------------------------------------------------------------- 1 | """ train_net_from_scratch.py 2 | Train, test, and save neural networks without transfer learning 3 | Developed for Tabular Transfer Learning project 4 | March 2022 5 | """ 6 | 7 | import json 8 | import logging 9 | import os 10 | import sys 11 | from collections import OrderedDict 12 | 13 | import hydra 14 | import numpy as np 15 | import torch 16 | from icecream import ic 17 | from omegaconf import DictConfig, OmegaConf 18 | from torch.utils.tensorboard import SummaryWriter 19 | 20 | import deep_tabular as dt 21 | 22 | 23 | 24 | # Ignore statements for pylint: 25 | # Too many branches (R0912), Too many statements (R0915), No member (E1101), 26 | # Not callable (E1102), Invalid name (C0103), No exception (W0702), 27 | # Too many local variables (R0914), Missing docstring (C0116, C0115). 
28 | # pylint: disable=R0912, R0915, E1101, E1102, C0103, W0702, R0914, C0116, C0115 29 | 30 | @hydra.main(config_path="config", config_name="train_net_config") 31 | def main(cfg: DictConfig): 32 | device = "cuda" if torch.cuda.is_available() else "cpu" 33 | torch.backends.cudnn.benchmark = True 34 | log = logging.getLogger() 35 | log.info("\n_________________________________________________\n") 36 | log.info("train_net_from_scratch.py main() running.") 37 | log.info(OmegaConf.to_yaml(cfg)) 38 | if cfg.hyp.save_period < 0: 39 | cfg.hyp.save_period = 1e8 40 | torch.manual_seed(cfg.hyp.seed) 41 | torch.cuda.manual_seed_all(cfg.hyp.seed) 42 | writer = SummaryWriter(log_dir=f"tensorboard") 43 | 44 | #################################################### 45 | # Dataset and Network and Optimizer 46 | loaders, unique_categories, n_numerical, n_classes = dt.utils.get_dataloaders(cfg) 47 | 48 | net, start_epoch, optimizer_state_dict = dt.utils.load_model_from_checkpoint(cfg.model, 49 | n_numerical, 50 | unique_categories, 51 | n_classes, 52 | device) 53 | pytorch_total_params = sum(p.numel() for p in net.parameters()) 54 | 55 | log.info(f"This {cfg.model.name} has {pytorch_total_params / 1e6:0.3f} million parameters.") 56 | log.info(f"Training will start at epoch {start_epoch}.") 57 | 58 | optimizer, warmup_scheduler, lr_scheduler = dt.utils.get_optimizer_for_single_net(cfg.hyp, 59 | net, 60 | optimizer_state_dict) 61 | criterion = dt.utils.get_criterion(cfg.dataset.task) 62 | train_setup = dt.TrainingSetup(criterions=criterion, 63 | optimizer=optimizer, 64 | scheduler=lr_scheduler, 65 | warmup=warmup_scheduler) 66 | #################################################### 67 | 68 | #################################################### 69 | # Train 70 | log.info(f"==> Starting training for {max(cfg.hyp.epochs - start_epoch, 0)} epochs...") 71 | highest_val_acc_so_far = -np.inf 72 | done = False 73 | epoch = start_epoch 74 | best_epoch = epoch 75 | 76 | while not done and epoch < cfg.hyp.epochs: 77 | # forward and backward pass for one whole epoch handeld inside dt.default_training_loop() 78 | loss = dt.default_training_loop(net, loaders["train"], train_setup, device) 79 | log.info(f"Training loss at epoch {epoch}: {loss}") 80 | 81 | # if the loss is nan, then stop the training 82 | if np.isnan(float(loss)): 83 | raise ValueError(f"{ic.format()} Loss is nan, exiting...") 84 | 85 | # TensorBoard writing 86 | writer.add_scalar("Loss/loss", loss, epoch) 87 | for i in range(len(optimizer.param_groups)): 88 | writer.add_scalar(f"Learning_rate/group{i}", 89 | optimizer.param_groups[i]["lr"], 90 | epoch) 91 | 92 | # evaluate the model periodically and at the final epoch 93 | if (epoch + 1) % cfg.hyp.val_period == 0 or epoch + 1 == cfg.hyp.epochs: 94 | test_stats, val_stats, train_stats = dt.evaluate_model(net, 95 | [loaders["test"], loaders["val"], loaders["train"]], 96 | cfg.dataset.task, 97 | device) 98 | log.info(f"Training accuracy: {json.dumps(train_stats, indent=4)}") 99 | log.info(f"Val accuracy: {json.dumps(val_stats, indent=4)}") 100 | log.info(f"Test accuracy: {json.dumps(test_stats, indent=4)}") 101 | 102 | dt.utils.write_to_tb([train_stats["score"], val_stats["score"], test_stats["score"]], 103 | [f"train_acc-{cfg.dataset.name}", 104 | f"val_acc-{cfg.dataset.name}", 105 | f"test_acc-{cfg.dataset.name}"], 106 | epoch, 107 | writer) 108 | 109 | if cfg.hyp.use_patience: 110 | val_stats, test_stats = dt.evaluate_model(net, 111 | [loaders["val"], loaders["test"]], 112 | cfg.dataset.task, 113 | device) 114 
| if val_stats["score"] > highest_val_acc_so_far: 115 | best_epoch = epoch 116 | highest_val_acc_so_far = val_stats["score"] 117 | log.info(f"New best epoch, val score: {val_stats['score']}") 118 | # save current model 119 | state = {"net": net.state_dict(), "epoch": epoch, "optimizer": optimizer.state_dict()} 120 | out_str = "model_best.pth" 121 | log.info(f"Saving model to: {out_str}") 122 | torch.save(state, out_str) 123 | 124 | if epoch - best_epoch > cfg.hyp.patience: 125 | done = True 126 | epoch += 1 127 | writer.flush() 128 | writer.close() 129 | 130 | log.info("Running Final Evaluation...") 131 | checkpoint_path = "model_best.pth" 132 | net.load_state_dict(torch.load(checkpoint_path)["net"]) 133 | test_stats, val_stats, train_stats = dt.evaluate_model(net, 134 | [loaders["test"], loaders["val"], loaders["train"]], 135 | cfg.dataset.task, 136 | device) 137 | 138 | log.info(f"Training accuracy: {json.dumps(train_stats, indent=4)}") 139 | log.info(f"Val accuracy: {json.dumps(val_stats, indent=4)}") 140 | log.info(f"Test accuracy: {json.dumps(test_stats, indent=4)}") 141 | 142 | stats = OrderedDict([("dataset", cfg.dataset.name), 143 | ("model_name", cfg.model.name), 144 | ("run_id", cfg.run_id), 145 | ("best_epoch", best_epoch), 146 | ("routine", "from_scratch"), 147 | ("test_stats", test_stats), 148 | ("train_stats", train_stats), 149 | ("val_stats", val_stats)]) 150 | with open(os.path.join("stats.json"), "w") as fp: 151 | json.dump(stats, fp, indent=4) 152 | log.info(json.dumps(stats, indent=4)) 153 | #################################################### 154 | return stats 155 | 156 | 157 | if __name__ == "__main__": 158 | run_id = dt.utils.generate_run_id() 159 | sys.argv.append(f"+run_id={run_id}") 160 | main() 161 | -------------------------------------------------------------------------------- /transfer_learn_net.py: -------------------------------------------------------------------------------- 1 | """ transfer_learn_net.py 2 | Pre-train/fine-tune, test, and save neural networks 3 | Developed for Tabular Transfer Learning project 4 | March 2022 5 | """ 6 | 7 | import json 8 | import logging 9 | import os 10 | import sys 11 | from collections import OrderedDict 12 | 13 | import hydra 14 | import numpy as np 15 | import torch 16 | from icecream import ic 17 | from omegaconf import DictConfig, OmegaConf 18 | from torch.utils.tensorboard import SummaryWriter 19 | 20 | import deep_tabular as dt 21 | 22 | 23 | 24 | # Ignore statements for pylint: 25 | # Too many branches (R0912), Too many statements (R0915), No member (E1101), 26 | # Not callable (E1102), Invalid name (C0103), No exception (W0702), 27 | # Too many local variables (R0914), Missing docstring (C0116, C0115). 
28 | # pylint: disable=R0912, R0915, E1101, E1102, C0103, W0702, R0914, C0116, C0115 29 | 30 | @hydra.main(config_path="config", config_name="transfer_learn_net_config") 31 | def main(cfg: DictConfig): 32 | device = "cuda" if torch.cuda.is_available() else "cpu" 33 | torch.backends.cudnn.benchmark = True 34 | log = logging.getLogger() 35 | log.info("\n_________________________________________________\n") 36 | log.info("train_net_from_scratch.py main() running.") 37 | log.info(OmegaConf.to_yaml(cfg)) 38 | if cfg.hyp.save_period < 0: 39 | cfg.hyp.save_period = 1e8 40 | torch.manual_seed(cfg.hyp.seed) 41 | torch.cuda.manual_seed_all(cfg.hyp.seed) 42 | writer = SummaryWriter(log_dir=f"tensorboard") 43 | 44 | #################################################### 45 | # Dataset and Network and Optimizer 46 | loaders, unique_categories, n_numerical, n_classes = dt.utils.get_dataloaders(cfg) 47 | 48 | net, start_epoch, optimizer_state_dict = dt.utils.load_transfer_model_from_checkpoint(cfg.model, 49 | n_numerical, 50 | unique_categories, 51 | n_classes, 52 | device) 53 | pytorch_total_params = sum(p.numel() for p in net.parameters()) 54 | 55 | log.info(f"This {cfg.model.name} has {pytorch_total_params / 1e6:0.3f} million parameters.") 56 | log.info(f"Training will start at epoch {start_epoch}.") 57 | 58 | optimizer, warmup_scheduler, lr_scheduler = dt.utils.get_optimizer_for_single_net(cfg.hyp, 59 | net, 60 | optimizer_state_dict) 61 | criterion = dt.utils.get_criterion(cfg.dataset.task) 62 | train_setup = dt.TrainingSetup(criterions=criterion, 63 | optimizer=optimizer, 64 | scheduler=lr_scheduler, 65 | warmup=warmup_scheduler) 66 | #################################################### 67 | 68 | #################################################### 69 | # Train 70 | log.info(f"==> Starting training for {max(cfg.hyp.epochs - start_epoch, 0)} epochs...") 71 | highest_val_acc_so_far = -np.inf 72 | done = False 73 | epoch = start_epoch 74 | best_epoch = epoch 75 | 76 | while not done and epoch < cfg.hyp.epochs: 77 | # forward and backward pass for one whole epoch handeld inside dt.default_training_loop() 78 | loss = dt.default_training_loop(net, loaders["train"], train_setup, device) 79 | log.info(f"Training loss at epoch {epoch}: {loss}") 80 | # if the loss is nan, then stop the training 81 | if np.isnan(float(loss)): 82 | raise ValueError(f"{ic.format()} Loss is nan, exiting...") 83 | 84 | # TensorBoard writing 85 | writer.add_scalar("Loss/loss", loss, epoch) 86 | for i in range(len(optimizer.param_groups)): 87 | writer.add_scalar(f"Learning_rate/group{i}", 88 | optimizer.param_groups[i]["lr"], 89 | epoch) 90 | 91 | # evaluate the model periodically and at the final epoch 92 | if (epoch + 1) % cfg.hyp.val_period == 0 or epoch + 1 == cfg.hyp.epochs: 93 | test_stats, val_stats, train_stats = dt.evaluate_model(net, 94 | [loaders["test"], loaders["val"], loaders["train"]], 95 | cfg.dataset.task, 96 | device) 97 | log.info(f"Training accuracy: {json.dumps(train_stats, indent=4)}") 98 | log.info(f"Val accuracy: {json.dumps(val_stats, indent=4)}") 99 | log.info(f"Test accuracy: {json.dumps(test_stats, indent=4)}") 100 | 101 | dt.utils.write_to_tb([train_stats["score"], val_stats["score"], test_stats["score"]], 102 | [f"train_acc-{cfg.dataset.name}", 103 | f"val_acc-{cfg.dataset.name}", 104 | f"test_acc-{cfg.dataset.name}"], 105 | epoch, 106 | writer) 107 | 108 | if cfg.hyp.use_patience: 109 | val_stats, test_stats = dt.evaluate_model(net, 110 | [loaders["val"], loaders["test"]], 111 | cfg.dataset.task, 112 | 
device) 113 | if val_stats["score"] > highest_val_acc_so_far: 114 | best_epoch = epoch 115 | highest_val_acc_so_far = val_stats["score"] 116 | log.info(f"New best epoch, val score: {val_stats['score']}") 117 | # save current model 118 | state = {"net": net.state_dict(), "epoch": epoch, "optimizer": optimizer.state_dict()} 119 | out_str = "model_best.pth" 120 | log.info(f"Saving model to: {out_str}") 121 | torch.save(state, out_str) 122 | 123 | if epoch - best_epoch > cfg.hyp.patience: 124 | done = True 125 | epoch += 1 126 | writer.flush() 127 | writer.close() 128 | 129 | log.info("Running Final Evaluation...") 130 | checkpoint_path = "model_best.pth" 131 | net.load_state_dict(torch.load(checkpoint_path)["net"]) 132 | test_stats, val_stats, train_stats = dt.evaluate_model(net, 133 | [loaders["test"], loaders["val"], loaders["train"]], 134 | cfg.dataset.task, 135 | device) 136 | 137 | log.info(f"Training accuracy: {json.dumps(train_stats, indent=4)}") 138 | log.info(f"Val accuracy: {json.dumps(val_stats, indent=4)}") 139 | log.info(f"Test accuracy: {json.dumps(test_stats, indent=4)}") 140 | 141 | stats = OrderedDict([("dataset", cfg.dataset.name), 142 | ("model_name", cfg.model.name), 143 | ("run_id", cfg.run_id), 144 | ("best_epoch", best_epoch), 145 | ("routine", "from_scratch"), 146 | ("test_stats", test_stats), 147 | ("train_stats", train_stats), 148 | ("val_stats", val_stats)]) 149 | with open(os.path.join("stats.json"), "w") as fp: 150 | json.dump(stats, fp, indent=4) 151 | log.info(json.dumps(stats, indent=4)) 152 | #################################################### 153 | return stats 154 | 155 | 156 | if __name__ == "__main__": 157 | run_id = dt.utils.generate_run_id() 158 | sys.argv.append(f"+run_id={run_id}") 159 | main() 160 | --------------------------------------------------------------------------------