├── .gitignore ├── LICENSE ├── README.md ├── regmean_demo.ipynb ├── requirements.txt ├── scripts ├── deberta │ ├── incremental_emotion.py │ ├── incremental_ner.py │ └── pairwise_emotion.py ├── distilbert │ ├── pairwise_glue_difftask.py │ └── pairwise_glue_subset.py ├── roberta │ ├── incremental_emotion.py │ ├── incremental_ner.py │ ├── pairwise_emotion.py │ ├── pairwise_glue_difftask.py │ └── pairwise_glue_subset.py └── t5 │ ├── incremental_emotion.py │ └── pairwise_emotion.py └── src ├── __init__.py ├── configs ├── datasets │ ├── emotion.yaml │ ├── emotion_diffseed.yaml │ ├── emotion_gen.yaml │ ├── emotion_gen_mtl.yaml │ ├── emotion_mtl.yaml │ ├── glue.yaml │ ├── glue_mtl.yaml │ ├── ner.yaml │ ├── ner_diffseed.yaml │ ├── ner_mtl.yaml │ └── subsets │ │ ├── glue_mtl_subset_100.yaml │ │ ├── glue_mtl_subset_1k.yaml │ │ ├── glue_partition_10k_iid.yaml │ │ ├── glue_partition_1k_iid.yaml │ │ ├── glue_partition_1k_niid.yaml │ │ └── glue_partition_1k_niid_diffseed.yaml ├── defaults.yaml └── exps │ ├── deberta │ ├── deberta-large-emotion-fisher.yaml │ ├── deberta-large-emotion-locals.yaml │ ├── deberta-large-emotion-mtl.yaml │ ├── deberta-large-emotion-regmean.yaml │ ├── deberta-large-emotion.yaml │ ├── ner │ │ ├── deberta-ner-locals.yaml │ │ ├── deberta-ner-mtl.yaml │ │ └── ood │ │ │ ├── deberta-ner-ensemble.yaml │ │ │ ├── deberta-ner-fisher.yaml │ │ │ ├── deberta-ner-mtl-ood.yaml │ │ │ ├── deberta-ner-regmean.yaml │ │ │ └── deberta-ner.yaml │ └── ood │ │ ├── deberta-large-emotion-ensemble-ood.yaml │ │ ├── deberta-large-emotion-fisher-norm-ood.yaml │ │ ├── deberta-large-emotion-fisher-ood.yaml │ │ ├── deberta-large-emotion-mtl-ood.yaml │ │ ├── deberta-large-emotion-ood.yaml │ │ └── deberta-large-emotion-regmean-ood.yaml │ ├── distilbert │ ├── coeff │ │ ├── distilbert-coeff-50-fisher-fixregression-normalize.yaml │ │ ├── distilbert-coeff-50-fisher-fixregression.yaml │ │ ├── distilbert-coeff-50-fisher-normalize.yaml │ │ ├── distilbert-coeff-50-fisher.yaml │ │ ├── 
distilbert-coeff-50.yaml │ │ └── distilbert-coeff.yaml │ ├── distilbert-base.yaml │ ├── distilbert-fisher.yaml │ ├── distilbert-mtl.yaml │ ├── distilbert-ot.yaml │ ├── distilbert-regmean.yaml │ ├── fisher │ │ ├── distilbert-fisher_10.yaml │ │ ├── distilbert-fisher_100.yaml │ │ ├── distilbert-fisher_1000-normalize.yaml │ │ └── distilbert-fisher_abl.yaml │ ├── regmean │ │ └── distilbert-regmean-coeff.yaml │ ├── subset │ │ ├── distilbert-iid-10k-regmean-whead.yaml │ │ ├── distilbert-iid-1k-debug.yaml │ │ ├── distilbert-iid-1k-regmean.yaml │ │ ├── distilbert-iid-1k-whead.yaml │ │ ├── distilbert-iid-1k.yaml │ │ ├── distilbert-niid-1k-fisher-whead.yaml │ │ ├── distilbert-niid-1k-regmean-whead.yaml │ │ └── distilbert-niid-1k-whead.yaml │ └── tsp │ │ ├── distilbert-tsp1-5.yaml │ │ ├── distilbert-tsp2-5.yaml │ │ ├── distilbert-tsp3-5.yaml │ │ └── distilbert-tsp4-5.yaml │ ├── roberta-base │ ├── glue │ │ ├── roberta-base-fisher-norm.yaml │ │ ├── roberta-base-fisher.yaml │ │ ├── roberta-base-mtl-debug.yaml │ │ ├── roberta-base-mtl.yaml │ │ ├── roberta-base-regmean.yaml │ │ ├── roberta-base.yaml │ │ ├── roberta-coeff-50-fisher.yaml │ │ └── roberta-coeff-50.yaml │ ├── ner │ │ ├── distilbert-base-ner-mtl.yaml │ │ ├── distilbert-base-ner.yaml │ │ ├── ood │ │ │ ├── roberta-base-ner-ensemble.yaml │ │ │ ├── roberta-base-ner-fisher.yaml │ │ │ ├── roberta-base-ner-mtl-ood.yaml │ │ │ ├── roberta-base-ner-regmean.yaml │ │ │ └── roberta-base-ner.yaml │ │ ├── roberta-base-ner-fisher.yaml │ │ ├── roberta-base-ner-mtl.yaml │ │ ├── roberta-base-ner-regmean.yaml │ │ └── roberta-base-ner.yaml │ ├── ood │ │ ├── roberta-base-emotion-ensemble-ood.yaml │ │ ├── roberta-base-emotion-fisher-norm-ood.yaml │ │ ├── roberta-base-emotion-fisher-ood.yaml │ │ ├── roberta-base-emotion-local-ood.yaml │ │ ├── roberta-base-emotion-mtl-ood.yaml │ │ ├── roberta-base-emotion-ood.yaml │ │ ├── roberta-base-emotion-partial-regmean-ood.yaml │ │ ├── roberta-base-emotion-regmean-diag-ood.yaml │ │ ├── 
roberta-base-emotion-regmean-ood.yaml │ │ └── roberta-base-emotion-regmean-rw-ood.yaml │ ├── roberta-base-emotion-ensemble.yaml │ ├── roberta-base-emotion-fisher.yaml │ ├── roberta-base-emotion-mtl.yaml │ ├── roberta-base-emotion-ot.yaml │ ├── roberta-base-emotion-regmean.yaml │ ├── roberta-base-emotion.yaml │ └── subset │ │ ├── rb-1k-fisher-whead.yaml │ │ ├── rb-1k-regmean-whead.yaml │ │ └── rb-1k-whead.yaml │ └── t5 │ ├── ood │ ├── t5-base-emotion-ensemble-ood.yaml │ ├── t5-base-emotion-fisher-norm-ood.yaml │ ├── t5-base-emotion-fisher-ood.yaml │ ├── t5-base-emotion-mtl-ood.yaml │ ├── t5-base-emotion-ood.yaml │ ├── t5-base-emotion-regmean-full-ood.yaml │ ├── t5-base-emotion-regmean-ood.yaml │ ├── t5-base-emotion-regmean-ood2.yaml │ └── t5-base-emotion-regmean-rw-ood.yaml │ ├── t5-base-emotion-debug.yaml │ ├── t5-base-emotion-fisher.yaml │ ├── t5-base-emotion-mtl.yaml │ ├── t5-base-emotion-regmean-diag.yaml │ ├── t5-base-emotion-regmean.yaml │ ├── t5-base-emotion.yaml │ └── t5-base-local.yaml ├── data_manager ├── __init__.py ├── data_utils.py ├── emotion_data_manager.py ├── emotion_gen_data_manager.py ├── glue_data_manager.py ├── metrics │ ├── __init__.py │ ├── glue.py │ └── ner.py ├── ner_data_manager.py ├── ner_data_utils.py └── simple_data_manager.py ├── model_merge ├── __init__.py ├── avg_merger.py ├── base.py ├── ensembler.py ├── local_trainer.py ├── misc.py ├── net.py ├── ot_merger.py └── ot_utils │ └── ot_ground_metric.py ├── remote_io ├── __init__.py ├── get_resources.py └── zoo.py ├── run_experiments.py └── utils ├── __init__.py ├── config.py └── initializer.py /.gitignore: -------------------------------------------------------------------------------- 1 | # added by me 2 | tmp_* 3 | runs/ 4 | wandb/ 5 | wget-log 6 | resources/ 7 | remote_zoo_cache/ 8 | helper/ 9 | 10 | # Byte-compiled / optimized / DLL files 11 | __pycache__/ 12 | *.py[cod] 13 | *$py.class 14 | 15 | # C extensions 16 | *.so 17 | 18 | # Distribution / packaging 19 | .Python 20 | /build/ 
21 | /build_output/ 22 | /develop-eggs/ 23 | /dist/ 24 | /downloads/ 25 | /eggs/ 26 | .eggs/ 27 | lib/ 28 | lib64/ 29 | parts/ 30 | /sdist/ 31 | /var/ 32 | /wheels/ 33 | *.egg-info/ 34 | .installed.cfg 35 | *.egg 36 | MANIFEST 37 | 38 | 39 | # PyInstaller 40 | # Usually these files are written by a python script from a template 41 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 42 | *.manifest 43 | *.spec 44 | 45 | # Installer logs 46 | pip-log.txt 47 | pip-delete-this-directory.txt 48 | 49 | # Unit test / coverage reports 50 | htmlcov/ 51 | .tox/ 52 | .coverage 53 | .coverage.* 54 | .cache 55 | nosetests.xml 56 | coverage.xml 57 | *.cover 58 | .hypothesis/ 59 | .pytest_cache/ 60 | 61 | # Translations 62 | *.mo 63 | *.pot 64 | 65 | # Django stuff: 66 | *.log 67 | local_settings.py 68 | db.sqlite3 69 | 70 | # Flask stuff: 71 | instance/ 72 | .webassets-cache 73 | 74 | # Scrapy stuff: 75 | .scrapy 76 | 77 | # Sphinx documentation 78 | /docs/_build/ 79 | /docs/build/ 80 | 81 | # PyBuilder 82 | target/ 83 | 84 | # Jupyter Notebook 85 | .ipynb_checkpoints 86 | 87 | # pyenv 88 | .python-version 89 | 90 | # celery beat schedule file 91 | celerybeat-schedule 92 | 93 | # SageMath parsed files 94 | *.sage.py 95 | 96 | # Environments 97 | .env 98 | .venv 99 | env/ 100 | venv/ 101 | ENV/ 102 | env.bak/ 103 | venv.bak/ 104 | /*.venv/ 105 | 106 | # Spyder project settings 107 | .spyderproject 108 | .spyproject 109 | 110 | # Rope project settings 111 | .ropeproject 112 | 113 | # mkdocs documentation 114 | /site 115 | 116 | # mypy 117 | .mypy_cache/ 118 | 119 | # IDE 120 | .vscode 121 | .devcontainer 122 | .swp 123 | .idea/ 124 | 125 | # Editor artifacts 126 | .*.sw? 
127 | *~ 128 | \#*\# 129 | .\#* 130 | 131 | # rst files 132 | /docs/source/modules.rst 133 | /docs/source/model_merge*.rst 134 | 135 | # Project docker switch file 136 | .lcldev 137 | 138 | # shrinkwrap files 139 | shrinkwrap-build-log.txt 140 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | aiohttp==3.8.4 2 | aiosignal==1.3.1 3 | async-timeout==4.0.2 4 | attrs==22.2.0 5 | certifi==2022.12.7 6 | charset-normalizer==3.1.0 7 | datasets==2.3.2 8 | dill==0.3.5.1 9 | filelock==3.9.0 10 | frozenlist==1.3.3 11 | fsspec==2023.3.0 12 | huggingface-hub==0.13.0 13 | idna==3.4 14 | joblib==1.2.0 15 | multidict==6.0.4 16 | multiprocess==0.70.13 17 | numpy==1.24.2 18 | nvidia-cublas-cu11==11.10.3.66 19 | nvidia-cuda-nvrtc-cu11==11.7.99 20 | nvidia-cuda-runtime-cu11==11.7.99 21 | nvidia-cudnn-cu11==8.5.0.96 22 | packaging==23.0 23 | pandas==1.5.3 24 | POT==0.8.2 25 | pyarrow==11.0.0 26 | python-dateutil==2.8.2 27 | pytz==2022.7.1 28 | PyYAML==6.0 29 | regex==2022.10.31 30 | requests==2.28.2 31 | responses==0.18.0 32 | scikit-learn==1.2.1 33 | scipy==1.10.1 34 | six==1.16.0 35 | threadpoolctl==3.1.0 36 | tokenizers==0.12.1 37 | torch==1.13.1 38 | tqdm==4.65.0 39 | transformers==4.20.1 40 | typing_extensions==4.5.0 41 | urllib3==1.26.14 42 | xxhash==3.2.0 43 | yarl==1.8.2 44 | -------------------------------------------------------------------------------- /scripts/deberta/incremental_emotion.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Bloomberg Finance L.P. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import os 16 | 17 | # same classification head init, model merging 18 | 19 | orders = ["model2", "model5", "model1", "model3", "model0"] 20 | 21 | for seed in [1, 2, 3, 4, 5]: 22 | for idx in range(2, len(orders) + 1): 23 | # simple 24 | to_merge = " ".join(orders[:idx]) 25 | os.system( 26 | f"python -m src.run_experiments --config_file src/configs/defaults.yaml src/configs/datasets/emotion.yaml src/configs/exps/deberta/ood/deberta-large-emotion-ood.yaml --filter_model {to_merge} --templates seed={seed}" 27 | ) 28 | # fisher 29 | os.system( 30 | f"python -m src.run_experiments --config_file src/configs/defaults.yaml src/configs/datasets/emotion.yaml src/configs/exps/deberta/ood/deberta-large-emotion-fisher-ood.yaml --filter_model {to_merge} --templates seed={seed}" 31 | ) 32 | # regmean 33 | os.system( 34 | f"python -m src.run_experiments --config_file src/configs/defaults.yaml src/configs/datasets/emotion.yaml src/configs/exps/deberta/ood/deberta-large-emotion-regmean-ood.yaml --filter_model {to_merge} --templates seed={seed}" 35 | ) 36 | 37 | # different classification head init, model merging 38 | 39 | for seed in [1, 2, 3, 4, 5]: 40 | for idx in range(2, len(orders) + 1): 41 | to_merge = " ".join(orders[:idx]) 42 | # simple 43 | os.system( 44 | f"python -m src.run_experiments --config_file src/configs/defaults.yaml src/configs/datasets/emotion_diffseed.yaml src/configs/exps/deberta/ood/deberta-large-emotion-ood.yaml --filter_model {to_merge} --templates dseed_generator={seed} seed={seed}" 45 | ) 46 | # fisher 47 
| os.system( 48 | f"python -m src.run_experiments --config_file src/configs/defaults.yaml src/configs/datasets/emotion_diffseed.yaml src/configs/exps/deberta/ood/deberta-large-emotion-fisher-ood.yaml --filter_model {to_merge} --templates dseed_generator={seed} seed={seed}" 49 | ) 50 | # regmean 51 | os.system( 52 | f"python -m src.run_experiments --config_file src/configs/defaults.yaml src/configs/datasets/emotion_diffseed.yaml src/configs/exps/deberta/ood/deberta-large-emotion-regmean-ood.yaml --filter_model {to_merge} --templates dseed_generator={seed} seed={seed}" 53 | ) 54 | 55 | for seed in [1, 2, 3, 4, 5]: 56 | # multi task learning comparator 57 | to_merge = " ".join(orders) 58 | os.system( 59 | f"python -m src.run_experiments --config_file src/configs/defaults.yaml src/configs/datasets/emotion_mtl.yaml src/configs/exps/deberta/ood/deberta-large-emotion-mtl-ood.yaml --filter_model {to_merge} --templates seed={seed}" 60 | ) 61 | 62 | # ensembling 63 | to_merge = " ".join(orders) 64 | os.system( 65 | f"python -m src.run_experiments --config_file src/configs/defaults.yaml src/configs/datasets/emotion.yaml src/configs/exps/deberta/ood/deberta-large-emotion-ensemble-ood.yaml --filter_model {to_merge} --templates seed={seed}" 66 | ) 67 | 68 | # individual models, without merging 69 | for to_merge in orders: 70 | os.system( 71 | f"python -m src.run_experiments --config_file src/configs/defaults.yaml src/configs/datasets/emotion.yaml src/configs/exps/deberta/ood/deberta-large-emotion-ood.yaml --filter_model {to_merge} --templates seed={seed}" 72 | ) 73 | -------------------------------------------------------------------------------- /scripts/deberta/pairwise_emotion.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Bloomberg Finance L.P. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import os 16 | 17 | # same classification head init 18 | 19 | for seed in [1, 2, 3, 4, 5]: 20 | for idx1 in range(0, 6): 21 | for idx2 in range(idx1 + 1, 6): 22 | if idx1 != 4 and idx2 != 4: # this dataset uses a different label space 23 | # simple 24 | os.system( 25 | f"python -m src.run_experiments --config_file src/configs/defaults.yaml src/configs/datasets/emotion.yaml src/configs/exps/deberta/deberta-large-emotion.yaml --filter_model model{idx1} model{idx2} --templates seed={seed}" 26 | ) 27 | # fisher 28 | os.system( 29 | f"python -m src.run_experiments --config_file src/configs/defaults.yaml src/configs/datasets/emotion.yaml src/configs/exps/deberta/deberta-large-emotion-fisher.yaml --filter_model model{idx1} model{idx2} --templates seed={seed}" 30 | ) 31 | # regmean 32 | os.system( 33 | f"python -m src.run_experiments --config_file src/configs/defaults.yaml src/configs/datasets/emotion.yaml src/configs/exps/deberta/deberta-large-emotion-regmean.yaml --filter_model model{idx1} model{idx2} --templates seed={seed}" 34 | ) 35 | 36 | # different classification head init 37 | 38 | for seed in [1, 2, 3, 4, 5]: 39 | for idx1 in range(0, 6): 40 | for idx2 in range(idx1 + 1, 6): 41 | if idx1 != 4 and idx2 != 4: # this dataset uses a different label space 42 | # simple 43 | os.system( 44 | f"python -m src.run_experiments --config_file src/configs/defaults.yaml src/configs/datasets/emotion_diffseed.yaml src/configs/exps/deberta/deberta-large-emotion.yaml --filter_model model{idx1} model{idx2} --templates 
dseed_generator={seed} seed={seed}" 45 | ) 46 | # fisher 47 | os.system( 48 | f"python -m src.run_experiments --config_file src/configs/defaults.yaml src/configs/datasets/emotion_diffseed.yaml src/configs/exps/deberta/deberta-large-emotion-fisher.yaml --filter_model model{idx1} model{idx2} --templates dseed_generator={seed} seed={seed}" 49 | ) 50 | # regmean 51 | os.system( 52 | f"python -m src.run_experiments --config_file src/configs/defaults.yaml src/configs/datasets/emotion_diffseed.yaml src/configs/exps/deberta/deberta-large-emotion-regmean.yaml --filter_model model{idx1} model{idx2} --templates dseed_generator={seed} seed={seed}" 53 | ) 54 | -------------------------------------------------------------------------------- /scripts/distilbert/pairwise_glue_difftask.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Bloomberg Finance L.P. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | import os 16 | 17 | for seed in [1, 2, 3, 4, 5]: 18 | for idx1 in range(0, 9): 19 | for idx2 in range(idx1 + 1, 9): 20 | os.system( 21 | f"python -m src.run_experiments --config_file src/configs/defaults.yaml src/configs/datasets/glue.yaml src/configs/exps/distilbert/distilbert-base.yaml --filter_model model{idx1} model{idx2} --templates seed={seed}" 22 | ) 23 | # fisher 24 | os.system( 25 | f"python -m src.run_experiments --config_file src/configs/defaults.yaml src/configs/datasets/glue.yaml src/configs/exps/distilbert/distilbert-fisher.yaml --filter_model model{idx1} model{idx2} --templates seed={seed}" 26 | ) 27 | # regmean 28 | os.system( 29 | f"python -m src.run_experiments --config_file src/configs/defaults.yaml src/configs/datasets/glue.yaml src/configs/exps/distilbert/distilbert-regmean.yaml --filter_model model{idx1} model{idx2} --templates seed={seed}" 30 | ) 31 | -------------------------------------------------------------------------------- /scripts/distilbert/pairwise_glue_subset.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Bloomberg Finance L.P. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | import os 16 | 17 | for seed in [1, 2, 3, 4, 5]: 18 | for task in ["cola", "sst2", "mrpc", "stsb", "mnli", "qnli", "qqp", "rte"]: 19 | os.system( 20 | f"python -m src.run_experiments --config_file src/configs/defaults.yaml src/configs/datasets/subests/glue_partition_1k_niid.yaml src/configs/exps/distilbert/subset/distilbert-niid-1k-whead.yaml --templates seed={seed} dataset_name={task}" 21 | ) 22 | # fisher 23 | os.system( 24 | f"python -m src.run_experiments --config_file src/configs/defaults.yaml src/configs/datasets/subests/glue_partition_1k_niid.yaml src/configs/exps/distilbert/subset/distilbert-niid-1k-fisher-whead.yaml --templates seed={seed} dataset_name={task}" 25 | ) 26 | # regmean 27 | os.system( 28 | f"python -m src.run_experiments --config_file src/configs/defaults.yaml src/configs/datasets/subests/glue_partition_1k_niid.yaml src/configs/exps/distilbert/subset/distilbert-niid-1k-regmean-whead.yaml --templates seed={seed} dataset_name={task}" 29 | ) 30 | -------------------------------------------------------------------------------- /scripts/roberta/pairwise_emotion.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Bloomberg Finance L.P. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | import os 16 | 17 | # same classification head init 18 | 19 | for seed in [1, 2, 3, 4, 5]: 20 | for idx1 in range(0, 6): 21 | for idx2 in range(idx1 + 1, 6): 22 | if idx1 != 4 and idx2 != 4: # this dataset uses a different label space 23 | # simple 24 | os.system( 25 | f"python -m src.run_experiments --config_file src/configs/defaults.yaml src/configs/datasets/emotion.yaml src/configs/exps/roberta-base/roberta-base-emotion.yaml --filter_model model{idx1} model{idx2} --templates seed={seed}" 26 | ) 27 | # fisher 28 | os.system( 29 | f"python -m src.run_experiments --config_file src/configs/defaults.yaml src/configs/datasets/emotion.yaml src/configs/exps/roberta-base/roberta-base-emotion-fisher.yaml --filter_model model{idx1} model{idx2} --templates seed={seed}" 30 | ) 31 | # regmean 32 | os.system( 33 | f"python -m src.run_experiments --config_file src/configs/defaults.yaml src/configs/datasets/emotion.yaml src/configs/exps/roberta-base/roberta-base-emotion-regmean.yaml --filter_model model{idx1} model{idx2} --templates seed={seed}" 34 | ) 35 | 36 | # different classification head init 37 | 38 | for seed in [1, 2, 3, 4, 5]: 39 | for idx1 in range(0, 6): 40 | for idx2 in range(idx1 + 1, 6): 41 | if idx1 != 4 and idx2 != 4: # this dataset uses a different label space 42 | # simple 43 | os.system( 44 | f"python -m src.run_experiments --config_file src/configs/defaults.yaml src/configs/datasets/emotion_diffseed.yaml src/configs/exps/roberta-base/roberta-base-emotion.yaml --filter_model model{idx1} model{idx2} --templates dseed_generator={seed} seed={seed}" 45 | ) 46 | # fisher 47 | os.system( 48 | f"python -m src.run_experiments --config_file src/configs/defaults.yaml src/configs/datasets/emotion_diffseed.yaml src/configs/exps/roberta-base/roberta-base-emotion-fisher.yaml --filter_model model{idx1} model{idx2} --templates dseed_generator={seed} seed={seed}" 49 | ) 50 | # regmean 51 | os.system( 52 | f"python -m src.run_experiments --config_file 
src/configs/defaults.yaml src/configs/datasets/emotion_diffseed.yaml src/configs/exps/roberta-base/roberta-base-emotion-regmean.yaml --filter_model model{idx1} model{idx2} --templates dseed_generator={seed} seed={seed}" 53 | ) 54 | -------------------------------------------------------------------------------- /scripts/roberta/pairwise_glue_difftask.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Bloomberg Finance L.P. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | import os 16 | 17 | for seed in [1, 2, 3, 4, 5]: 18 | for idx1 in range(0, 9): 19 | for idx2 in range(idx1 + 1, 9): 20 | os.system( 21 | f"python -m src.run_experiments --config_file src/configs/defaults.yaml src/configs/datasets/glue.yaml src/configs/exps/roberta-base/glue/roberta-base.yaml --filter_model model{idx1} model{idx2} --templates seed={seed}" 22 | ) 23 | # fisher 24 | os.system( 25 | f"python -m src.run_experiments --config_file src/configs/defaults.yaml src/configs/datasets/glue.yaml src/configs/exps/roberta-base/glue/roberta-base-fisher.yaml --filter_model model{idx1} model{idx2} --templates seed={seed}" 26 | ) 27 | # regmean 28 | os.system( 29 | f"python -m src.run_experiments --config_file src/configs/defaults.yaml src/configs/datasets/glue.yaml src/configs/exps/roberta-base/glue/roberta-base-regmean.yaml --filter_model model{idx1} model{idx2} --templates seed={seed}" 30 | ) 31 | -------------------------------------------------------------------------------- /scripts/roberta/pairwise_glue_subset.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Bloomberg Finance L.P. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | import os 16 | 17 | for seed in [1, 2, 3, 4, 5]: 18 | for task in ["cola", "sst2", "mrpc", "stsb", "mnli", "qnli", "qqp", "rte"]: 19 | os.system( 20 | f"python -m src.run_experiments --config_file src/configs/defaults.yaml src/configs/datasets/subests/glue_partition_1k_niid.yaml src/configs/exps/roberta-base/subset/rb-1k-whead.yaml --templates seed={seed} dataset_name={task}" 21 | ) 22 | # fisher 23 | os.system( 24 | f"python -m src.run_experiments --config_file src/configs/defaults.yaml src/configs/datasets/subests/glue_partition_1k_niid.yaml src/configs/exps/roberta-base/subset/rb-1k-fisher-whead.yaml --templates seed={seed} dataset_name={task}" 25 | ) 26 | # regmean 27 | os.system( 28 | f"python -m src.run_experiments --config_file src/configs/defaults.yaml src/configs/datasets/subests/glue_partition_1k_niid.yaml src/configs/exps/roberta-base/subset/rb-1k-regmean-whead.yaml --templates seed={seed} dataset_name={task}" 29 | ) 30 | -------------------------------------------------------------------------------- /scripts/t5/incremental_emotion.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Bloomberg Finance L.P. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | import os 16 | 17 | orders = ["model2", "model5", "model1", "model3", "model0"] 18 | 19 | for seed in [1, 2, 3, 4, 5]: 20 | for idx in range(2, len(orders) + 1): 21 | # simple 22 | to_merge = " ".join(orders[:idx]) 23 | os.system( 24 | f"python -m src.run_experiments --config_file src/configs/defaults.yaml src/configs/datasets/emotion_gen.yaml src/configs/exps/t5/ood/t5-base-emotion-ood.yaml --filter_model {to_merge} --templates seed={seed}" 25 | ) 26 | # fisher 27 | os.system( 28 | f"python -m src.run_experiments --config_file src/configs/defaults.yaml src/configs/datasets/emotion_gen.yaml src/configs/exps/t5/ood/t5-base-emotion-fisher-ood.yaml --filter_model {to_merge} --templates seed={seed}" 29 | ) 30 | # regmean 31 | os.system( 32 | f"python -m src.run_experiments --config_file src/configs/defaults.yaml src/configs/datasets/emotion_gen.yaml src/configs/exps/t5/ood/t5-base-emotion-regmean-ood.yaml --filter_model {to_merge} --templates seed={seed}" 33 | ) 34 | 35 | # multi task learning comparator 36 | to_merge = " ".join(orders) 37 | os.system( 38 | f"python -m src.run_experiments --config_file src/configs/defaults.yaml src/configs/datasets/emotion_gen_mtl.yaml src/configs/exps/t5/ood/t5-base-emotion-mtl-ood.yaml --filter_model {to_merge} --templates seed={seed}" 39 | ) 40 | 41 | # ensembling 42 | to_merge = " ".join(orders) 43 | os.system( 44 | f"python -m src.run_experiments --config_file src/configs/defaults.yaml src/configs/datasets/emotion_gen.yaml src/configs/exps/t5/ood/t5-base-emotion-ensemble-ood.yaml --filter_model {to_merge} --templates seed={seed}" 45 | ) 46 | 47 | # individual models, without merging 48 | for to_merge in orders: 49 | os.system( 50 | f"python -m src.run_experiments --config_file src/configs/defaults.yaml src/configs/datasets/emotion_gen.yaml src/configs/exps/t5/ood/t5-base-emotion-ood.yaml --filter_model {to_merge} --templates seed={seed}" 51 | ) 52 | 
-------------------------------------------------------------------------------- /scripts/t5/pairwise_emotion.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Bloomberg Finance L.P. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import os  # NOTE(review): this scripts/t5/ driver invokes exps/roberta-base/* configs with datasets/emotion.yaml, while the t5 incremental script uses emotion_gen.yaml + exps/t5/* — confirm whether exps/t5/t5-base-emotion*.yaml was intended here (possible copy-paste from scripts/roberta/pairwise_emotion.py). 16 | 17 | # same classification head init 18 | 19 | for seed in [1, 2, 3, 4, 5]: 20 | for idx1 in range(0, 6): 21 | for idx2 in range(idx1 + 1, 6): 22 | if idx1 != 4 and idx2 != 4: # this dataset uses a different label space 23 | # simple 24 | os.system( 25 | f"python -m src.run_experiments --config_file src/configs/defaults.yaml src/configs/datasets/emotion.yaml src/configs/exps/roberta-base/roberta-base-emotion.yaml --filter_model model{idx1} model{idx2} --templates seed={seed}" 26 | ) 27 | # fisher 28 | os.system( 29 | f"python -m src.run_experiments --config_file src/configs/defaults.yaml src/configs/datasets/emotion.yaml src/configs/exps/roberta-base/roberta-base-emotion-fisher.yaml --filter_model model{idx1} model{idx2} --templates seed={seed}" 30 | ) 31 | # regmean 32 | os.system( 33 | f"python -m src.run_experiments --config_file src/configs/defaults.yaml src/configs/datasets/emotion.yaml src/configs/exps/roberta-base/roberta-base-emotion-regmean.yaml --filter_model model{idx1} model{idx2} --templates seed={seed}" 34 | ) 35 | 36 | # different classification head init 37 | 38 | for seed in [1, 2, 3, 4, 5]: 39 | for idx1
in range(0, 6): 40 | for idx2 in range(idx1 + 1, 6): 41 | if idx1 != 4 and idx2 != 4: # this dataset uses a different label space 42 | # simple 43 | os.system( 44 | f"python -m src.run_experiments --config_file src/configs/defaults.yaml src/configs/datasets/emotion_diffseed.yaml src/configs/exps/roberta-base/roberta-base-emotion.yaml --filter_model model{idx1} model{idx2} --templates dseed_generator={seed} seed={seed}" 45 | ) 46 | # fisher 47 | os.system( 48 | f"python -m src.run_experiments --config_file src/configs/defaults.yaml src/configs/datasets/emotion_diffseed.yaml src/configs/exps/roberta-base/roberta-base-emotion-fisher.yaml --filter_model model{idx1} model{idx2} --templates dseed_generator={seed} seed={seed}" 49 | ) 50 | # regmean 51 | os.system( 52 | f"python -m src.run_experiments --config_file src/configs/defaults.yaml src/configs/datasets/emotion_diffseed.yaml src/configs/exps/roberta-base/roberta-base-emotion-regmean.yaml --filter_model model{idx1} model{idx2} --templates dseed_generator={seed} seed={seed}" 53 | ) 54 | -------------------------------------------------------------------------------- /src/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Bloomberg Finance L.P. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | -------------------------------------------------------------------------------- /src/configs/datasets/emotion.yaml: -------------------------------------------------------------------------------- 1 | main_output_dir: 'runs/emotion_debug' 2 | 3 | 4 | evaluate_locals_before: true 5 | evaluate_locals_after: true 6 | evaluate_global_model: false 7 | evaluate_locals_ood: false 8 | evaluate_global_joint: false 9 | 10 | eval_on_test: true 11 | 12 | templates: 13 | dataset: "emotion" 14 | 15 | 16 | ood_datasets: 17 | ood0: 18 | dataset_name: emoint 19 | ood1: 20 | dataset_name: ssec 21 | ood2: 22 | dataset_name: electoraltweets 23 | ood3: 24 | dataset_name: grounded_emotions 25 | ood4: 26 | dataset_name: affectivetext 27 | ood5: 28 | dataset_name: "dailydialog" 29 | test_only: true 30 | ood6: 31 | dataset_name: "crowdflower" 32 | test_only: true 33 | ood7: 34 | dataset_name: "tec" 35 | test_only: true 36 | ood8: 37 | dataset_name: "tales-emotion" 38 | test_only: true 39 | ood9: 40 | dataset_name: "isear" 41 | test_only: true 42 | 43 | ood_all_is_test: true 44 | 45 | local_models: 46 | output_dir_format: '{main_output_dir}/local_models/{name}' 47 | models: 48 | model0: 49 | dataset_name: "dailydialog" 50 | model1: 51 | dataset_name: "crowdflower" 52 | model2: 53 | dataset_name: "tec" 54 | model3: 55 | dataset_name: "tales-emotion" 56 | model4: 57 | dataset_name: "emobank" 58 | model5: 59 | dataset_name: "isear" 60 | model6: 61 | dataset_name: "emoint" 62 | model7: 63 | dataset_name: "ssec" 64 | model8: 65 | dataset_name: "electoraltweets" 66 | model9: 67 | dataset_name: "fb-valence-arousal-anon" 68 | model10: 69 | dataset_name: "grounded_emotions" 70 | model11: 71 | dataset_name: "emotion-cause" 72 | model12: 73 | dataset_name: "affectivetext" 74 | 75 | merger: 76 | exclude_param_regex: [] 77 | 78 | tokenizer: "{resource_dir}/distilbert-base-uncased" 79 | model_type: distilbert 80 | 81 | # for debug 82 | 83 | 84 | global_device: 'cuda:0' 85 | dataset: "{dataset}" 86 
| partition_method: "uniform" 87 | 88 | 89 | # from fednlp: model_args 90 | default_model_args: 91 | # just for debugging 92 | is_regression: false 93 | task_type: "multi_label" 94 | num_train_epochs: 3.0 95 | do_lower_case: true 96 | per_device_eval_batch_size: 32 97 | fp16: false 98 | gradient_accumulation_steps: 1 99 | learning_rate: 2.0e-5 100 | local_rank: -1 101 | max_grad_norm: 1.0 102 | max_seq_length: 128 103 | model_type: null 104 | save_total_limit: 2 105 | max_steps: -1 106 | per_device_train_batch_size: 32 107 | use_multiprocessing: false # dataloader 108 | labels_map: {} 109 | regression: false 110 | version: 0 111 | partition: -1 112 | device: 'cuda:0' 113 | 114 | -------------------------------------------------------------------------------- /src/configs/datasets/emotion_diffseed.yaml: -------------------------------------------------------------------------------- 1 | output_dir_keys: ["diffseed"] 2 | diffseed: "{dseed_generator}" 3 | 4 | evaluate_locals_before: true 5 | evaluate_locals_after: true 6 | evaluate_global_model: false 7 | evaluate_locals_ood: false 8 | evaluate_global_joint: false 9 | 10 | eval_on_test: true 11 | 12 | templates: 13 | dataset: "emotion" 14 | dseed_generator: 0 15 | 16 | dseed_n: 5 17 | 18 | ood_datasets: 19 | ood0: 20 | dataset_name: emoint 21 | ood1: 22 | dataset_name: ssec 23 | ood2: 24 | dataset_name: electoraltweets 25 | ood3: 26 | dataset_name: grounded_emotions 27 | ood4: 28 | dataset_name: affectivetext 29 | ood5: 30 | dataset_name: "dailydialog" 31 | test_only: true 32 | ood6: 33 | dataset_name: "crowdflower" 34 | test_only: true 35 | ood7: 36 | dataset_name: "tec" 37 | test_only: true 38 | ood8: 39 | dataset_name: "tales-emotion" 40 | test_only: true 41 | ood9: 42 | dataset_name: "isear" 43 | test_only: true 44 | 45 | 46 | local_models: 47 | output_dir_format: '{main_output_dir}/local_models/{name}' 48 | models: 49 | model0: 50 | dataset_name: "dailydialog" 51 | seed: "{dseed1}" 52 | zoo_filter: 53 | seed: 
"{dseed1}" 54 | model1: 55 | dataset_name: "crowdflower" 56 | seed: "{dseed2}" 57 | zoo_filter: 58 | seed: "{dseed2}" 59 | model2: 60 | dataset_name: "tec" 61 | seed: "{dseed3}" 62 | zoo_filter: 63 | seed: "{dseed3}" 64 | model3: 65 | dataset_name: "tales-emotion" 66 | seed: "{dseed4}" 67 | zoo_filter: 68 | seed: "{dseed4}" 69 | model4: 70 | dataset_name: "emobank" 71 | model5: 72 | dataset_name: "isear" 73 | seed: "{dseed5}" 74 | zoo_filter: 75 | seed: "{dseed5}" 76 | model6: 77 | dataset_name: "emoint" 78 | model7: 79 | dataset_name: "ssec" 80 | model8: 81 | dataset_name: "electoraltweets" 82 | model9: 83 | dataset_name: "fb-valence-arousal-anon" 84 | model10: 85 | dataset_name: "grounded_emotions" 86 | model11: 87 | dataset_name: "emotion-cause" 88 | model12: 89 | dataset_name: "affectivetext" 90 | 91 | merger: 92 | exclude_param_regex: [] 93 | 94 | tokenizer: "{resource_dir}/distilbert-base-uncased" 95 | model_type: distilbert 96 | 97 | # for debug 98 | 99 | 100 | global_device: 'cuda:0' 101 | dataset: "{dataset}" 102 | partition_method: "uniform" 103 | 104 | 105 | # from fednlp: model_args 106 | default_model_args: 107 | # just for debugging 108 | is_regression: false 109 | task_type: "multi_label" 110 | num_train_epochs: 3.0 111 | do_lower_case: true 112 | per_device_eval_batch_size: 32 113 | fp16: false 114 | gradient_accumulation_steps: 1 115 | learning_rate: 2.0e-5 116 | local_rank: -1 117 | max_grad_norm: 1.0 118 | max_seq_length: 128 119 | model_type: null 120 | save_total_limit: 2 121 | max_steps: -1 122 | per_device_train_batch_size: 32 123 | use_multiprocessing: false # dataloader 124 | labels_map: {} 125 | regression: false 126 | version: 0 127 | partition: -1 128 | device: 'cuda:0' 129 | 130 | -------------------------------------------------------------------------------- /src/configs/datasets/emotion_gen.yaml: -------------------------------------------------------------------------------- 1 | main_output_dir: 'runs/emotion_debug' 2 | 3 | 4 | 
evaluate_locals_before: true 5 | evaluate_locals_after: true 6 | evaluate_global_model: false 7 | evaluate_locals_ood: false 8 | evaluate_global_joint: false 9 | 10 | eval_on_test: true 11 | 12 | templates: 13 | dataset: "emotion" 14 | 15 | seq2seq: true 16 | 17 | ood_datasets: 18 | ood0: 19 | dataset_name: emoint 20 | ood1: 21 | dataset_name: ssec 22 | ood2: 23 | dataset_name: electoraltweets 24 | ood3: 25 | dataset_name: grounded_emotions 26 | ood4: 27 | dataset_name: affectivetext 28 | ood5: 29 | dataset_name: "dailydialog" 30 | test_only: true 31 | ood6: 32 | dataset_name: "crowdflower" 33 | test_only: true 34 | ood7: 35 | dataset_name: "tec" 36 | test_only: true 37 | ood8: 38 | dataset_name: "tales-emotion" 39 | test_only: true 40 | ood9: 41 | dataset_name: "isear" 42 | test_only: true 43 | 44 | ood_all_is_test: true 45 | 46 | local_models: 47 | output_dir_format: '{main_output_dir}/local_models/{name}' 48 | models: 49 | model0: 50 | dataset_name: "dailydialog" 51 | model1: 52 | dataset_name: "crowdflower" 53 | model2: 54 | dataset_name: "tec" 55 | model3: 56 | dataset_name: "tales-emotion" 57 | model4: 58 | dataset_name: "emobank" 59 | model5: 60 | dataset_name: "isear" 61 | model6: 62 | dataset_name: "emoint" 63 | model7: 64 | dataset_name: "ssec" 65 | model8: 66 | dataset_name: "electoraltweets" 67 | model9: 68 | dataset_name: "fb-valence-arousal-anon" 69 | model10: 70 | dataset_name: "grounded_emotions" 71 | model11: 72 | dataset_name: "emotion-cause" 73 | model12: 74 | dataset_name: "affectivetext" 75 | 76 | merger: 77 | exclude_param_regex: [] 78 | multi_label_head_special: false 79 | 80 | tokenizer: "{resource_dir}/distilbert-base-uncased" 81 | model_type: distilbert 82 | 83 | # for debug 84 | 85 | 86 | global_device: 'cuda:0' 87 | dataset: "{dataset}" 88 | partition_method: "uniform" 89 | 90 | 91 | # from fednlp: model_args 92 | default_model_args: 93 | # just for debugging 94 | is_regression: false 95 | task_type: "multi_label" 96 | num_train_epochs: 
3.0 97 | do_lower_case: true 98 | per_device_eval_batch_size: 32 99 | fp16: false 100 | gradient_accumulation_steps: 1 101 | learning_rate: 2.0e-5 102 | local_rank: -1 103 | max_grad_norm: 1.0 104 | max_seq_length: 128 105 | model_type: null 106 | save_total_limit: 2 107 | max_steps: -1 108 | per_device_train_batch_size: 32 109 | use_multiprocessing: false # dataloader 110 | labels_map: {} 111 | regression: false 112 | version: 0 113 | partition: -1 114 | device: 'cuda:0' 115 | include_inputs_for_metrics: true 116 | -------------------------------------------------------------------------------- /src/configs/datasets/emotion_gen_mtl.yaml: -------------------------------------------------------------------------------- 1 | main_output_dir: 'runs/emotion_debug' 2 | load_from_zoo: "no" 3 | 4 | evaluate_locals_before: true 5 | evaluate_locals_after: true 6 | evaluate_global_model: false 7 | evaluate_locals_ood: false 8 | evaluate_global_joint: false 9 | 10 | eval_on_test: true 11 | 12 | seq2seq: true 13 | 14 | mtl: true 15 | mtl_all_tasks: true 16 | mtl_shared_label_space: true 17 | 18 | templates: 19 | dataset: "emotion" 20 | _mtl_models: {} # this will be 'models' after preprocess 21 | 22 | ood_datasets: 23 | ood0: 24 | dataset_name: emoint 25 | ood1: 26 | dataset_name: ssec 27 | ood2: 28 | dataset_name: electoraltweets 29 | ood3: 30 | dataset_name: grounded_emotions 31 | ood4: 32 | dataset_name: affectivetext 33 | ood5: 34 | dataset_name: "dailydialog" 35 | test_only: true 36 | ood6: 37 | dataset_name: "crowdflower" 38 | test_only: true 39 | ood7: 40 | dataset_name: "tec" 41 | test_only: true 42 | ood8: 43 | dataset_name: "tales-emotion" 44 | test_only: true 45 | ood9: 46 | dataset_name: "isear" 47 | test_only: true 48 | 49 | ood_all_is_test: true 50 | 51 | local_models: 52 | output_dir_format: '{main_output_dir}/local_models/{name}' 53 | models: 54 | model0: 55 | dataset_name: "dailydialog" 56 | model1: 57 | dataset_name: "crowdflower" 58 | model2: 59 | 
dataset_name: "tec" 60 | model3: 61 | dataset_name: "tales-emotion" 62 | model4: 63 | dataset_name: "emobank" 64 | model5: 65 | dataset_name: "isear" 66 | model6: 67 | dataset_name: "emoint" 68 | model7: 69 | dataset_name: "ssec" 70 | model8: 71 | dataset_name: "electoraltweets" 72 | model9: 73 | dataset_name: "fb-valence-arousal-anon" 74 | model10: 75 | dataset_name: "grounded_emotions" 76 | model11: 77 | dataset_name: "emotion-cause" 78 | model12: 79 | dataset_name: "affectivetext" 80 | 81 | merger: 82 | exclude_param_regex: [] 83 | multi_label_head_special: false 84 | 85 | tokenizer: "{resource_dir}/distilbert-base-uncased" 86 | model_type: distilbert 87 | 88 | # for debug 89 | 90 | 91 | global_device: 'cuda:0' 92 | dataset: "{dataset}" 93 | partition_method: "uniform" 94 | 95 | 96 | # from fednlp: model_args 97 | default_model_args: 98 | # just for debugging 99 | is_regression: false 100 | task_type: "multi_label" 101 | num_train_epochs: 3.0 102 | do_lower_case: true 103 | per_device_eval_batch_size: 32 104 | fp16: false 105 | gradient_accumulation_steps: 1 106 | learning_rate: 2.0e-5 107 | local_rank: -1 108 | max_grad_norm: 1.0 109 | max_seq_length: 128 110 | model_type: null 111 | save_total_limit: 2 112 | max_steps: -1 113 | per_device_train_batch_size: 32 114 | use_multiprocessing: false # dataloader 115 | labels_map: {} 116 | regression: false 117 | version: 0 118 | partition: -1 119 | device: 'cuda:0' 120 | include_inputs_for_metrics: true 121 | -------------------------------------------------------------------------------- /src/configs/datasets/emotion_mtl.yaml: -------------------------------------------------------------------------------- 1 | main_output_dir: 'runs/emotion_debug' 2 | load_from_zoo: "no" 3 | 4 | evaluate_locals_before: true 5 | evaluate_locals_after: true 6 | evaluate_global_model: false 7 | evaluate_locals_ood: false 8 | evaluate_global_joint: false 9 | 10 | eval_on_test: true 11 | 12 | mtl: true 13 | mtl_all_tasks: true 14 | 
mtl_shared_label_space: true 15 | 16 | templates: 17 | dataset: "emotion" 18 | _mtl_models: {} # this will be 'models' after preprocess 19 | 20 | ood_datasets: 21 | ood0: 22 | dataset_name: emoint 23 | ood1: 24 | dataset_name: ssec 25 | ood2: 26 | dataset_name: electoraltweets 27 | ood3: 28 | dataset_name: grounded_emotions 29 | ood4: 30 | dataset_name: affectivetext 31 | 32 | ood_all_is_test: true 33 | 34 | local_models: 35 | output_dir_format: '{main_output_dir}/local_models/{name}' 36 | models: 37 | model0: 38 | dataset_name: "dailydialog" 39 | model1: 40 | dataset_name: "crowdflower" 41 | model2: 42 | dataset_name: "tec" 43 | model3: 44 | dataset_name: "tales-emotion" 45 | model4: 46 | dataset_name: "emobank" 47 | model5: 48 | dataset_name: "isear" 49 | model6: 50 | dataset_name: "emoint" 51 | model7: 52 | dataset_name: "ssec" 53 | model8: 54 | dataset_name: "electoraltweets" 55 | model9: 56 | dataset_name: "fb-valence-arousal-anon" 57 | model10: 58 | dataset_name: "grounded_emotions" 59 | model11: 60 | dataset_name: "emotion-cause" 61 | model12: 62 | dataset_name: "affectivetext" 63 | 64 | merger: 65 | exclude_param_regex: [] 66 | enabled: false 67 | 68 | tokenizer: "{resource_dir}/distilbert-base-uncased" 69 | model_type: distilbert 70 | 71 | # for debug 72 | 73 | 74 | global_device: 'cuda:0' 75 | dataset: "{dataset}" 76 | partition_method: "uniform" 77 | 78 | 79 | # from fednlp: model_args 80 | default_model_args: 81 | # just for debugging 82 | is_regression: false 83 | task_type: "multi_label" 84 | num_train_epochs: 3.0 85 | do_lower_case: true 86 | per_device_eval_batch_size: 32 87 | fp16: false 88 | gradient_accumulation_steps: 1 89 | learning_rate: 2.0e-5 90 | local_rank: -1 91 | max_grad_norm: 1.0 92 | max_seq_length: 128 93 | model_type: null 94 | save_total_limit: 2 95 | max_steps: -1 96 | per_device_train_batch_size: 32 97 | use_multiprocessing: false # dataloader 98 | labels_map: {} 99 | regression: false 100 | version: 0 101 | partition: -1 102 | 
device: 'cuda:0' 103 | 104 | -------------------------------------------------------------------------------- /src/configs/datasets/glue.yaml: -------------------------------------------------------------------------------- 1 | main_output_dir: 'runs/glue_debug' 2 | 3 | 4 | evaluate_locals_before: true 5 | evaluate_locals_after: true 6 | evaluate_global_model: false 7 | evaluate_locals_ood: false 8 | evaluate_global_joint: false 9 | 10 | templates: 11 | dataset: "glue" 12 | 13 | local_models: 14 | output_dir_format: '{main_output_dir}/local_models/{name}' 15 | models: 16 | model0: 17 | task_type: classification 18 | dataset_name: cola 19 | partition: -1 20 | device: 'cuda:0' 21 | model1: 22 | task_type: classification 23 | dataset_name: sst2 24 | partition: -1 25 | device: 'cuda:0' 26 | model2: 27 | task_type: classification 28 | dataset_name: mrpc 29 | partition: -1 30 | device: 'cuda:0' 31 | model3: 32 | task_type: classification 33 | dataset_name: stsb 34 | partition: -1 35 | device: 'cuda:0' 36 | is_regression: true 37 | model4: 38 | task_type: classification 39 | dataset_name: mnli 40 | partition: -1 41 | device: 'cuda:0' 42 | model5: 43 | task_type: classification 44 | dataset_name: qnli 45 | partition: -1 46 | device: 'cuda:0' 47 | model6: 48 | task_type: classification 49 | dataset_name: qqp 50 | partition: -1 51 | device: 'cuda:0' 52 | model7: 53 | task_type: classification 54 | dataset_name: rte 55 | partition: -1 56 | device: 'cuda:0' 57 | model8: 58 | task_type: classification 59 | dataset_name: wnli 60 | partition: -1 61 | device: 'cuda:0' 62 | 63 | 64 | merger: 65 | exclude_param_regex: ['.*pre_classifier.*','.*classifier.*'] 66 | 67 | tokenizer: "{resource_dir}/distilbert-base-uncased" 68 | model_type: distilbert 69 | 70 | # for debug 71 | 72 | 73 | global_device: 'cuda:0' 74 | dataset: "{dataset}" 75 | partition_method: "uniform" 76 | 77 | 78 | # from fednlp: model_args 79 | default_model_args: 80 | # just for debugging 81 | is_regression: false 82 
| num_train_epochs: 3.0 83 | do_lower_case: true 84 | per_device_eval_batch_size: 32 85 | fp16: false 86 | gradient_accumulation_steps: 1 87 | learning_rate: 2.0e-5 88 | local_rank: -1 89 | max_grad_norm: 1.0 90 | max_seq_length: 128 91 | model_type: null 92 | save_total_limit: 2 93 | max_steps: -1 94 | per_device_train_batch_size: 32 95 | use_multiprocessing: false # dataloader 96 | labels_map: {} 97 | regression: false 98 | version: 0 99 | 100 | -------------------------------------------------------------------------------- /src/configs/datasets/glue_mtl.yaml: -------------------------------------------------------------------------------- 1 | main_output_dir: 'runs/glue_debug' 2 | load_from_zoo: "no" 3 | 4 | evaluate_locals_before: true 5 | evaluate_locals_after: true 6 | evaluate_global_model: false 7 | evaluate_locals_ood: false 8 | evaluate_global_joint: false 9 | 10 | mtl: true 11 | mtl_all_tasks: true 12 | 13 | templates: 14 | dataset: "glue" 15 | 16 | local_models: 17 | output_dir_format: '{main_output_dir}/local_models/{name}' 18 | _mtl_models: {} # this will be 'models' after preprocess 19 | #mtl_model0: 20 | # components: ['model0', 'model1'] 21 | #mtl_model1: 22 | # components: ['model2', 'model3'] 23 | models: # this will be '_models' after preprocess 24 | model0: 25 | task_type: classification 26 | dataset_name: cola 27 | partition: -1 28 | device: 'cuda:0' 29 | model1: 30 | task_type: classification 31 | dataset_name: sst2 32 | partition: -1 33 | device: 'cuda:0' 34 | model2: 35 | task_type: classification 36 | dataset_name: mrpc 37 | partition: -1 38 | device: 'cuda:0' 39 | model3: 40 | task_type: classification 41 | dataset_name: stsb 42 | partition: -1 43 | device: 'cuda:0' 44 | is_regression: true 45 | model4: 46 | task_type: classification 47 | dataset_name: mnli 48 | partition: -1 49 | device: 'cuda:0' 50 | model5: 51 | task_type: classification 52 | dataset_name: qnli 53 | partition: -1 54 | device: 'cuda:0' 55 | model6: 56 | task_type: 
classification 57 | dataset_name: qqp 58 | partition: -1 59 | device: 'cuda:0' 60 | model7: 61 | task_type: classification 62 | dataset_name: rte 63 | partition: -1 64 | device: 'cuda:0' 65 | model8: 66 | task_type: classification 67 | dataset_name: wnli 68 | partition: -1 69 | device: 'cuda:0' 70 | 71 | 72 | merger: 73 | exclude_param_regex: ['.*pre_classifier.*','.*classifier.*'] 74 | 75 | 76 | tokenizer: "{resource_dir}/distilbert-base-uncased" 77 | model_type: distilbert 78 | 79 | # for debug 80 | 81 | 82 | global_device: 'cuda:0' 83 | dataset: "{dataset}" 84 | partition_method: "uniform" 85 | 86 | 87 | # from fednlp: model_args 88 | default_model_args: 89 | # just for debugging 90 | is_regression: false 91 | num_train_epochs: 3.0 92 | do_lower_case: true 93 | per_device_eval_batch_size: 32 94 | fp16: false 95 | gradient_accumulation_steps: 1 96 | learning_rate: 2.0e-5 97 | local_rank: -1 98 | max_grad_norm: 1.0 99 | max_seq_length: 128 100 | model_type: null 101 | save_total_limit: 2 102 | max_steps: -1 103 | per_device_train_batch_size: 32 104 | use_multiprocessing: false # dataloader 105 | labels_map: {} 106 | regression: false 107 | version: 0 108 | 109 | 110 | -------------------------------------------------------------------------------- /src/configs/datasets/ner.yaml: -------------------------------------------------------------------------------- 1 | main_output_dir: 'runs/ner_debug' 2 | 3 | 4 | evaluate_locals_before: true 5 | evaluate_locals_after: true 6 | evaluate_global_model: false 7 | evaluate_locals_ood: false 8 | evaluate_global_joint: false 9 | 10 | eval_on_test: true 11 | 12 | templates: 13 | dataset: "ner" 14 | 15 | tokenizer_add_prefix_space: true 16 | 17 | ood_datasets: 18 | ood0: 19 | dataset_name: "ontonotes@bc" 20 | ood1: 21 | dataset_name: "ontonotes@mz" 22 | ood2: 23 | dataset_name: "ontonotes@tc" 24 | ood3: 25 | dataset_name: "ontonotes@nw" 26 | ood4: 27 | dataset_name: "ontonotes@bn" 28 | ood5: 29 | dataset_name: "ontonotes@wb" 30 
| ood6: 31 | dataset_name: "conll" 32 | ood7: 33 | dataset_name: "twitter" 34 | 35 | local_models: 36 | output_dir_format: '{main_output_dir}/local_models/{name}' 37 | models: 38 | model0: 39 | dataset_name: "ontonotes@bc" 40 | model1: 41 | dataset_name: "ontonotes@mz" 42 | model2: 43 | dataset_name: "ontonotes@tc" 44 | model3: 45 | dataset_name: "ontonotes@nw" 46 | model4: 47 | dataset_name: "ontonotes@bn" 48 | model5: 49 | dataset_name: "ontonotes@wb" 50 | model6: 51 | dataset_name: "conll" 52 | model7: 53 | dataset_name: "twitter" 54 | 55 | merger: 56 | exclude_param_regex: [] 57 | 58 | tokenizer: "{resource_dir}/distilbert-base-uncased" 59 | model_type: distilbert 60 | 61 | # for debug 62 | 63 | 64 | global_device: 'cuda:0' 65 | dataset: "{dataset}" 66 | partition_method: "uniform" 67 | 68 | 69 | # from fednlp: model_args 70 | default_model_args: 71 | # just for debugging 72 | is_regression: false 73 | task_type: "token_classification" 74 | num_train_epochs: 3.0 75 | do_lower_case: true 76 | per_device_eval_batch_size: 32 77 | fp16: false 78 | gradient_accumulation_steps: 1 79 | learning_rate: 2.0e-5 80 | local_rank: -1 81 | max_grad_norm: 1.0 82 | max_seq_length: 128 83 | model_type: null 84 | save_total_limit: 2 85 | max_steps: -1 86 | per_device_train_batch_size: 32 87 | use_multiprocessing: false # dataloader 88 | labels_map: {} 89 | regression: false 90 | version: 0 91 | partition: -1 92 | device: 'cuda:0' 93 | 94 | -------------------------------------------------------------------------------- /src/configs/datasets/ner_diffseed.yaml: -------------------------------------------------------------------------------- 1 | output_dir_keys: ["diffseed"] 2 | diffseed: "{dseed_generator}" 3 | dseed_n: 6 4 | 5 | evaluate_locals_before: true 6 | evaluate_locals_after: true 7 | evaluate_global_model: false 8 | evaluate_locals_ood: false 9 | evaluate_global_joint: false 10 | 11 | eval_on_test: true 12 | 13 | templates: 14 | dataset: "ner" 15 | dseed_generator: 0 16 | 
17 | tokenizer_add_prefix_space: true 18 | 19 | ood_datasets: 20 | ood0: 21 | dataset_name: "ontonotes@bc" 22 | ood1: 23 | dataset_name: "ontonotes@mz" 24 | ood2: 25 | dataset_name: "ontonotes@tc" 26 | ood3: 27 | dataset_name: "ontonotes@nw" 28 | ood4: 29 | dataset_name: "ontonotes@bn" 30 | ood5: 31 | dataset_name: "ontonotes@wb" 32 | ood6: 33 | dataset_name: "conll" 34 | ood7: 35 | dataset_name: "twitter" 36 | 37 | local_models: 38 | output_dir_format: '{main_output_dir}/local_models/{name}' 39 | models: 40 | model0: 41 | dataset_name: "ontonotes@bc" 42 | seed: "{dseed1}" 43 | zoo_filter: 44 | seed: "{dseed1}" 45 | model1: 46 | dataset_name: "ontonotes@mz" 47 | seed: "{dseed2}" 48 | zoo_filter: 49 | seed: "{dseed2}" 50 | model2: 51 | dataset_name: "ontonotes@tc" 52 | seed: "{dseed3}" 53 | zoo_filter: 54 | seed: "{dseed3}" 55 | model3: 56 | dataset_name: "ontonotes@nw" 57 | seed: "{dseed4}" 58 | zoo_filter: 59 | seed: "{dseed4}" 60 | model4: 61 | dataset_name: "ontonotes@bn" 62 | seed: "{dseed5}" 63 | zoo_filter: 64 | seed: "{dseed5}" 65 | model5: 66 | dataset_name: "ontonotes@wb" 67 | seed: "{dseed6}" 68 | zoo_filter: 69 | seed: "{dseed6}" 70 | model6: 71 | dataset_name: "conll" 72 | model7: 73 | dataset_name: "twitter" 74 | 75 | merger: 76 | exclude_param_regex: [] 77 | 78 | tokenizer: "{resource_dir}/distilbert-base-uncased" 79 | model_type: distilbert 80 | 81 | # for debug 82 | 83 | 84 | global_device: 'cuda:0' 85 | dataset: "{dataset}" 86 | partition_method: "uniform" 87 | 88 | 89 | # from fednlp: model_args 90 | default_model_args: 91 | # just for debugging 92 | is_regression: false 93 | task_type: "token_classification" 94 | num_train_epochs: 3.0 95 | do_lower_case: true 96 | per_device_eval_batch_size: 32 97 | fp16: false 98 | gradient_accumulation_steps: 1 99 | learning_rate: 2.0e-5 100 | local_rank: -1 101 | max_grad_norm: 1.0 102 | max_seq_length: 128 103 | model_type: null 104 | save_total_limit: 2 105 | max_steps: -1 106 | per_device_train_batch_size: 
32 107 | use_multiprocessing: false # dataloader 108 | labels_map: {} 109 | regression: false 110 | version: 0 111 | partition: -1 112 | device: 'cuda:0' 113 | 114 | -------------------------------------------------------------------------------- /src/configs/datasets/ner_mtl.yaml: -------------------------------------------------------------------------------- 1 | main_output_dir: 'runs/ner_mtl_debug' 2 | load_from_zoo: "no" 3 | 4 | evaluate_locals_before: true 5 | evaluate_locals_after: true 6 | evaluate_global_model: false 7 | evaluate_locals_ood: false 8 | evaluate_global_joint: false 9 | 10 | eval_on_test: true 11 | 12 | mtl: true 13 | mtl_all_tasks: true 14 | mtl_shared_label_space: true 15 | 16 | tokenizer_add_prefix_space: true 17 | 18 | templates: 19 | dataset: "ner" 20 | _mtl_models: {} # this will be 'models' after preprocess 21 | 22 | ood_datasets: 23 | ood0: 24 | dataset_name: "ontonotes@bc" 25 | ood1: 26 | dataset_name: "ontonotes@mz" 27 | ood2: 28 | dataset_name: "ontonotes@tc" 29 | ood3: 30 | dataset_name: "ontonotes@nw" 31 | ood4: 32 | dataset_name: "ontonotes@bn" 33 | ood5: 34 | dataset_name: "ontonotes@wb" 35 | ood6: 36 | dataset_name: "conll" 37 | ood7: 38 | dataset_name: "twitter" 39 | 40 | 41 | local_models: 42 | output_dir_format: '{main_output_dir}/local_models/{name}' 43 | models: 44 | model0: 45 | dataset_name: "ontonotes@bc" 46 | model1: 47 | dataset_name: "ontonotes@mz" 48 | model2: 49 | dataset_name: "ontonotes@tc" 50 | model3: 51 | dataset_name: "ontonotes@nw" 52 | model4: 53 | dataset_name: "ontonotes@bn" 54 | model5: 55 | dataset_name: "ontonotes@wb" 56 | model6: 57 | dataset_name: "conll" 58 | model7: 59 | dataset_name: "twitter" 60 | 61 | merger: 62 | exclude_param_regex: [] 63 | enabled: false 64 | 65 | tokenizer: "{resource_dir}/distilbert-base-uncased" 66 | model_type: distilbert 67 | 68 | # for debug 69 | 70 | 71 | global_device: 'cuda:0' 72 | dataset: "{dataset}" 73 | partition_method: "uniform" 74 | 75 | 76 | # from fednlp: 
model_args 77 | default_model_args: 78 | # just for debugging 79 | is_regression: false 80 | task_type: "token_classification" 81 | num_train_epochs: 3.0 82 | do_lower_case: true 83 | per_device_eval_batch_size: 32 84 | fp16: false 85 | gradient_accumulation_steps: 1 86 | learning_rate: 2.0e-5 87 | local_rank: -1 88 | max_grad_norm: 1.0 89 | max_seq_length: 128 90 | model_type: null 91 | save_total_limit: 2 92 | max_steps: -1 93 | per_device_train_batch_size: 32 94 | use_multiprocessing: false # dataloader 95 | labels_map: {} 96 | regression: false 97 | version: 0 98 | partition: -1 99 | device: 'cuda:0' 100 | 101 | -------------------------------------------------------------------------------- /src/configs/datasets/subsets/glue_mtl_subset_100.yaml: -------------------------------------------------------------------------------- 1 | main_output_dir: 'runs/glue_debug' 2 | load_from_zoo: "no" 3 | 4 | evaluate_locals_before: true 5 | evaluate_locals_after: true 6 | evaluate_global_model: false 7 | evaluate_locals_ood: false 8 | evaluate_global_joint: false 9 | 10 | mtl: true 11 | mtl_all_tasks: true 12 | 13 | local_models: 14 | output_dir_format: '{main_output_dir}/local_models/{name}' 15 | _mtl_models: {} # this will be 'models' after preprocess 16 | #mtl_model0: 17 | # components: ['model0', 'model1'] 18 | #mtl_model1: 19 | # components: ['model2', 'model3'] 20 | models: # this will be '_models' after preprocess 21 | model0: 22 | task_type: classification 23 | dataset_name: cola 24 | partition: -1 25 | device: 'cuda:0' 26 | model1: 27 | task_type: classification 28 | dataset_name: sst2 29 | partition: -1 30 | device: 'cuda:0' 31 | model2: 32 | task_type: classification 33 | dataset_name: mrpc 34 | partition: -1 35 | device: 'cuda:0' 36 | model3: 37 | task_type: classification 38 | dataset_name: stsb 39 | partition: -1 40 | device: 'cuda:0' 41 | is_regression: true 42 | model4: 43 | task_type: classification 44 | dataset_name: mnli 45 | partition: -1 46 | device: 
'cuda:0' 47 | model5: 48 | task_type: classification 49 | dataset_name: qnli 50 | partition: -1 51 | device: 'cuda:0' 52 | model6: 53 | task_type: classification 54 | dataset_name: qqp 55 | partition: -1 56 | device: 'cuda:0' 57 | model7: 58 | task_type: classification 59 | dataset_name: rte 60 | partition: -1 61 | device: 'cuda:0' 62 | model8: 63 | task_type: classification 64 | dataset_name: wnli 65 | partition: -1 66 | device: 'cuda:0' 67 | 68 | 69 | merger: 70 | exclude_param_regex: ['.*pre_classifier.*','.*classifier.*'] 71 | 72 | tokenizer: "{resource_dir}/distilbert-base-uncased" 73 | model_type: distilbert 74 | 75 | # for debug 76 | 77 | 78 | global_device: 'cuda:0' 79 | dataset: "glue" 80 | partition_method: "uniform" 81 | 82 | 83 | # from fednlp: model_args 84 | default_model_args: 85 | # just for debugging 86 | is_regression: false 87 | num_train_epochs: 3.0 88 | do_lower_case: true 89 | per_device_eval_batch_size: 32 90 | fp16: false 91 | gradient_accumulation_steps: 1 92 | learning_rate: 2.0e-5 93 | local_rank: -1 94 | max_grad_norm: 1.0 95 | max_seq_length: 128 96 | model_type: null 97 | save_total_limit: 2 98 | max_steps: -1 99 | per_device_train_batch_size: 32 100 | use_multiprocessing: false # dataloader 101 | labels_map: {} 102 | regression: false 103 | 104 | version: "subset100_0" 105 | train_subset_n: 100 106 | train_subset_seed: null 107 | -------------------------------------------------------------------------------- /src/configs/datasets/subsets/glue_mtl_subset_1k.yaml: -------------------------------------------------------------------------------- 1 | main_output_dir: 'runs/glue_debug' 2 | load_from_zoo: "no" 3 | 4 | evaluate_locals_before: true 5 | evaluate_locals_after: true 6 | evaluate_global_model: false 7 | evaluate_locals_ood: false 8 | evaluate_global_joint: false 9 | 10 | mtl: true 11 | mtl_all_tasks: true 12 | 13 | local_models: 14 | output_dir_format: '{main_output_dir}/local_models/{name}' 15 | _mtl_models: {} # this will be 
'models' after preprocess 16 | #mtl_model0: 17 | # components: ['model0', 'model1'] 18 | #mtl_model1: 19 | # components: ['model2', 'model3'] 20 | models: # this will be '_models' after preprocess 21 | model0: 22 | task_type: classification 23 | dataset_name: cola 24 | partition: -1 25 | device: 'cuda:0' 26 | model1: 27 | task_type: classification 28 | dataset_name: sst2 29 | partition: -1 30 | device: 'cuda:0' 31 | model2: 32 | task_type: classification 33 | dataset_name: mrpc 34 | partition: -1 35 | device: 'cuda:0' 36 | model3: 37 | task_type: classification 38 | dataset_name: stsb 39 | partition: -1 40 | device: 'cuda:0' 41 | is_regression: true 42 | model4: 43 | task_type: classification 44 | dataset_name: mnli 45 | partition: -1 46 | device: 'cuda:0' 47 | model5: 48 | task_type: classification 49 | dataset_name: qnli 50 | partition: -1 51 | device: 'cuda:0' 52 | model6: 53 | task_type: classification 54 | dataset_name: qqp 55 | partition: -1 56 | device: 'cuda:0' 57 | model7: 58 | task_type: classification 59 | dataset_name: rte 60 | partition: -1 61 | device: 'cuda:0' 62 | model8: 63 | task_type: classification 64 | dataset_name: wnli 65 | partition: -1 66 | device: 'cuda:0' 67 | 68 | 69 | merger: 70 | exclude_param_regex: ['.*pre_classifier.*','.*classifier.*'] 71 | 72 | tokenizer: "{resource_dir}/distilbert-base-uncased" 73 | model_type: distilbert 74 | 75 | # for debug 76 | 77 | 78 | global_device: 'cuda:0' 79 | dataset: "glue" 80 | partition_method: "uniform" 81 | 82 | 83 | # from fednlp: model_args 84 | default_model_args: 85 | # just for debugging 86 | is_regression: false 87 | num_train_epochs: 3.0 88 | do_lower_case: true 89 | per_device_eval_batch_size: 32 90 | fp16: false 91 | gradient_accumulation_steps: 1 92 | learning_rate: 2.0e-5 93 | local_rank: -1 94 | max_grad_norm: 1.0 95 | max_seq_length: 128 96 | model_type: null 97 | save_total_limit: 2 98 | max_steps: -1 99 | per_device_train_batch_size: 32 100 | use_multiprocessing: false # dataloader 
101 | labels_map: {} 102 | regression: false 103 | 104 | version: "subset1k_0" 105 | train_subset_n: 1000 106 | train_subset_seed: null 107 | -------------------------------------------------------------------------------- /src/configs/datasets/subsets/glue_partition_10k_iid.yaml: -------------------------------------------------------------------------------- 1 | evaluate_locals_before: true 2 | evaluate_locals_after: true 3 | evaluate_global_model: false 4 | evaluate_locals_ood: false 5 | evaluate_global_joint: false 6 | 7 | templates: 8 | dataset_name: sst2 9 | seed: 10 10 | 11 | partition: 12 | n_partition: 2 13 | method: iid 14 | n_total_examples: 20000 15 | 16 | local_models: 17 | output_dir_format: '{main_output_dir}/local_models/{name}' 18 | models: 19 | model0: 20 | task_type: classification 21 | dataset_name: "{dataset_name}" 22 | partition: 0 23 | device: 'cuda:0' 24 | version: "iid10k_0_model0" 25 | zoo_filter: 26 | version: "iid10k_0_model0" 27 | model1: 28 | task_type: classification 29 | dataset_name: "{dataset_name}" 30 | partition: 1 31 | device: 'cuda:0' 32 | version: "iid10k_0_model1" 33 | zoo_filter: 34 | version: "iid10k_0_model1" 35 | 36 | merger: 37 | exclude_param_regex: ['.*pre_classifier.*','.*classifier.*'] 38 | 39 | tokenizer: "{resource_dir}/distilbert-base-uncased" 40 | model_type: distilbert 41 | 42 | # for debug 43 | 44 | 45 | global_device: 'cuda:0' 46 | dataset: "glue" 47 | 48 | 49 | # from fednlp: model_args 50 | default_model_args: 51 | # just for debugging 52 | is_regression: false 53 | num_train_epochs: 3.0 54 | do_lower_case: true 55 | per_device_eval_batch_size: 32 56 | fp16: false 57 | gradient_accumulation_steps: 1 58 | learning_rate: 2.0e-5 59 | local_rank: -1 60 | max_grad_norm: 1.0 61 | max_seq_length: 128 62 | model_type: null 63 | save_total_limit: 2 64 | max_steps: -1 65 | per_device_train_batch_size: 32 66 | use_multiprocessing: false # dataloader 67 | labels_map: {} 68 | regression: false 69 | 70 | version: 
"iid10k_0" 71 | -------------------------------------------------------------------------------- /src/configs/datasets/subsets/glue_partition_1k_iid.yaml: -------------------------------------------------------------------------------- 1 | evaluate_locals_before: true 2 | evaluate_locals_after: true 3 | evaluate_global_model: false 4 | evaluate_locals_ood: false 5 | evaluate_global_joint: false 6 | 7 | templates: 8 | dataset_name: sst2 9 | seed: 10 10 | partition_method: "iid" 11 | 12 | partition: 13 | n_partition: 2 14 | method: iid 15 | n_total_examples: 2000 16 | 17 | local_models: 18 | output_dir_format: '{main_output_dir}/local_models/{name}' 19 | models: 20 | model0: 21 | task_type: classification 22 | dataset_name: "{dataset_name}" 23 | partition: 0 24 | device: 'cuda:0' 25 | version: "iid1k_0_model0" 26 | zoo_filter: 27 | version: "iid1k_0_model0" 28 | model1: 29 | task_type: classification 30 | dataset_name: "{dataset_name}" 31 | partition: 1 32 | device: 'cuda:0' 33 | version: "iid1k_0_model1" 34 | zoo_filter: 35 | version: "iid1k_0_model1" 36 | 37 | merger: 38 | exclude_param_regex: ['.*pre_classifier.*','.*classifier.*'] 39 | 40 | tokenizer: "{resource_dir}/distilbert-base-uncased" 41 | model_type: distilbert 42 | 43 | # for debug 44 | 45 | 46 | global_device: 'cuda:0' 47 | dataset: "glue" 48 | 49 | 50 | # from fednlp: model_args 51 | default_model_args: 52 | # just for debugging 53 | is_regression: false 54 | num_train_epochs: 3.0 55 | do_lower_case: true 56 | per_device_eval_batch_size: 32 57 | fp16: false 58 | gradient_accumulation_steps: 1 59 | learning_rate: 2.0e-5 60 | local_rank: -1 61 | max_grad_norm: 1.0 62 | max_seq_length: 128 63 | model_type: null 64 | save_total_limit: 2 65 | max_steps: -1 66 | per_device_train_batch_size: 32 67 | use_multiprocessing: false # dataloader 68 | labels_map: {} 69 | regression: false 70 | 71 | version: "iid1k_0" 72 | -------------------------------------------------------------------------------- 
/src/configs/datasets/subsets/glue_partition_1k_niid.yaml: -------------------------------------------------------------------------------- 1 | evaluate_locals_before: true 2 | evaluate_locals_after: true 3 | evaluate_global_model: false 4 | evaluate_locals_ood: false 5 | evaluate_global_joint: false 6 | 7 | templates: 8 | dataset_name: sst2 9 | seed: 10 10 | partition_method: "niid" 11 | 12 | partition: 13 | n_partition: 2 14 | method: "{partition_method}" 15 | n_total_examples: 2000 16 | niid_label_alpha: 0.8 17 | 18 | local_models: 19 | output_dir_format: '{main_output_dir}/local_models/{name}' 20 | models: 21 | model0: 22 | task_type: classification 23 | dataset_name: "{dataset_name}" 24 | partition: 0 25 | device: 'cuda:0' 26 | version: "{partition_method}_0_model0" 27 | zoo_filter: 28 | version: "{partition_method}_0_model0" 29 | model1: 30 | task_type: classification 31 | dataset_name: "{dataset_name}" 32 | partition: 1 33 | device: 'cuda:0' 34 | version: "{partition_method}_0_model1" 35 | zoo_filter: 36 | version: "{partition_method}_0_model1" 37 | 38 | merger: 39 | exclude_param_regex: ['.*pre_classifier.*','.*classifier.*'] 40 | 41 | tokenizer: "{resource_dir}/distilbert-base-uncased" 42 | model_type: distilbert 43 | 44 | # for debug 45 | 46 | 47 | global_device: 'cuda:0' 48 | dataset: "glue" 49 | 50 | 51 | # from fednlp: model_args 52 | default_model_args: 53 | # just for debugging 54 | is_regression: false 55 | num_train_epochs: 3.0 56 | do_lower_case: true 57 | per_device_eval_batch_size: 32 58 | fp16: false 59 | gradient_accumulation_steps: 1 60 | learning_rate: 2.0e-5 61 | local_rank: -1 62 | max_grad_norm: 1.0 63 | max_seq_length: 128 64 | model_type: null 65 | save_total_limit: 2 66 | max_steps: -1 67 | per_device_train_batch_size: 32 68 | use_multiprocessing: false # dataloader 69 | labels_map: {} 70 | regression: false 71 | 72 | version: "iid1k_0" # NOTE(review): looks copy-pasted from the iid config — for this niid partition the tag should presumably be "niid1k_0" (models above use "{partition_method}_0_*"); confirm against how the top-level version is consumed by zoo lookup 73 | --------------------------------------------------------------------------------
/src/configs/datasets/subsets/glue_partition_1k_niid_diffseed.yaml: -------------------------------------------------------------------------------- 1 | evaluate_locals_before: true 2 | evaluate_locals_after: true 3 | evaluate_global_model: false 4 | evaluate_locals_ood: false 5 | evaluate_global_joint: false 6 | 7 | templates: 8 | dataset_name: sst2 9 | seed: 10 10 | partition_method: "niid" 11 | dseed_generator: 0 12 | 13 | dseed_n: 2 14 | 15 | partition: 16 | n_partition: 2 17 | method: "{partition_method}" 18 | n_total_examples: 2000 19 | niid_label_alpha: 0.8 20 | 21 | local_models: 22 | output_dir_format: '{main_output_dir}/local_models/{name}' 23 | models: 24 | model0: 25 | task_type: classification 26 | dataset_name: "{dataset_name}" 27 | partition: 0 28 | device: 'cuda:0' 29 | version: "{partition_method}_0_model0" 30 | zoo_filter: 31 | version: "{partition_method}_0_model0" 32 | seed: "{dseed1}" 33 | seed: "{dseed1}" 34 | 35 | model1: 36 | task_type: classification 37 | dataset_name: "{dataset_name}" 38 | partition: 1 39 | device: 'cuda:0' 40 | version: "{partition_method}_0_model1" 41 | zoo_filter: 42 | version: "{partition_method}_0_model1" 43 | seed: "{dseed2}" 44 | seed: "{dseed2}" 45 | 46 | merger: 47 | exclude_param_regex: ['.*pre_classifier.*','.*classifier.*'] 48 | 49 | tokenizer: "{resource_dir}/distilbert-base-uncased" 50 | model_type: distilbert 51 | 52 | # for debug 53 | 54 | 55 | global_device: 'cuda:0' 56 | dataset: "glue" 57 | 58 | 59 | # from fednlp: model_args 60 | default_model_args: 61 | # just for debugging 62 | is_regression: false 63 | num_train_epochs: 3.0 64 | do_lower_case: true 65 | per_device_eval_batch_size: 32 66 | fp16: false 67 | gradient_accumulation_steps: 1 68 | learning_rate: 2.0e-5 69 | local_rank: -1 70 | max_grad_norm: 1.0 71 | max_seq_length: 128 72 | model_type: null 73 | save_total_limit: 2 74 | max_steps: -1 75 | per_device_train_batch_size: 32 76 | use_multiprocessing: false # dataloader 77 | labels_map: {} 78 | 
regression: false 79 | 80 | version: "iid1k_0" 81 | -------------------------------------------------------------------------------- /src/configs/exps/deberta/deberta-large-emotion-fisher.yaml: -------------------------------------------------------------------------------- 1 | required_resources: 2 | deberta-v3-large: "s3://ANONYMOUS/deberta-v3-large" 3 | huggingface: "s3://ANONYMOUS/huggingface" 4 | emotion_splits: "s3://ANONYMOUS/emotion_splits" 5 | remote_zoo_dir: "s3://ANONYMOUS/local_models_zoo" 6 | load_from_zoo_use_remote: false 7 | resource_dir: "resources" 8 | push_to_remote_zoo: false 9 | push_to_local_zoo: true 10 | 11 | merger: 12 | fisher_weighted: true 13 | fisher_n_example: 1000 14 | fisher_version: "h_1000_fix0708" 15 | 16 | evaluate_locals_ood_after_merge: false 17 | evaluate_locals_before: true 18 | evaluate_locals_after: true 19 | 20 | seed: "{seed}" 21 | main_output_dir: 'runs/emotion-deberta-large/fisher-new-seed{seed}' 22 | default_model_args: 23 | model_name: "{resource_dir}/deberta-v3-large" 24 | version: "hyp0918" 25 | zoo_filter: 26 | version: "hyp0918" 27 | seed: "{seed}" 28 | do_lower_case: false 29 | per_device_train_batch_size: 16 30 | lr_scheduler_type: "polynomial" 31 | warmup_ratio: 0.06 32 | learning_rate: 6.0e-6 33 | num_train_epochs: 20.0 34 | #adam_beta1: 0.9 35 | #adam_beta2: 0.98 36 | #adam_epsilon: 1.0e-6 37 | #max_grad_norm: 0.0 38 | save_strategy: "epoch" 39 | evaluation_strategy: "epoch" 40 | load_best_model_at_end: true 41 | metric_for_best_model: "key_score" 42 | reweight_loss_schema: "sqrt" 43 | tokenizer: "{resource_dir}/deberta-v3-large" 44 | model_type: deberta 45 | -------------------------------------------------------------------------------- /src/configs/exps/deberta/deberta-large-emotion-locals.yaml: -------------------------------------------------------------------------------- 1 | required_resources: 2 | deberta-v3-large: "s3://ANONYMOUS/deberta-v3-large" 3 | huggingface: "s3://ANONYMOUS/huggingface" 4 | 
emotion_splits: "s3://ANONYMOUS/emotion_splits" 5 | remote_zoo_dir: "s3://ANONYMOUS/local_models_zoo" 6 | load_from_zoo_use_remote: false 7 | resource_dir: "resources" 8 | push_to_remote_zoo: false 9 | push_to_local_zoo: true 10 | 11 | evaluate_locals_ood_after_merge: false 12 | evaluate_locals_before: true 13 | evaluate_locals_after: false 14 | 15 | merger: 16 | enabled: false 17 | 18 | seed: "{seed}" 19 | main_output_dir: 'runs/emotion-deberta-large/locals-new-lr1e-5-seed{seed}' 20 | default_model_args: 21 | model_name: "{resource_dir}/deberta-v3-large" 22 | version: "hyp0918" 23 | zoo_filter: 24 | version: "hyp0918" 25 | seed: "{seed}" 26 | do_lower_case: false 27 | per_device_train_batch_size: 16 28 | lr_scheduler_type: "polynomial" 29 | warmup_ratio: 0.06 30 | learning_rate: 1.0e-5 31 | num_train_epochs: 20.0 32 | #adam_beta1: 0.9 33 | #adam_beta2: 0.98 34 | #adam_epsilon: 1.0e-6 35 | #max_grad_norm: 0.0 36 | save_strategy: "epoch" 37 | evaluation_strategy: "epoch" 38 | load_best_model_at_end: true 39 | metric_for_best_model: "key_score" 40 | reweight_loss_schema: "sqrt" 41 | tokenizer: "{resource_dir}/deberta-v3-large" 42 | model_type: deberta 43 | -------------------------------------------------------------------------------- /src/configs/exps/deberta/deberta-large-emotion-mtl.yaml: -------------------------------------------------------------------------------- 1 | required_resources: 2 | deberta-v3-large: "s3://ANONYMOUS/deberta-v3-large" 3 | huggingface: "s3://ANONYMOUS/huggingface" 4 | emotion_splits: "s3://ANONYMOUS/emotion_splits" 5 | remote_zoo_dir: "s3://ANONYMOUS/local_models_zoo" 6 | load_from_zoo_use_remote: false 7 | resource_dir: "resources" 8 | push_to_remote_zoo: false 9 | push_to_local_zoo: true 10 | 11 | 12 | evaluate_locals_ood_after_merge: false 13 | evaluate_locals_before: true 14 | evaluate_locals_after: false 15 | 16 | merger: 17 | enabled: false 18 | 19 | seed: "{seed}" 20 | main_output_dir: 
'runs/emotion-deberta-large/mtl-new-seed{seed}' 21 | default_model_args: 22 | model_name: "{resource_dir}/deberta-v3-large" 23 | version: "hyp0918" 24 | zoo_filter: 25 | version: "hyp0918" 26 | seed: "{seed}" 27 | do_lower_case: false 28 | per_device_train_batch_size: 16 29 | lr_scheduler_type: "polynomial" 30 | warmup_ratio: 0.06 31 | learning_rate: 1.0e-5 32 | num_train_epochs: 20.0 33 | #adam_beta1: 0.9 34 | #adam_beta2: 0.98 35 | #adam_epsilon: 1.0e-6 36 | #max_grad_norm: 0.0 37 | save_strategy: "epoch" 38 | evaluation_strategy: "epoch" 39 | #eval_steps: 10 40 | load_best_model_at_end: true 41 | metric_for_best_model: "key_score" 42 | reweight_loss_schema: "sqrt" 43 | tokenizer: "{resource_dir}/deberta-v3-large" 44 | model_type: deberta 45 | -------------------------------------------------------------------------------- /src/configs/exps/deberta/deberta-large-emotion-regmean.yaml: -------------------------------------------------------------------------------- 1 | required_resources: 2 | deberta-v3-large: "s3://ANONYMOUS/deberta-v3-large" 3 | huggingface: "s3://ANONYMOUS/huggingface" 4 | emotion_splits: "s3://ANONYMOUS/emotion_splits" 5 | remote_zoo_dir: "s3://ANONYMOUS/local_models_zoo" 6 | load_from_zoo_use_remote: false 7 | resource_dir: "resources" 8 | push_to_remote_zoo: false 9 | push_to_local_zoo: true 10 | 11 | 12 | evaluate_locals_before: true 13 | evaluate_locals_after: true 14 | 15 | merger: 16 | regmean_exclude_param_regex: [] 17 | #regmean_exclude_param_regex: [] 18 | regmean_mean: true 19 | gram_n_example: 1000 20 | gram_version: "h_1000_0726_fix_withclassifier" 21 | #regmean_diag: true 22 | regmean_reduce_nondiag: 0.1 23 | 24 | seed: "{seed}" 25 | main_output_dir: 'runs/emotion-deberta-large/deberta-new-regmean-withclassifier-seed{seed}' 26 | default_model_args: 27 | model_name: "{resource_dir}/deberta-v3-large" 28 | version: "hyp0918" 29 | zoo_filter: 30 | version: "hyp0918" 31 | seed: "{seed}" 32 | do_lower_case: false 33 | 
per_device_train_batch_size: 16 34 | lr_scheduler_type: "polynomial" 35 | warmup_ratio: 0.06 36 | learning_rate: 6.0e-6 37 | num_train_epochs: 20.0 38 | #adam_beta1: 0.9 39 | #adam_beta2: 0.98 40 | #adam_epsilon: 1.0e-6 41 | #max_grad_norm: 0.0 42 | save_strategy: "epoch" 43 | evaluation_strategy: "epoch" 44 | load_best_model_at_end: true 45 | metric_for_best_model: "key_score" 46 | reweight_loss_schema: "sqrt" 47 | tokenizer: "{resource_dir}/deberta-v3-large" 48 | model_type: deberta 49 | -------------------------------------------------------------------------------- /src/configs/exps/deberta/deberta-large-emotion.yaml: -------------------------------------------------------------------------------- 1 | required_resources: 2 | deberta-v3-large: "s3://ANONYMOUS/deberta-v3-large" 3 | huggingface: "s3://ANONYMOUS/huggingface" 4 | emotion_splits: "s3://ANONYMOUS/emotion_splits" 5 | remote_zoo_dir: "s3://ANONYMOUS/local_models_zoo" 6 | load_from_zoo_use_remote: false 7 | resource_dir: "resources" 8 | push_to_remote_zoo: false 9 | push_to_local_zoo: true 10 | 11 | 12 | evaluate_locals_ood_after_merge: false 13 | evaluate_locals_before: true 14 | evaluate_locals_after: true 15 | 16 | seed: "{seed}" 17 | main_output_dir: 'runs/emotion-deberta-large/simple_avg-new-seed{seed}' 18 | default_model_args: 19 | model_name: "{resource_dir}/deberta-v3-large" 20 | version: "hyp0918" 21 | zoo_filter: 22 | version: "hyp0918" 23 | seed: "{seed}" 24 | do_lower_case: false 25 | per_device_train_batch_size: 16 26 | lr_scheduler_type: "polynomial" 27 | warmup_ratio: 0.06 28 | learning_rate: 6.0e-6 29 | num_train_epochs: 20.0 30 | #adam_beta1: 0.9 31 | #adam_beta2: 0.98 32 | #adam_epsilon: 1.0e-6 33 | #max_grad_norm: 0.0 34 | save_strategy: "epoch" 35 | evaluation_strategy: "epoch" 36 | load_best_model_at_end: true 37 | metric_for_best_model: "key_score" 38 | reweight_loss_schema: "sqrt" 39 | tokenizer: "{resource_dir}/deberta-v3-large" 40 | model_type: deberta 41 | 
-------------------------------------------------------------------------------- /src/configs/exps/deberta/ner/deberta-ner-locals.yaml: -------------------------------------------------------------------------------- 1 | required_resources: 2 | deberta-v3-large: "s3://ANONYMOUS/deberta-v3-large" 3 | huggingface: "s3://ANONYMOUS/huggingface" 4 | ner: "s3://ANONYMOUS/ner" 5 | remote_zoo_dir: "s3://ANONYMOUS/local_models_zoo" 6 | load_from_zoo_use_remote: false 7 | resource_dir: "resources" 8 | push_to_remote_zoo: false 9 | push_to_local_zoo: true 10 | 11 | evaluate_locals_ood_after_merge: false 12 | evaluate_locals_before: true 13 | evaluate_locals_after: false 14 | 15 | merger: 16 | enabled: false 17 | 18 | seed: "{seed}" 19 | main_output_dir: 'runs/ner-deberta-large/locals-new-lr1e-5-seed{seed}' 20 | default_model_args: 21 | model_name: "{resource_dir}/deberta-v3-large" 22 | version: "hyp0918" 23 | zoo_filter: 24 | version: "hyp0918" 25 | seed: "{seed}" 26 | do_lower_case: false 27 | per_device_train_batch_size: 16 28 | lr_scheduler_type: "polynomial" 29 | warmup_ratio: 0.06 30 | learning_rate: 1.0e-5 31 | num_train_epochs: 20.0 32 | #adam_beta1: 0.9 33 | #adam_beta2: 0.98 34 | #adam_epsilon: 1.0e-6 35 | #max_grad_norm: 0.0 36 | save_strategy: "epoch" 37 | evaluation_strategy: "epoch" 38 | load_best_model_at_end: true 39 | metric_for_best_model: "key_score" 40 | reweight_loss_schema: "sqrt" 41 | tokenizer: "{resource_dir}/deberta-v3-large" 42 | model_type: deberta 43 | -------------------------------------------------------------------------------- /src/configs/exps/deberta/ner/deberta-ner-mtl.yaml: -------------------------------------------------------------------------------- 1 | required_resources: 2 | deberta-v3-large: "s3://ANONYMOUS/deberta-v3-large" 3 | huggingface: "s3://ANONYMOUS/huggingface" 4 | ner: "s3://ANONYMOUS/ner" 5 | remote_zoo_dir: "s3://ANONYMOUS/local_models_zoo" 6 | load_from_zoo_use_remote: false 7 | resource_dir: "resources" 8 | 
push_to_remote_zoo: false 9 | push_to_local_zoo: true 10 | 11 | evaluate_locals_ood_after_merge: false 12 | evaluate_locals_before: true 13 | evaluate_locals_after: false 14 | 15 | merger: 16 | enabled: false 17 | 18 | seed: "{seed}" 19 | main_output_dir: 'runs/ner-deberta-large/mtl-new-lr1e-5-seed{seed}' 20 | default_model_args: 21 | model_name: "{resource_dir}/deberta-v3-large" 22 | version: "hyp0918" 23 | zoo_filter: 24 | version: "hyp0918" 25 | seed: "{seed}" 26 | do_lower_case: false 27 | per_device_train_batch_size: 16 28 | lr_scheduler_type: "polynomial" 29 | warmup_ratio: 0.06 30 | learning_rate: 1.0e-5 31 | num_train_epochs: 20.0 32 | #adam_beta1: 0.9 33 | #adam_beta2: 0.98 34 | #adam_epsilon: 1.0e-6 35 | #max_grad_norm: 0.0 36 | save_strategy: "epoch" 37 | evaluation_strategy: "epoch" 38 | #eval_steps: 10 39 | load_best_model_at_end: true 40 | metric_for_best_model: "key_score" 41 | reweight_loss_schema: "sqrt" 42 | tokenizer: "{resource_dir}/deberta-v3-large" 43 | model_type: deberta 44 | -------------------------------------------------------------------------------- /src/configs/exps/deberta/ner/ood/deberta-ner-ensemble.yaml: -------------------------------------------------------------------------------- 1 | required_resources: 2 | deberta-v3-large: "s3://ANONYMOUS/deberta-v3-large" 3 | huggingface: "s3://ANONYMOUS/huggingface" 4 | ner: "s3://ANONYMOUS/ner" 5 | remote_zoo_dir: "s3://ANONYMOUS/local_models_zoo" 6 | load_from_zoo_use_remote: false 7 | resource_dir: "resources" 8 | push_to_remote_zoo: false 9 | push_to_local_zoo: true 10 | 11 | evaluate_ensemble_ood: true 12 | evaluate_locals_ood_before_merge: false 13 | evaluate_locals_ood_after_merge: false 14 | evaluate_locals_before: false 15 | evaluate_locals_after: false 16 | 17 | merger: 18 | enabled: false 19 | 20 | ensembler: 21 | enabled: true 22 | 23 | seed: "{seed}" 24 | main_output_dir: 'runs/ner-deberta-large/ensemble-ood-lr1e-5-seed{seed}' 25 | default_model_args: 26 | model_name: 
"{resource_dir}/deberta-v3-large" 27 | version: "hyp0918" 28 | zoo_filter: 29 | version: "hyp0918" 30 | seed: "{seed}" 31 | do_lower_case: false 32 | per_device_train_batch_size: 16 33 | lr_scheduler_type: "polynomial" 34 | warmup_ratio: 0.06 35 | learning_rate: 1.0e-5 36 | num_train_epochs: 20.0 37 | #adam_beta1: 0.9 38 | #adam_beta2: 0.98 39 | #adam_epsilon: 1.0e-6 40 | #max_grad_norm: 0.0 41 | save_strategy: "epoch" 42 | evaluation_strategy: "epoch" 43 | load_best_model_at_end: true 44 | metric_for_best_model: "key_score" 45 | reweight_loss_schema: "sqrt" 46 | tokenizer: "{resource_dir}/deberta-v3-large" 47 | model_type: deberta 48 | -------------------------------------------------------------------------------- /src/configs/exps/deberta/ner/ood/deberta-ner-fisher.yaml: -------------------------------------------------------------------------------- 1 | required_resources: 2 | deberta-v3-large: "s3://ANONYMOUS/deberta-v3-large" 3 | huggingface: "s3://ANONYMOUS/huggingface" 4 | ner: "s3://ANONYMOUS/ner" 5 | remote_zoo_dir: "s3://ANONYMOUS/local_models_zoo" 6 | load_from_zoo_use_remote: false 7 | resource_dir: "resources" 8 | push_to_remote_zoo: false 9 | push_to_local_zoo: true 10 | 11 | merger: 12 | fisher_weighted: true 13 | fisher_n_example: 1000 14 | fisher_version: "h_1000_fix0910_emp" 15 | emp_fisher: true 16 | 17 | 18 | evaluate_locals_ood_after_merge: true 19 | evaluate_locals_before: false 20 | evaluate_locals_after: false 21 | 22 | seed: "{seed}" 23 | main_output_dir: 'runs/ner-deberta-large/fisher-ood-lr1e-5-seed{seed}' 24 | default_model_args: 25 | model_name: "{resource_dir}/deberta-v3-large" 26 | version: "hyp0918" 27 | zoo_filter: 28 | version: "hyp0918" 29 | seed: "{seed}" 30 | do_lower_case: false 31 | per_device_train_batch_size: 16 32 | lr_scheduler_type: "polynomial" 33 | warmup_ratio: 0.06 34 | learning_rate: 1.0e-5 35 | num_train_epochs: 20.0 36 | #adam_beta1: 0.9 37 | #adam_beta2: 0.98 38 | #adam_epsilon: 1.0e-6 39 | #max_grad_norm: 0.0 40 
| save_strategy: "epoch" 41 | evaluation_strategy: "epoch" 42 | load_best_model_at_end: true 43 | metric_for_best_model: "key_score" 44 | reweight_loss_schema: "sqrt" 45 | tokenizer: "{resource_dir}/deberta-v3-large" 46 | model_type: deberta 47 | -------------------------------------------------------------------------------- /src/configs/exps/deberta/ner/ood/deberta-ner-mtl-ood.yaml: -------------------------------------------------------------------------------- 1 | required_resources: 2 | deberta-v3-large: "s3://ANONYMOUS/deberta-v3-large" 3 | huggingface: "s3://ANONYMOUS/huggingface" 4 | ner: "s3://ANONYMOUS/ner" 5 | remote_zoo_dir: "s3://ANONYMOUS/local_models_zoo" 6 | load_from_zoo_use_remote: false 7 | resource_dir: "resources" 8 | push_to_remote_zoo: false 9 | push_to_local_zoo: true 10 | 11 | evaluate_locals_ood_before_merge: true 12 | evaluate_locals_ood_after_merge: false 13 | evaluate_locals_before: false 14 | evaluate_locals_after: false 15 | 16 | merger: 17 | enabled: false 18 | 19 | seed: "{seed}" 20 | 21 | load_from_checkpoint: true 22 | load_dir: 'runs/ner-deberta-large/mtl-new-lr1e-5-seed{seed}' 23 | 24 | main_output_dir: 'runs/ner-deberta-large/mtl-new-lr1e-5-ood-seed{seed}' 25 | default_model_args: 26 | model_name: "{resource_dir}/deberta-v3-large" 27 | version: "hyp0918" 28 | zoo_filter: 29 | version: "hyp0918" 30 | seed: "{seed}" 31 | do_lower_case: false 32 | per_device_train_batch_size: 16 33 | lr_scheduler_type: "polynomial" 34 | warmup_ratio: 0.06 35 | learning_rate: 1.0e-5 36 | num_train_epochs: 20.0 37 | #adam_beta1: 0.9 38 | #adam_beta2: 0.98 39 | #adam_epsilon: 1.0e-6 40 | #max_grad_norm: 0.0 41 | save_strategy: "epoch" 42 | evaluation_strategy: "epoch" 43 | #eval_steps: 10 44 | load_best_model_at_end: true 45 | metric_for_best_model: "key_score" 46 | reweight_loss_schema: "sqrt" 47 | tokenizer: "{resource_dir}/deberta-v3-large" 48 | model_type: deberta 49 | 
-------------------------------------------------------------------------------- /src/configs/exps/deberta/ner/ood/deberta-ner-regmean.yaml: -------------------------------------------------------------------------------- 1 | required_resources: 2 | deberta-v3-large: "s3://ANONYMOUS/deberta-v3-large" 3 | huggingface: "s3://ANONYMOUS/huggingface" 4 | ner: "s3://ANONYMOUS/ner" 5 | remote_zoo_dir: "s3://ANONYMOUS/local_models_zoo" 6 | load_from_zoo_use_remote: false 7 | resource_dir: "resources" 8 | push_to_remote_zoo: false 9 | push_to_local_zoo: true 10 | 11 | 12 | evaluate_locals_ood_before_merge: false 13 | evaluate_locals_ood_after_merge: true 14 | evaluate_locals_before: false 15 | evaluate_locals_after: false 16 | 17 | merger: 18 | #regmean_exclude_param_regex: ['.*classifier.*'] 19 | regmean_exclude_param_regex: [] 20 | regmean_mean: true 21 | gram_n_example: 1000 22 | gram_version: "h_1000_0726_fix_withclassifier" 23 | regmean_reduce_nondiag: 0.9 24 | 25 | 26 | seed: "{seed}" 27 | main_output_dir: 'runs/ner-deberta-large/regmean-ood-lr1e-5-seed{seed}' 28 | default_model_args: 29 | model_name: "{resource_dir}/deberta-v3-large" 30 | version: "hyp0918" 31 | zoo_filter: 32 | version: "hyp0918" 33 | seed: "{seed}" 34 | do_lower_case: false 35 | per_device_train_batch_size: 16 36 | lr_scheduler_type: "polynomial" 37 | warmup_ratio: 0.06 38 | learning_rate: 1.0e-5 39 | num_train_epochs: 20.0 40 | #adam_beta1: 0.9 41 | #adam_beta2: 0.98 42 | #adam_epsilon: 1.0e-6 43 | #max_grad_norm: 0.0 44 | save_strategy: "epoch" 45 | evaluation_strategy: "epoch" 46 | load_best_model_at_end: true 47 | metric_for_best_model: "key_score" 48 | reweight_loss_schema: "sqrt" 49 | tokenizer: "{resource_dir}/deberta-v3-large" 50 | model_type: deberta 51 | -------------------------------------------------------------------------------- /src/configs/exps/deberta/ner/ood/deberta-ner.yaml: -------------------------------------------------------------------------------- 1 | required_resources: 
2 | deberta-v3-large: "s3://ANONYMOUS/deberta-v3-large" 3 | huggingface: "s3://ANONYMOUS/huggingface" 4 | ner: "s3://ANONYMOUS/ner" 5 | remote_zoo_dir: "s3://ANONYMOUS/local_models_zoo" 6 | load_from_zoo_use_remote: false 7 | resource_dir: "resources" 8 | push_to_remote_zoo: false 9 | push_to_local_zoo: true 10 | 11 | 12 | evaluate_locals_ood_before_merge: true 13 | evaluate_locals_ood_after_merge: true 14 | evaluate_locals_before: false 15 | evaluate_locals_after: false 16 | 17 | seed: "{seed}" 18 | main_output_dir: 'runs/ner-deberta-large/simple-ood-lr1e-5-seed{seed}' 19 | default_model_args: 20 | model_name: "{resource_dir}/deberta-v3-large" 21 | version: "hyp0918" 22 | zoo_filter: 23 | version: "hyp0918" 24 | seed: "{seed}" 25 | do_lower_case: false 26 | per_device_train_batch_size: 16 27 | lr_scheduler_type: "polynomial" 28 | warmup_ratio: 0.06 29 | learning_rate: 1.0e-5 30 | num_train_epochs: 20.0 31 | #adam_beta1: 0.9 32 | #adam_beta2: 0.98 33 | #adam_epsilon: 1.0e-6 34 | #max_grad_norm: 0.0 35 | save_strategy: "epoch" 36 | evaluation_strategy: "epoch" 37 | load_best_model_at_end: true 38 | metric_for_best_model: "key_score" 39 | reweight_loss_schema: "sqrt" 40 | tokenizer: "{resource_dir}/deberta-v3-large" 41 | model_type: deberta 42 | -------------------------------------------------------------------------------- /src/configs/exps/deberta/ood/deberta-large-emotion-ensemble-ood.yaml: -------------------------------------------------------------------------------- 1 | required_resources: 2 | deberta-v3-large: "s3://ANONYMOUS/deberta-v3-large" 3 | huggingface: "s3://ANONYMOUS/huggingface" 4 | emotion_splits: "s3://ANONYMOUS/emotion_splits" 5 | remote_zoo_dir: "s3://ANONYMOUS/local_models_zoo" 6 | load_from_zoo_use_remote: false 7 | resource_dir: "resources" 8 | push_to_remote_zoo: false 9 | push_to_local_zoo: true 10 | 11 | evaluate_ensemble_ood: true 12 | evaluate_locals_ood_before_merge: false 13 | evaluate_locals_ood_after_merge: false 14 | 
evaluate_locals_before: false 15 | evaluate_locals_after: false 16 | 17 | merger: 18 | enabled: false 19 | 20 | ensembler: 21 | enabled: true 22 | handle_missing_label: true 23 | 24 | 25 | seed: "{seed}" 26 | main_output_dir: 'runs/emotion-deberta-large/ensemble-new-ood-seed{seed}' 27 | default_model_args: 28 | model_name: "{resource_dir}/deberta-v3-large" 29 | version: "hyp0918" 30 | zoo_filter: 31 | version: "hyp0918" 32 | seed: "{seed}" 33 | do_lower_case: false 34 | per_device_train_batch_size: 16 35 | lr_scheduler_type: "polynomial" 36 | warmup_ratio: 0.06 37 | learning_rate: 6.0e-6 38 | num_train_epochs: 20.0 39 | #adam_beta1: 0.9 40 | #adam_beta2: 0.98 41 | #adam_epsilon: 1.0e-6 42 | #max_grad_norm: 0.0 43 | save_strategy: "epoch" 44 | evaluation_strategy: "epoch" 45 | load_best_model_at_end: true 46 | metric_for_best_model: "key_score" 47 | reweight_loss_schema: "sqrt" 48 | tokenizer: "{resource_dir}/deberta-v3-large" 49 | model_type: deberta 50 | -------------------------------------------------------------------------------- /src/configs/exps/deberta/ood/deberta-large-emotion-fisher-norm-ood.yaml: -------------------------------------------------------------------------------- 1 | required_resources: 2 | deberta-v3-large: "s3://ANONYMOUS/deberta-v3-large" 3 | huggingface: "s3://ANONYMOUS/huggingface" 4 | emotion_splits: "s3://ANONYMOUS/emotion_splits" 5 | remote_zoo_dir: "s3://ANONYMOUS/local_models_zoo" 6 | load_from_zoo_use_remote: false 7 | resource_dir: "resources" 8 | push_to_remote_zoo: false 9 | push_to_local_zoo: true 10 | 11 | merger: 12 | fisher_weighted: true 13 | fisher_n_example: 1000 14 | fisher_version: "h_1000_fix0708" 15 | fisher_normalize: "param" 16 | evaluate_locals_ood_after_merge: true 17 | evaluate_locals_before: false 18 | evaluate_locals_after: false 19 | 20 | seed: "{seed}" 21 | main_output_dir: 'runs/emotion-deberta-large/fisher-new-norm-ood-seed{seed}' 22 | default_model_args: 23 | model_name: "{resource_dir}/deberta-v3-large" 
24 | version: "hyp0918" 25 | zoo_filter: 26 | version: "hyp0918" 27 | seed: "{seed}" 28 | do_lower_case: false 29 | per_device_train_batch_size: 16 30 | lr_scheduler_type: "polynomial" 31 | warmup_ratio: 0.06 32 | learning_rate: 6.0e-6 33 | num_train_epochs: 20.0 34 | #adam_beta1: 0.9 35 | #adam_beta2: 0.98 36 | #adam_epsilon: 1.0e-6 37 | #max_grad_norm: 0.0 38 | save_strategy: "epoch" 39 | evaluation_strategy: "epoch" 40 | load_best_model_at_end: true 41 | metric_for_best_model: "key_score" 42 | reweight_loss_schema: "sqrt" 43 | tokenizer: "{resource_dir}/deberta-v3-large" 44 | model_type: deberta 45 | -------------------------------------------------------------------------------- /src/configs/exps/deberta/ood/deberta-large-emotion-fisher-ood.yaml: -------------------------------------------------------------------------------- 1 | required_resources: 2 | deberta-v3-large: "s3://ANONYMOUS/deberta-v3-large" 3 | huggingface: "s3://ANONYMOUS/huggingface" 4 | emotion_splits: "s3://ANONYMOUS/emotion_splits" 5 | remote_zoo_dir: "s3://ANONYMOUS/local_models_zoo" 6 | load_from_zoo_use_remote: false 7 | resource_dir: "resources" 8 | push_to_remote_zoo: false 9 | push_to_local_zoo: true 10 | 11 | merger: 12 | fisher_weighted: true 13 | fisher_n_example: 1000 14 | fisher_version: "h_1000_fix0708" 15 | 16 | evaluate_locals_ood_after_merge: true 17 | evaluate_locals_before: false 18 | evaluate_locals_after: false 19 | 20 | seed: "{seed}" 21 | main_output_dir: 'runs/emotion-deberta-large/fisher-new-ood-seed{seed}' 22 | default_model_args: 23 | model_name: "{resource_dir}/deberta-v3-large" 24 | version: "hyp0918" 25 | zoo_filter: 26 | version: "hyp0918" 27 | seed: "{seed}" 28 | do_lower_case: false 29 | per_device_train_batch_size: 16 30 | lr_scheduler_type: "polynomial" 31 | warmup_ratio: 0.06 32 | learning_rate: 6.0e-6 33 | num_train_epochs: 20.0 34 | #adam_beta1: 0.9 35 | #adam_beta2: 0.98 36 | #adam_epsilon: 1.0e-6 37 | #max_grad_norm: 0.0 38 | save_strategy: "epoch" 39 | 
evaluation_strategy: "epoch" 40 | load_best_model_at_end: true 41 | metric_for_best_model: "key_score" 42 | reweight_loss_schema: "sqrt" 43 | tokenizer: "{resource_dir}/deberta-v3-large" 44 | model_type: deberta 45 | -------------------------------------------------------------------------------- /src/configs/exps/deberta/ood/deberta-large-emotion-mtl-ood.yaml: -------------------------------------------------------------------------------- 1 | required_resources: 2 | deberta-v3-large: "s3://ANONYMOUS/deberta-v3-large" 3 | huggingface: "s3://ANONYMOUS/huggingface" 4 | emotion_splits: "s3://ANONYMOUS/emotion_splits" 5 | remote_zoo_dir: "s3://ANONYMOUS/local_models_zoo" 6 | load_from_zoo_use_remote: false 7 | resource_dir: "resources" 8 | push_to_remote_zoo: false 9 | push_to_local_zoo: true 10 | 11 | evaluate_locals_ood_before_merge: true 12 | evaluate_locals_ood_after_merge: false 13 | evaluate_locals_before: false 14 | evaluate_locals_after: false 15 | 16 | load_from_checkpoint: true 17 | load_dir: 'runs/emotion-deberta-large/mtl-new-seed{seed}' 18 | 19 | merger: 20 | enabled: false 21 | 22 | seed: "{seed}" 23 | main_output_dir: 'runs/emotion-deberta-large/mtl-new-ood-seed{seed}' 24 | default_model_args: 25 | model_name: "{resource_dir}/deberta-v3-large" 26 | version: "hyp0918" 27 | zoo_filter: 28 | version: "hyp0918" 29 | seed: "{seed}" 30 | do_lower_case: false 31 | per_device_train_batch_size: 16 32 | lr_scheduler_type: "polynomial" 33 | warmup_ratio: 0.06 34 | learning_rate: 1.0e-5 35 | num_train_epochs: 20.0 36 | #adam_beta1: 0.9 37 | #adam_beta2: 0.98 38 | #adam_epsilon: 1.0e-6 39 | #max_grad_norm: 0.0 40 | save_strategy: "epoch" 41 | evaluation_strategy: "epoch" 42 | #eval_steps: 10 43 | load_best_model_at_end: true 44 | metric_for_best_model: "key_score" 45 | reweight_loss_schema: "sqrt" 46 | tokenizer: "{resource_dir}/deberta-v3-large" 47 | model_type: deberta 48 | -------------------------------------------------------------------------------- 
/src/configs/exps/deberta/ood/deberta-large-emotion-ood.yaml: -------------------------------------------------------------------------------- 1 | required_resources: 2 | deberta-v3-large: "s3://ANONYMOUS/deberta-v3-large" 3 | huggingface: "s3://ANONYMOUS/huggingface" 4 | emotion_splits: "s3://ANONYMOUS/emotion_splits" 5 | remote_zoo_dir: "s3://ANONYMOUS/local_models_zoo" 6 | load_from_zoo_use_remote: false 7 | resource_dir: "resources" 8 | push_to_remote_zoo: false 9 | push_to_local_zoo: true 10 | 11 | evaluate_locals_ood_before_merge: true 12 | evaluate_locals_ood_after_merge: true 13 | evaluate_locals_before: false 14 | evaluate_locals_after: false 15 | 16 | seed: "{seed}" 17 | main_output_dir: 'runs/emotion-deberta-large/simple_avg-new-ood-seed{seed}' 18 | default_model_args: 19 | model_name: "{resource_dir}/deberta-v3-large" 20 | version: "hyp0918" 21 | zoo_filter: 22 | version: "hyp0918" 23 | seed: "{seed}" 24 | do_lower_case: false 25 | per_device_train_batch_size: 16 26 | lr_scheduler_type: "polynomial" 27 | warmup_ratio: 0.06 28 | learning_rate: 6.0e-6 29 | num_train_epochs: 20.0 30 | #adam_beta1: 0.9 31 | #adam_beta2: 0.98 32 | #adam_epsilon: 1.0e-6 33 | #max_grad_norm: 0.0 34 | save_strategy: "epoch" 35 | evaluation_strategy: "epoch" 36 | load_best_model_at_end: true 37 | metric_for_best_model: "key_score" 38 | reweight_loss_schema: "sqrt" 39 | tokenizer: "{resource_dir}/deberta-v3-large" 40 | model_type: deberta 41 | -------------------------------------------------------------------------------- /src/configs/exps/deberta/ood/deberta-large-emotion-regmean-ood.yaml: -------------------------------------------------------------------------------- 1 | required_resources: 2 | deberta-v3-large: "s3://ANONYMOUS/deberta-v3-large" 3 | huggingface: "s3://ANONYMOUS/huggingface" 4 | emotion_splits: "s3://ANONYMOUS/emotion_splits" 5 | remote_zoo_dir: "s3://ANONYMOUS/local_models_zoo" 6 | load_from_zoo_use_remote: false 7 | resource_dir: "resources" 8 | 
push_to_remote_zoo: false 9 | push_to_local_zoo: true 10 | 11 | evaluate_locals_ood_after_merge: true 12 | evaluate_locals_before: false 13 | evaluate_locals_after: false 14 | 15 | merger: 16 | regmean_exclude_param_regex: [] 17 | #regmean_exclude_param_regex: [] 18 | regmean_mean: true 19 | gram_n_example: 1000 20 | gram_version: "h_1000_0726_fix_withclassifier" 21 | #regmean_diag: true 22 | regmean_reduce_nondiag: 0.1 23 | 24 | seed: "{seed}" 25 | main_output_dir: 'runs/emotion-deberta-large/deberta-new-withclassifier-ood-seed{seed}' 26 | default_model_args: 27 | model_name: "{resource_dir}/deberta-v3-large" 28 | version: "hyp0918" 29 | zoo_filter: 30 | version: "hyp0918" 31 | seed: "{seed}" 32 | do_lower_case: false 33 | per_device_train_batch_size: 16 34 | lr_scheduler_type: "polynomial" 35 | warmup_ratio: 0.06 36 | learning_rate: 6.0e-6 37 | num_train_epochs: 20.0 38 | #adam_beta1: 0.9 39 | #adam_beta2: 0.98 40 | #adam_epsilon: 1.0e-6 41 | #max_grad_norm: 0.0 42 | save_strategy: "epoch" 43 | evaluation_strategy: "epoch" 44 | load_best_model_at_end: true 45 | metric_for_best_model: "key_score" 46 | reweight_loss_schema: "sqrt" 47 | tokenizer: "{resource_dir}/deberta-v3-large" 48 | model_type: deberta 49 | -------------------------------------------------------------------------------- /src/configs/exps/distilbert/coeff/distilbert-coeff-50-fisher-fixregression-normalize.yaml: -------------------------------------------------------------------------------- 1 | required_resources: 2 | distilbert-base-uncased: "s3://ANONYMOUS/distilbert-base-uncased" 3 | huggingface: "s3://ANONYMOUS/huggingface" 4 | remote_zoo_dir: "s3://ANONYMOUS/local_models_zoo" 5 | load_from_zoo_use_remote: false 6 | resource_dir: "resources" 7 | push_to_remote_zoo: false 8 | push_to_local_zoo: true 9 | 10 | upload_runs_to_s3: true 11 | 12 | evaluate_locals_before: false 13 | 14 | merger: 15 | exclude_param_regex: ['.*pre_classifier.*','.*classifier.*'] 16 | coeff_search_method: 'grid' 17 | 
n_trials: 51 18 | fisher_weighted: true 19 | fisher_n_example: 1000 20 | fisher_version: "h_1000_fix0708_regression" 21 | fisher_normalize: "param" 22 | 23 | 24 | seed: 1 25 | main_output_dir: 'runs/glue-distilbert-base-uncased/coeff_grid_search_fisher_new_51_fixregression_normalize' 26 | default_model_args: 27 | model_name: "{resource_dir}/distilbert-base-uncased" 28 | learning_rate: 2.0e-5 29 | num_train_epochs: 3.0 30 | version: 1 31 | zoo_filter: 32 | version: 1 33 | seed: 1 34 | do_lower_case: true 35 | per_device_train_batch_size: 16 36 | tokenizer: "{resource_dir}/distilbert-base-uncased" 37 | model_type: distilbert 38 | -------------------------------------------------------------------------------- /src/configs/exps/distilbert/coeff/distilbert-coeff-50-fisher-fixregression.yaml: -------------------------------------------------------------------------------- 1 | required_resources: 2 | distilbert-base-uncased: "s3://ANONYMOUS/distilbert-base-uncased" 3 | huggingface: "s3://ANONYMOUS/huggingface" 4 | remote_zoo_dir: "s3://ANONYMOUS/local_models_zoo" 5 | load_from_zoo_use_remote: false 6 | resource_dir: "resources" 7 | push_to_remote_zoo: false 8 | push_to_local_zoo: true 9 | 10 | upload_runs_to_s3: true 11 | 12 | evaluate_locals_before: false 13 | 14 | merger: 15 | exclude_param_regex: ['.*pre_classifier.*','.*classifier.*'] 16 | coeff_search_method: 'grid' 17 | n_trials: 51 18 | fisher_weighted: true 19 | fisher_n_example: 1000 20 | fisher_version: "h_1000_fix0708_regression" 21 | 22 | 23 | seed: 1 24 | main_output_dir: 'runs/glue-distilbert-base-uncased/coeff_grid_search_fisher_new_51_fixregression' 25 | default_model_args: 26 | model_name: "{resource_dir}/distilbert-base-uncased" 27 | learning_rate: 2.0e-5 28 | num_train_epochs: 3.0 29 | version: 1 30 | zoo_filter: 31 | version: 1 32 | seed: 1 33 | do_lower_case: true 34 | per_device_train_batch_size: 16 35 | tokenizer: "{resource_dir}/distilbert-base-uncased" 36 | model_type: distilbert 37 | 
-------------------------------------------------------------------------------- /src/configs/exps/distilbert/coeff/distilbert-coeff-50-fisher-normalize.yaml: -------------------------------------------------------------------------------- 1 | required_resources: 2 | distilbert-base-uncased: "s3://ANONYMOUS/distilbert-base-uncased" 3 | huggingface: "s3://ANONYMOUS/huggingface" 4 | remote_zoo_dir: "s3://ANONYMOUS/local_models_zoo" 5 | load_from_zoo_use_remote: false 6 | resource_dir: "resources" 7 | push_to_remote_zoo: false 8 | push_to_local_zoo: true 9 | 10 | evaluate_locals_before: false 11 | 12 | merger: 13 | exclude_param_regex: ['.*pre_classifier.*','.*classifier.*'] 14 | coeff_search_method: 'grid' 15 | n_trials: 51 16 | fisher_weighted: true 17 | fisher_n_example: 1000 18 | fisher_version: "h_1000_fix0706" 19 | fisher_normalize: "param" 20 | 21 | 22 | seed: 1 23 | main_output_dir: 'runs/glue-distilbert-base-uncased/coeff_grid_search_fisher_new_51_normalize' 24 | default_model_args: 25 | model_name: "{resource_dir}/distilbert-base-uncased" 26 | learning_rate: 2.0e-5 27 | num_train_epochs: 3.0 28 | version: 1 29 | zoo_filter: 30 | version: 1 31 | seed: 1 32 | do_lower_case: true 33 | per_device_train_batch_size: 16 34 | tokenizer: "{resource_dir}/distilbert-base-uncased" 35 | model_type: distilbert 36 | -------------------------------------------------------------------------------- /src/configs/exps/distilbert/coeff/distilbert-coeff-50-fisher.yaml: -------------------------------------------------------------------------------- 1 | required_resources: 2 | distilbert-base-uncased: "s3://ANONYMOUS/distilbert-base-uncased" 3 | huggingface: "s3://ANONYMOUS/huggingface" 4 | remote_zoo_dir: "s3://ANONYMOUS/local_models_zoo" 5 | load_from_zoo_use_remote: false 6 | resource_dir: "resources" 7 | push_to_remote_zoo: false 8 | push_to_local_zoo: true 9 | 10 | evaluate_locals_before: false 11 | 12 | merger: 13 | exclude_param_regex: 
['.*pre_classifier.*','.*classifier.*'] 14 | coeff_search_method: 'grid' 15 | n_trials: 51 16 | fisher_weighted: true 17 | fisher_n_example: 1000 18 | fisher_version: "h_1000_fix0706" 19 | 20 | 21 | seed: 1 22 | main_output_dir: 'runs/glue-distilbert-base-uncased/coeff_grid_search_fisher_new_51' 23 | default_model_args: 24 | model_name: "{resource_dir}/distilbert-base-uncased" 25 | learning_rate: 2.0e-5 26 | num_train_epochs: 3.0 27 | version: 1 28 | zoo_filter: 29 | version: 1 30 | seed: 1 31 | do_lower_case: true 32 | per_device_train_batch_size: 16 33 | tokenizer: "{resource_dir}/distilbert-base-uncased" 34 | model_type: distilbert 35 | -------------------------------------------------------------------------------- /src/configs/exps/distilbert/coeff/distilbert-coeff-50.yaml: -------------------------------------------------------------------------------- 1 | required_resources: 2 | distilbert-base-uncased: "s3://ANONYMOUS/distilbert-base-uncased" 3 | huggingface: "s3://ANONYMOUS/huggingface" 4 | remote_zoo_dir: "s3://ANONYMOUS/local_models_zoo" 5 | load_from_zoo_use_remote: false 6 | resource_dir: "resources" 7 | push_to_remote_zoo: false 8 | push_to_local_zoo: true 9 | 10 | evaluate_locals_before: false 11 | 12 | merger: 13 | exclude_param_regex: ['.*pre_classifier.*','.*classifier.*'] 14 | coeff_search_method: 'grid' 15 | n_trials: 51 16 | 17 | 18 | seed: 1 19 | main_output_dir: 'runs/glue-distilbert-base-uncased/coeff_grid_search_new_51' 20 | default_model_args: 21 | model_name: "{resource_dir}/distilbert-base-uncased" 22 | learning_rate: 2.0e-5 23 | num_train_epochs: 3.0 24 | version: 1 25 | zoo_filter: 26 | version: 1 27 | seed: 1 28 | do_lower_case: true 29 | per_device_train_batch_size: 16 30 | tokenizer: "{resource_dir}/distilbert-base-uncased" 31 | model_type: distilbert 32 | -------------------------------------------------------------------------------- /src/configs/exps/distilbert/coeff/distilbert-coeff.yaml: 
-------------------------------------------------------------------------------- 1 | required_resources: 2 | distilbert-base-uncased: "s3://ANONYMOUS/distilbert-base-uncased" 3 | huggingface: "s3://ANONYMOUS/huggingface" 4 | remote_zoo_dir: "s3://ANONYMOUS/local_models_zoo" 5 | load_from_zoo_use_remote: false 6 | resource_dir: "resources" 7 | push_to_remote_zoo: false 8 | push_to_local_zoo: true 9 | 10 | evaluate_locals_before: false 11 | 12 | merger: 13 | exclude_param_regex: ['.*pre_classifier.*','.*classifier.*'] 14 | coeff_search_method: 'grid' 15 | n_trials: 11 16 | 17 | 18 | seed: 1 19 | main_output_dir: 'runs/glue-distilbert-base-uncased/coeff_grid_search_new' 20 | default_model_args: 21 | model_name: "{resource_dir}/distilbert-base-uncased" 22 | learning_rate: 2.0e-5 23 | num_train_epochs: 3.0 24 | version: 1 25 | zoo_filter: 26 | version: 1 27 | seed: 1 28 | do_lower_case: true 29 | per_device_train_batch_size: 16 30 | tokenizer: "{resource_dir}/distilbert-base-uncased" 31 | model_type: distilbert 32 | -------------------------------------------------------------------------------- /src/configs/exps/distilbert/distilbert-base.yaml: -------------------------------------------------------------------------------- 1 | required_resources: 2 | distilbert-base-uncased: "s3://ANONYMOUS/distilbert-base-uncased" 3 | huggingface: "s3://ANONYMOUS/huggingface" 4 | remote_zoo_dir: "s3://ANONYMOUS/local_models_zoo" 5 | load_from_zoo_use_remote: false 6 | resource_dir: "resources" 7 | push_to_remote_zoo: false 8 | push_to_local_zoo: true 9 | 10 | evaluate_locals_before: false 11 | 12 | seed: 1 13 | main_output_dir: 'runs/glue-distilbert-base-uncased/simple_avg' 14 | default_model_args: 15 | model_name: "{resource_dir}/distilbert-base-uncased" 16 | learning_rate: 2.0e-5 17 | num_train_epochs: 3.0 18 | version: 1 19 | zoo_filter: 20 | version: 1 21 | seed: 1 22 | num_train_epochs: 3.0 23 | do_lower_case: true 24 | per_device_train_batch_size: 16 25 | tokenizer: 
"{resource_dir}/distilbert-base-uncased" 26 | model_type: distilbert 27 | -------------------------------------------------------------------------------- /src/configs/exps/distilbert/distilbert-fisher.yaml: -------------------------------------------------------------------------------- 1 | required_resources: 2 | distilbert-base-uncased: "s3://ANONYMOUS/distilbert-base-uncased" 3 | huggingface: "s3://ANONYMOUS/huggingface" 4 | remote_zoo_dir: "s3://ANONYMOUS/local_models_zoo" 5 | load_from_zoo_use_remote: false 6 | resource_dir: "resources" 7 | push_to_remote_zoo: false 8 | push_to_local_zoo: true 9 | 10 | merger: 11 | coeff_search_method: null 12 | fisher_weighted: true 13 | fisher_n_example: 1000 14 | fisher_version: "h_1000_fix0708_regression" 15 | 16 | seed: 1 17 | main_output_dir: 'runs/glue-distilbert-base-uncased/fisher/n_example_1000_fix0708_regression' 18 | 19 | default_model_args: 20 | model_name: "{resource_dir}/distilbert-base-uncased" 21 | learning_rate: 2.0e-5 22 | num_train_epochs: 3.0 23 | version: 1 24 | zoo_filter: 25 | version: 1 26 | seed: 1 27 | num_train_epochs: 3.0 28 | do_lower_case: false 29 | per_device_train_batch_size: 16 30 | tokenizer: "{resource_dir}/distilbert-base-uncased" 31 | model_type: distilbert 32 | -------------------------------------------------------------------------------- /src/configs/exps/distilbert/distilbert-mtl.yaml: -------------------------------------------------------------------------------- 1 | required_resources: 2 | distilbert-base-uncased: "s3://ANONYMOUS/distilbert-base-uncased" 3 | huggingface: "s3://ANONYMOUS/huggingface" 4 | remote_zoo_dir: "s3://ANONYMOUS/local_models_zoo" 5 | load_from_zoo_use_remote: false 6 | resource_dir: "resources" 7 | push_to_remote_zoo: false 8 | push_to_local_zoo: true 9 | 10 | merger: 11 | enabled: false 12 | exclude_param_regex: ['.*pre_classifier.*','.*classifier.*'] 13 | 14 | 15 | seed: 1 16 | main_output_dir: 'runs/glue-distilbert-base-uncased/mtl/' 17 | 
default_model_args: 18 | model_name: "{resource_dir}/distilbert-base-uncased" 19 | learning_rate: 2.0e-5 20 | num_train_epochs: 3.0 21 | version: "mtl_1" 22 | zoo_filter: 23 | version: "mtl_1" 24 | seed: 1 25 | do_lower_case: true 26 | per_device_train_batch_size: 16 27 | #max_steps: 10 28 | tokenizer: "{resource_dir}/distilbert-base-uncased" 29 | model_type: distilbert 30 | -------------------------------------------------------------------------------- /src/configs/exps/distilbert/distilbert-ot.yaml: -------------------------------------------------------------------------------- 1 | required_resources: 2 | distilbert-base-uncased: "s3://ANONYMOUS/distilbert-base-uncased" 3 | huggingface: "s3://ANONYMOUS/huggingface" 4 | remote_zoo_dir: "s3://ANONYMOUS/local_models_zoo" 5 | load_from_zoo_use_remote: false 6 | resource_dir: "resources" 7 | push_to_remote_zoo: false 8 | push_to_local_zoo: true 9 | 10 | evaluate_locals_before: false 11 | 12 | merger: 13 | exclude_param_regex: ['.*pre_classifier.*','.*classifier.*'] 14 | coeff_search_method: null 15 | fisher_weighted: false 16 | algo: 'ot' 17 | ot_patterns: 18 | pattern0: 19 | ot_filter_regex: ['.*ffn$'] 20 | ot_lin1: 'lin1' 21 | ot_lin2: 'lin2' 22 | 23 | seed: 1 24 | main_output_dir: 'runs/glue-distilbert-base-uncased/ot' 25 | default_model_args: 26 | model_name: "{resource_dir}/distilbert-base-uncased" 27 | learning_rate: 2.0e-5 28 | num_train_epochs: 3.0 29 | version: 3 30 | zoo_filter: 31 | version: 3 32 | seed: 1 33 | do_lower_case: true 34 | per_device_train_batch_size: 16 35 | max_steps: -1 36 | tokenizer: "{resource_dir}/distilbert-base-uncased" 37 | model_type: distilbert 38 | -------------------------------------------------------------------------------- /src/configs/exps/distilbert/distilbert-regmean.yaml: -------------------------------------------------------------------------------- 1 | required_resources: 2 | distilbert-base-uncased: "s3://ANONYMOUS/distilbert-base-uncased" 3 | huggingface: 
"s3://ANONYMOUS/huggingface" 4 | remote_zoo_dir: "s3://ANONYMOUS/local_models_zoo" 5 | load_from_zoo_use_remote: false 6 | resource_dir: "resources" 7 | push_to_remote_zoo: false 8 | push_to_local_zoo: true 9 | 10 | merger: 11 | coeff_search_method: null 12 | fisher_weighted: false 13 | 14 | regmean_mean: true 15 | gram_n_example: 1000 16 | gram_version: "h_1000_0726_fix" 17 | 18 | seed: 1 19 | main_output_dir: 'runs/glue-distilbert-base-uncased/regmean/n_example_1000' 20 | 21 | default_model_args: 22 | model_name: "{resource_dir}/distilbert-base-uncased" 23 | learning_rate: 2.0e-5 24 | num_train_epochs: 3.0 25 | version: 1 26 | zoo_filter: 27 | version: 1 28 | seed: 1 29 | num_train_epochs: 3.0 30 | do_lower_case: false 31 | per_device_train_batch_size: 16 32 | tokenizer: "{resource_dir}/distilbert-base-uncased" 33 | model_type: distilbert 34 | -------------------------------------------------------------------------------- /src/configs/exps/distilbert/fisher/distilbert-fisher_10.yaml: -------------------------------------------------------------------------------- 1 | required_resources: 2 | distilbert-base-uncased: "s3://ANONYMOUS/distilbert-base-uncased" 3 | huggingface: "s3://ANONYMOUS/huggingface" 4 | remote_zoo_dir: "s3://ANONYMOUS/local_models_zoo" 5 | load_from_zoo_use_remote: false 6 | resource_dir: "resources" 7 | push_to_remote_zoo: false 8 | push_to_local_zoo: true 9 | 10 | merger: 11 | coeff_search_method: null 12 | fisher_weighted: true 13 | fisher_n_example: 10 14 | fisher_version: "h_10_fix0706" 15 | 16 | seed: 1 17 | main_output_dir: 'runs/glue-distilbert-base-uncased/fisher/n_example_10' 18 | default_model_args: 19 | model_name: "{resource_dir}/distilbert-base-uncased" 20 | learning_rate: 2.0e-5 21 | num_train_epochs: 3.0 22 | version: 1 23 | zoo_filter: 24 | version: 1 25 | seed: 1 26 | num_train_epochs: 3.0 27 | do_lower_case: false 28 | per_device_train_batch_size: 16 29 | tokenizer: "{resource_dir}/distilbert-base-uncased" 30 | model_type: 
distilbert 31 | -------------------------------------------------------------------------------- /src/configs/exps/distilbert/fisher/distilbert-fisher_100.yaml: -------------------------------------------------------------------------------- 1 | required_resources: 2 | distilbert-base-uncased: "s3://ANONYMOUS/distilbert-base-uncased" 3 | huggingface: "s3://ANONYMOUS/huggingface" 4 | remote_zoo_dir: "s3://ANONYMOUS/local_models_zoo" 5 | load_from_zoo_use_remote: false 6 | resource_dir: "resources" 7 | push_to_remote_zoo: false 8 | push_to_local_zoo: true 9 | 10 | merger: 11 | coeff_search_method: null 12 | fisher_weighted: true 13 | fisher_n_example: 100 14 | fisher_version: "h_100_fix0706" 15 | 16 | seed: 1 17 | main_output_dir: 'runs/glue-distilbert-base-uncased/fisher/n_example_100' 18 | default_model_args: 19 | model_name: "{resource_dir}/distilbert-base-uncased" 20 | learning_rate: 2.0e-5 21 | num_train_epochs: 3.0 22 | version: 1 23 | zoo_filter: 24 | version: 1 25 | seed: 1 26 | num_train_epochs: 3.0 27 | do_lower_case: false 28 | per_device_train_batch_size: 16 29 | tokenizer: "{resource_dir}/distilbert-base-uncased" 30 | model_type: distilbert 31 | -------------------------------------------------------------------------------- /src/configs/exps/distilbert/fisher/distilbert-fisher_1000-normalize.yaml: -------------------------------------------------------------------------------- 1 | required_resources: 2 | distilbert-base-uncased: "s3://ANONYMOUS/distilbert-base-uncased" 3 | huggingface: "s3://ANONYMOUS/huggingface" 4 | remote_zoo_dir: "s3://ANONYMOUS/local_models_zoo" 5 | load_from_zoo_use_remote: false 6 | resource_dir: "resources" 7 | push_to_remote_zoo: false 8 | push_to_local_zoo: true 9 | 10 | merger: 11 | coeff_search_method: null 12 | fisher_weighted: true 13 | fisher_n_example: 1000 14 | fisher_version: "h_1000_fix0708_regression" 15 | fisher_normalize: "param" 16 | 17 | seed: 1 18 | main_output_dir: 
'runs/glue-distilbert-base-uncased/fisher/n_example_1000_fix0708_regression_normalize' 19 | 20 | default_model_args: 21 | model_name: "{resource_dir}/distilbert-base-uncased" 22 | learning_rate: 2.0e-5 23 | num_train_epochs: 3.0 24 | version: 1 25 | zoo_filter: 26 | version: 1 27 | seed: 1 28 | num_train_epochs: 3.0 29 | do_lower_case: false 30 | per_device_train_batch_size: 16 31 | tokenizer: "{resource_dir}/distilbert-base-uncased" 32 | model_type: distilbert 33 | -------------------------------------------------------------------------------- /src/configs/exps/distilbert/fisher/distilbert-fisher_abl.yaml: -------------------------------------------------------------------------------- 1 | required_resources: 2 | distilbert-base-uncased: "s3://ANONYMOUS/distilbert-base-uncased" 3 | huggingface: "s3://ANONYMOUS/huggingface" 4 | remote_zoo_dir: "s3://ANONYMOUS/local_models_zoo" 5 | load_from_zoo_use_remote: false 6 | resource_dir: "resources" 7 | push_to_remote_zoo: false 8 | push_to_local_zoo: true 9 | 10 | merger: 11 | coeff_search_method: null 12 | fisher_weighted: false 13 | fisher_n_example: 10 14 | fisher_version: "h_10" 15 | 16 | seed: 1 17 | main_output_dir: 'runs/glue-distilbert-base-uncased/fisher/n_example_abl' 18 | default_model_args: 19 | model_name: "{resource_dir}/distilbert-base-uncased" 20 | learning_rate: 2.0e-5 21 | num_train_epochs: 3.0 22 | version: 1 23 | zoo_filter: 24 | version: 1 25 | seed: 1 26 | num_train_epochs: 3.0 27 | do_lower_case: false 28 | per_device_train_batch_size: 16 29 | tokenizer: "{resource_dir}/distilbert-base-uncased" 30 | model_type: distilbert 31 | -------------------------------------------------------------------------------- /src/configs/exps/distilbert/regmean/distilbert-regmean-coeff.yaml: -------------------------------------------------------------------------------- 1 | required_resources: 2 | distilbert-base-uncased: "s3://ANONYMOUS/distilbert-base-uncased" 3 | huggingface: "s3://ANONYMOUS/huggingface" 4 | 
remote_zoo_dir: "s3://ANONYMOUS/local_models_zoo" 5 | load_from_zoo_use_remote: false 6 | resource_dir: "resources" 7 | push_to_remote_zoo: false 8 | push_to_local_zoo: true 9 | 10 | evaluate_locals_before: false 11 | 12 | merger: 13 | fisher_weighted: false 14 | coeff_search_method: 'grid' 15 | n_trials: 51 16 | 17 | regmean_mean: true 18 | gram_n_example: 1000 19 | gram_version: "h_1000_0726_fix" 20 | 21 | seed: 1 22 | main_output_dir: 'runs/glue-distilbert-base-uncased/regmean/n_example_1000_coeff' 23 | 24 | default_model_args: 25 | model_name: "{resource_dir}/distilbert-base-uncased" 26 | learning_rate: 2.0e-5 27 | num_train_epochs: 3.0 28 | version: 1 29 | zoo_filter: 30 | version: 1 31 | seed: 1 32 | num_train_epochs: 3.0 33 | do_lower_case: false 34 | per_device_train_batch_size: 16 35 | tokenizer: "{resource_dir}/distilbert-base-uncased" 36 | model_type: distilbert 37 | -------------------------------------------------------------------------------- /src/configs/exps/distilbert/subset/distilbert-iid-10k-regmean-whead.yaml: -------------------------------------------------------------------------------- 1 | required_resources: 2 | distilbert-base-uncased: "s3://ANONYMOUS/distilbert-base-uncased" 3 | huggingface: "s3://ANONYMOUS/huggingface" 4 | remote_zoo_dir: "s3://ANONYMOUS/local_models_zoo" 5 | load_from_zoo_use_remote: false 6 | resource_dir: "resources" 7 | push_to_remote_zoo: false 8 | push_to_local_zoo: true 9 | 10 | merger: 11 | coeff_search_method: null 12 | fisher_weighted: false 13 | 14 | regmean_mean: true 15 | gram_n_example: 1000 16 | gram_version: "h_1000_0726_fix_whead" 17 | exclude_param_regex: [] 18 | 19 | seed: "{seed}" 20 | main_output_dir: 'runs/glue-distilbert-base-uncased/iid_10k_regmean/{dataset_name}/{seed}' 21 | 22 | default_model_args: 23 | model_name: "{resource_dir}/distilbert-base-uncased" 24 | learning_rate: 2.0e-5 25 | num_train_epochs: 20 26 | version: "iid10k_0" 27 | zoo_filter: 28 | version: "iid10k_0" 29 | seed: "{seed}" 
30 | num_train_epochs: 20 31 | do_lower_case: true 32 | per_device_train_batch_size: 16 33 | # evaluation_strategy: "steps" 34 | # eval_steps: 200 35 | # load_best_model_at_end: true 36 | # metric_for_best_model: "key_score" 37 | # save_steps: 200 38 | # greater_is_better: true 39 | tokenizer: "{resource_dir}/distilbert-base-uncased" 40 | model_type: distilbert 41 | -------------------------------------------------------------------------------- /src/configs/exps/distilbert/subset/distilbert-iid-1k-debug.yaml: -------------------------------------------------------------------------------- 1 | # required_resources: 2 | # 3 | # partition_files: "s3://ANONYMOUS/partition_files" 4 | # distilbert-base-uncased: "s3://ANONYMOUS/distilbert-base-uncased" 5 | # huggingface: "s3://ANONYMOUS/huggingface" 6 | remote_zoo_dir: "s3://ANONYMOUS/local_models_zoo" 7 | load_from_zoo_use_remote: false 8 | #resource_dir: "resources" 9 | push_to_remote_zoo: false 10 | push_to_local_zoo: true 11 | 12 | merger: 13 | coeff_search_method: null 14 | fisher_weighted: false 15 | 16 | seed: "{seed}" 17 | main_output_dir: 'runs/glue-distilbert-base-uncased/iid_1k/{dataset_name}/seed{seed}' 18 | 19 | default_model_args: 20 | model_name: "{resource_dir}/distilbert-base-uncased" 21 | learning_rate: 2.0e-5 22 | num_train_epochs: 30.0 23 | version: "iid1k_0" 24 | zoo_filter: 25 | version: "iid1k_0" 26 | seed: "{seed}" 27 | num_train_epochs: 30.0 28 | do_lower_case: true 29 | per_device_train_batch_size: 16 30 | evaluation_strategy: "steps" 31 | eval_steps: 5 32 | max_steps: 10 33 | load_best_model_at_end: true 34 | metric_for_best_model: "key_score" 35 | 36 | tokenizer: "{resource_dir}/distilbert-base-uncased" 37 | model_type: distilbert 38 | -------------------------------------------------------------------------------- /src/configs/exps/distilbert/subset/distilbert-iid-1k-regmean.yaml: -------------------------------------------------------------------------------- 1 | required_resources: 2 | 
distilbert-base-uncased: "s3://ANONYMOUS/distilbert-base-uncased" 3 | huggingface: "s3://ANONYMOUS/huggingface" 4 | remote_zoo_dir: "s3://ANONYMOUS/local_models_zoo" 5 | load_from_zoo_use_remote: false 6 | resource_dir: "resources" 7 | push_to_remote_zoo: false 8 | push_to_local_zoo: true 9 | 10 | merger: 11 | coeff_search_method: null 12 | fisher_weighted: false 13 | 14 | regmean_mean: true 15 | gram_n_example: 1000 16 | gram_version: "h_1000_0726_fix" 17 | 18 | seed: 11 19 | main_output_dir: 'runs/glue-distilbert-base-uncased/iid_1k_regmean/{dataset_name}' 20 | 21 | default_model_args: 22 | model_name: "{resource_dir}/distilbert-base-uncased" 23 | learning_rate: 2.0e-5 24 | num_train_epochs: 20.0 25 | version: "iid1k_0" 26 | zoo_filter: 27 | version: "iid1k_0" 28 | seed: 11 29 | num_train_epochs: 20.0 30 | do_lower_case: true 31 | per_device_train_batch_size: 16 32 | evaluation_strategy: "steps" 33 | eval_steps: 500 34 | load_best_model_at_end: true 35 | metric_for_best_model: "key_score" 36 | save_steps: 500 37 | 38 | 39 | tokenizer: "{resource_dir}/distilbert-base-uncased" 40 | model_type: distilbert 41 | -------------------------------------------------------------------------------- /src/configs/exps/distilbert/subset/distilbert-iid-1k-whead.yaml: -------------------------------------------------------------------------------- 1 | required_resources: 2 | distilbert-base-uncased: "s3://ANONYMOUS/distilbert-base-uncased" 3 | huggingface: "s3://ANONYMOUS/huggingface" 4 | remote_zoo_dir: "s3://ANONYMOUS/local_models_zoo" 5 | load_from_zoo_use_remote: false 6 | resource_dir: "resources" 7 | push_to_remote_zoo: false 8 | push_to_local_zoo: true 9 | 10 | merger: 11 | coeff_search_method: null 12 | fisher_weighted: false 13 | 14 | # regmean_mean: true 15 | # gram_n_example: 1000 16 | # gram_version: "h_1000_0726_fix" 17 | exclude_param_regex: [] 18 | 19 | seed: "{seed}" 20 | main_output_dir: 'runs/glue-distilbert-base-uncased/iid_1k/{dataset_name}/{seed}' 21 | 22 | 
default_model_args: 23 | model_name: "{resource_dir}/distilbert-base-uncased" 24 | learning_rate: 2.0e-5 25 | num_train_epochs: 20 26 | version: "iid1k_0" 27 | zoo_filter: 28 | version: "iid1k_0" 29 | seed: "{seed}" 30 | num_train_epochs: 20 31 | do_lower_case: true 32 | per_device_train_batch_size: 16 33 | #evaluation_strategy: "steps" 34 | #eval_steps: 200 35 | #load_best_model_at_end: true 36 | #metric_for_best_model: "key_score" 37 | #save_steps: 200 38 | #greater_is_better: true 39 | 40 | tokenizer: "{resource_dir}/distilbert-base-uncased" 41 | model_type: distilbert 42 | -------------------------------------------------------------------------------- /src/configs/exps/distilbert/subset/distilbert-iid-1k.yaml: -------------------------------------------------------------------------------- 1 | required_resources: 2 | distilbert-base-uncased: "s3://ANONYMOUS/distilbert-base-uncased" 3 | huggingface: "s3://ANONYMOUS/huggingface" 4 | remote_zoo_dir: "s3://ANONYMOUS/local_models_zoo" 5 | load_from_zoo_use_remote: false 6 | resource_dir: "resources" 7 | push_to_remote_zoo: false 8 | push_to_local_zoo: true 9 | 10 | merger: 11 | coeff_search_method: null 12 | fisher_weighted: false 13 | 14 | # regmean_mean: true 15 | # gram_n_example: 1000 16 | # gram_version: "h_1000_0726_fix" 17 | 18 | seed: "{seed}" 19 | main_output_dir: 'runs/glue-distilbert-base-uncased/iid_1k/{dataset_name}/seed{seed}' 20 | 21 | default_model_args: 22 | model_name: "{resource_dir}/distilbert-base-uncased" 23 | learning_rate: 2.0e-5 24 | num_train_epochs: 20.0 25 | version: "iid1k_0" 26 | zoo_filter: 27 | version: "iid1k_0" 28 | seed: "{seed}" 29 | num_train_epochs: 20.0 30 | do_lower_case: true 31 | per_device_train_batch_size: 16 32 | evaluation_strategy: "steps" 33 | eval_steps: 500 34 | load_best_model_at_end: true 35 | metric_for_best_model: "key_score" 36 | 37 | tokenizer: "{resource_dir}/distilbert-base-uncased" 38 | model_type: distilbert 39 | 
-------------------------------------------------------------------------------- /src/configs/exps/distilbert/subset/distilbert-niid-1k-fisher-whead.yaml: -------------------------------------------------------------------------------- 1 | required_resources: 2 | distilbert-base-uncased: "s3://ANONYMOUS/distilbert-base-uncased" 3 | huggingface: "s3://ANONYMOUS/huggingface" 4 | remote_zoo_dir: "s3://ANONYMOUS/local_models_zoo" 5 | load_from_zoo_use_remote: false 6 | resource_dir: "resources" 7 | push_to_remote_zoo: false 8 | push_to_local_zoo: true 9 | 10 | merger: 11 | coeff_search_method: null 12 | fisher_weighted: true 13 | fisher_n_example: 1000 14 | fisher_version: "h_1000_fix0726" 15 | exclude_param_regex: [] 16 | 17 | seed: "{seed}" 18 | main_output_dir: 'runs/glue-distilbert-base-uncased/niid_{partition_method}_1k_fisher/{dataset_name}/{seed}' 19 | 20 | default_model_args: 21 | model_name: "{resource_dir}/distilbert-base-uncased" 22 | learning_rate: 2.0e-5 23 | num_train_epochs: 20 24 | version: "{partition_method}1k_0" 25 | zoo_filter: 26 | version: "{partition_method}_0" 27 | seed: "{seed}" 28 | num_train_epochs: 20 29 | do_lower_case: true 30 | per_device_train_batch_size: 16 31 | # evaluation_strategy: "steps" 32 | # eval_steps: 200 33 | # load_best_model_at_end: true 34 | # metric_for_best_model: "key_score" 35 | # save_steps: 200 36 | # greater_is_better: true 37 | tokenizer: "{resource_dir}/distilbert-base-uncased" 38 | model_type: distilbert 39 | -------------------------------------------------------------------------------- /src/configs/exps/distilbert/subset/distilbert-niid-1k-regmean-whead.yaml: -------------------------------------------------------------------------------- 1 | required_resources: 2 | distilbert-base-uncased: "s3://ANONYMOUS/distilbert-base-uncased" 3 | huggingface: "s3://ANONYMOUS/huggingface" 4 | remote_zoo_dir: "s3://ANONYMOUS/local_models_zoo" 5 | load_from_zoo_use_remote: false 6 | resource_dir: "resources" 7 | 
push_to_remote_zoo: false 8 | push_to_local_zoo: true 9 | 10 | merger: 11 | coeff_search_method: null 12 | fisher_weighted: false 13 | 14 | regmean_mean: true 15 | gram_n_example: 1000 16 | gram_version: "h_1000_0726_fix_whead" 17 | exclude_param_regex: [] 18 | 19 | seed: "{seed}" 20 | main_output_dir: 'runs/glue-distilbert-base-uncased/niid_{partition_method}_1k_regmean/{dataset_name}/{seed}' 21 | 22 | default_model_args: 23 | model_name: "{resource_dir}/distilbert-base-uncased" 24 | learning_rate: 2.0e-5 25 | num_train_epochs: 20 26 | version: "{partition_method}1k_0" 27 | zoo_filter: 28 | version: "{partition_method}_0" 29 | seed: "{seed}" 30 | num_train_epochs: 20 31 | do_lower_case: true 32 | per_device_train_batch_size: 16 33 | # evaluation_strategy: "steps" 34 | # eval_steps: 200 35 | # load_best_model_at_end: true 36 | # metric_for_best_model: "key_score" 37 | # save_steps: 200 38 | # greater_is_better: true 39 | tokenizer: "{resource_dir}/distilbert-base-uncased" 40 | model_type: distilbert 41 | -------------------------------------------------------------------------------- /src/configs/exps/distilbert/subset/distilbert-niid-1k-whead.yaml: -------------------------------------------------------------------------------- 1 | required_resources: 2 | distilbert-base-uncased: "s3://ANONYMOUS/distilbert-base-uncased" 3 | huggingface: "s3://ANONYMOUS/huggingface" 4 | remote_zoo_dir: "s3://ANONYMOUS/local_models_zoo" 5 | load_from_zoo_use_remote: false 6 | resource_dir: "resources" 7 | push_to_remote_zoo: false 8 | push_to_local_zoo: true 9 | 10 | merger: 11 | coeff_search_method: null 12 | fisher_weighted: false 13 | exclude_param_regex: [] 14 | 15 | 16 | seed: "{seed}" 17 | main_output_dir: 'runs/glue-distilbert-base-uncased/niid_{partition_method}_1k_whead/{dataset_name}/{seed}' 18 | 19 | default_model_args: 20 | model_name: "{resource_dir}/distilbert-base-uncased" 21 | learning_rate: 2.0e-5 22 | num_train_epochs: 20 23 | version: "{partition_method}1k_0" 24 | 
zoo_filter: 25 | version: "{partition_method}_0" 26 | seed: "{seed}" 27 | num_train_epochs: 20 28 | do_lower_case: true 29 | per_device_train_batch_size: 16 30 | # evaluation_strategy: "steps" 31 | # eval_steps: 200 32 | # load_best_model_at_end: true 33 | # metric_for_best_model: "key_score" 34 | # save_steps: 200 35 | # greater_is_better: true 36 | tokenizer: "{resource_dir}/distilbert-base-uncased" 37 | model_type: distilbert 38 | -------------------------------------------------------------------------------- /src/configs/exps/distilbert/tsp/distilbert-tsp1-5.yaml: -------------------------------------------------------------------------------- 1 | required_resources: 2 | distilbert-base-uncased: "s3://ANONYMOUS/distilbert-base-uncased" 3 | huggingface: "s3://ANONYMOUS/huggingface" 4 | remote_zoo_dir: "s3://ANONYMOUS/local_models_zoo" 5 | load_from_zoo_use_remote: false 6 | resource_dir: "resources" 7 | push_to_remote_zoo: false 8 | push_to_local_zoo: true 9 | 10 | merger: 11 | exclude_param_regex: ['.*pre_classifier.*','.*classifier.*', ".*layer.5.*", ".*layer.4.*", ".*layer.3.*", ".*layer.2.*", ".*layer.1.*"] 12 | 13 | 14 | seed: 1 15 | main_output_dir: 'runs/glue-distilbert-base-uncased/tsp_layer1-5/seed1' 16 | default_model_args: 17 | model_name: "{resource_dir}/distilbert-base-uncased" 18 | learning_rate: 2.0e-5 19 | num_train_epochs: 3.0 20 | version: 1 21 | zoo_filter: 22 | version: 1 23 | seed: 1 24 | do_lower_case: false 25 | per_device_train_batch_size: 16 26 | tokenizer: "{resource_dir}/distilbert-base-uncased" 27 | model_type: distilbert 28 | -------------------------------------------------------------------------------- /src/configs/exps/distilbert/tsp/distilbert-tsp2-5.yaml: -------------------------------------------------------------------------------- 1 | required_resources: 2 | distilbert-base-uncased: "s3://ANONYMOUS/distilbert-base-uncased" 3 | huggingface: "s3://ANONYMOUS/huggingface" 4 | remote_zoo_dir: "s3://ANONYMOUS/local_models_zoo" 5 
| load_from_zoo_use_remote: false 6 | resource_dir: "resources" 7 | push_to_remote_zoo: false 8 | push_to_local_zoo: true 9 | 10 | merger: 11 | exclude_param_regex: ['.*pre_classifier.*','.*classifier.*', ".*layer.5.*", ".*layer.4.*", ".*layer.3.*", ".*layer.2.*"] 12 | 13 | 14 | seed: 1 15 | main_output_dir: 'runs/glue-distilbert-base-uncased/tsp_layer2-5/seed1' 16 | default_model_args: 17 | model_name: "{resource_dir}/distilbert-base-uncased" 18 | learning_rate: 2.0e-5 19 | num_train_epochs: 3.0 20 | version: 1 21 | zoo_filter: 22 | version: 1 23 | seed: 1 24 | do_lower_case: false 25 | per_device_train_batch_size: 16 26 | tokenizer: "{resource_dir}/distilbert-base-uncased" 27 | model_type: distilbert 28 | -------------------------------------------------------------------------------- /src/configs/exps/distilbert/tsp/distilbert-tsp3-5.yaml: -------------------------------------------------------------------------------- 1 | required_resources: 2 | distilbert-base-uncased: "s3://ANONYMOUS/distilbert-base-uncased" 3 | huggingface: "s3://ANONYMOUS/huggingface" 4 | remote_zoo_dir: "s3://ANONYMOUS/local_models_zoo" 5 | load_from_zoo_use_remote: false 6 | resource_dir: "resources" 7 | push_to_remote_zoo: false 8 | push_to_local_zoo: true 9 | 10 | merger: 11 | exclude_param_regex: ['.*pre_classifier.*','.*classifier.*', ".*layer.5.*", ".*layer.4.*", ".*layer.3.*"] 12 | 13 | 14 | seed: 1 15 | main_output_dir: 'runs/glue-distilbert-base-uncased/tsp_layer3-5/seed1' 16 | default_model_args: 17 | model_name: "{resource_dir}/distilbert-base-uncased" 18 | learning_rate: 2.0e-5 19 | num_train_epochs: 3.0 20 | version: 1 21 | zoo_filter: 22 | version: 1 23 | seed: 1 24 | do_lower_case: false 25 | per_device_train_batch_size: 16 26 | tokenizer: "{resource_dir}/distilbert-base-uncased" 27 | model_type: distilbert 28 | -------------------------------------------------------------------------------- /src/configs/exps/distilbert/tsp/distilbert-tsp4-5.yaml: 
-------------------------------------------------------------------------------- 1 | required_resources: 2 | distilbert-base-uncased: "s3://ANONYMOUS/distilbert-base-uncased" 3 | huggingface: "s3://ANONYMOUS/huggingface" 4 | remote_zoo_dir: "s3://ANONYMOUS/local_models_zoo" 5 | load_from_zoo_use_remote: false 6 | resource_dir: "resources" 7 | push_to_remote_zoo: false 8 | push_to_local_zoo: true 9 | 10 | merger: 11 | exclude_param_regex: ['.*pre_classifier.*','.*classifier.*', ".*layer.5.*",".*layer.4.*"] 12 | 13 | 14 | seed: 1 15 | main_output_dir: 'runs/glue-distilbert-base-uncased/tsp_layer4-5/seed1' 16 | default_model_args: 17 | model_name: "{resource_dir}/distilbert-base-uncased" 18 | learning_rate: 2.0e-5 19 | num_train_epochs: 3.0 20 | version: 1 21 | zoo_filter: 22 | version: 1 23 | seed: 1 24 | do_lower_case: false 25 | per_device_train_batch_size: 16 26 | tokenizer: "{resource_dir}/distilbert-base-uncased" 27 | model_type: distilbert 28 | -------------------------------------------------------------------------------- /src/configs/exps/roberta-base/glue/roberta-base-fisher-norm.yaml: -------------------------------------------------------------------------------- 1 | required_resources: 2 | roberta-base: "s3://ANONYMOUS/roberta-base" 3 | huggingface: "s3://ANONYMOUS/huggingface" 4 | remote_zoo_dir: "s3://ANONYMOUS/local_models_zoo" 5 | load_from_zoo_use_remote: false 6 | resource_dir: "resources" 7 | push_to_remote_zoo: false 8 | push_to_local_zoo: true 9 | 10 | merger: 11 | exclude_param_regex: ['.*pre_classifier.*','.*classifier.*'] 12 | 13 | fisher_weighted: true 14 | fisher_n_example: 1000 15 | fisher_version: "h_1000_fix0708" 16 | fisher_normalize: "param" 17 | 18 | seed: 1 19 | main_output_dir: 'runs/glue-roberta_base/fisher_norm' 20 | default_model_args: 21 | model_name: "{resource_dir}/roberta-base" 22 | version: 1 23 | zoo_filter: 24 | version: 1 25 | seed: 1 26 | do_lower_case: false 27 | per_device_train_batch_size: 16 28 | 29 | tokenizer: 
"{resource_dir}/roberta-base" 30 | model_type: roberta-base 31 | -------------------------------------------------------------------------------- /src/configs/exps/roberta-base/glue/roberta-base-fisher.yaml: -------------------------------------------------------------------------------- 1 | required_resources: 2 | roberta-base: "s3://ANONYMOUS/roberta-base" 3 | huggingface: "s3://ANONYMOUS/huggingface" 4 | remote_zoo_dir: "s3://ANONYMOUS/local_models_zoo" 5 | load_from_zoo_use_remote: false 6 | resource_dir: "resources" 7 | push_to_remote_zoo: false 8 | push_to_local_zoo: true 9 | 10 | merger: 11 | exclude_param_regex: ['.*pre_classifier.*','.*classifier.*'] 12 | 13 | fisher_weighted: true 14 | fisher_n_example: 1000 15 | fisher_version: "h_1000_fix0708" 16 | 17 | seed: 1 18 | main_output_dir: 'runs/glue-roberta_base/fisher' 19 | default_model_args: 20 | model_name: "{resource_dir}/roberta-base" 21 | version: 1 22 | zoo_filter: 23 | version: 1 24 | seed: 1 25 | do_lower_case: false 26 | per_device_train_batch_size: 16 27 | 28 | tokenizer: "{resource_dir}/roberta-base" 29 | model_type: roberta-base 30 | -------------------------------------------------------------------------------- /src/configs/exps/roberta-base/glue/roberta-base-mtl-debug.yaml: -------------------------------------------------------------------------------- 1 | # required_resources: 2 | # 3 | # partition_files: "s3://ANONYMOUS/partition_files" 4 | # roberta-base: "s3://ANONYMOUS/roberta-base" 5 | # huggingface: "s3://ANONYMOUS/huggingface" 6 | remote_zoo_dir: "s3://ANONYMOUS/local_models_zoo" 7 | #load_from_zoo_use_remote: false 8 | #resource_dir: "resources" 9 | push_to_remote_zoo: false 10 | push_to_local_zoo: true 11 | 12 | seed: 1 13 | main_output_dir: 'runs/emotion-roberta_base-hyp1/mtl' 14 | default_model_args: 15 | model_name: "{resource_dir}/roberta-base" 16 | version: "hyp1_mtl" 17 | zoo_filter: 18 | version: "hyp1_mtl" 19 | seed: 1 20 | do_lower_case: false 21 | 
per_device_train_batch_size: 16 22 | lr_scheduler_type: "polynomial" 23 | warmup_ratio: 0.06 24 | learning_rate: 1.0e-5 25 | num_train_epochs: 10.0 26 | max_steps: 5 27 | 28 | tokenizer: "{resource_dir}/roberta-base" 29 | model_type: roberta 30 | -------------------------------------------------------------------------------- /src/configs/exps/roberta-base/glue/roberta-base-mtl.yaml: -------------------------------------------------------------------------------- 1 | required_resources: 2 | roberta-base: "s3://ANONYMOUS/roberta-base" 3 | huggingface: "s3://ANONYMOUS/huggingface" 4 | remote_zoo_dir: "s3://ANONYMOUS/local_models_zoo" 5 | load_from_zoo_use_remote: false 6 | resource_dir: "resources" 7 | push_to_remote_zoo: false 8 | push_to_local_zoo: true 9 | 10 | seed: 1 11 | main_output_dir: 'runs/glue-roberta_base-hyp1/mtl' 12 | default_model_args: 13 | model_name: "{resource_dir}/roberta-base" 14 | version: "hyp1_mtl" 15 | zoo_filter: 16 | version: "hyp1_mtl" 17 | seed: 1 18 | do_lower_case: false 19 | per_device_train_batch_size: 16 20 | lr_scheduler_type: "polynomial" 21 | warmup_ratio: 0.06 22 | learning_rate: 1.0e-5 23 | num_train_epochs: 10.0 24 | 25 | tokenizer: "{resource_dir}/roberta-base" 26 | model_type: roberta 27 | -------------------------------------------------------------------------------- /src/configs/exps/roberta-base/glue/roberta-base-regmean.yaml: -------------------------------------------------------------------------------- 1 | required_resources: 2 | roberta-base: "s3://ANONYMOUS/roberta-base" 3 | huggingface: "s3://ANONYMOUS/huggingface" 4 | remote_zoo_dir: "s3://ANONYMOUS/local_models_zoo" 5 | load_from_zoo_use_remote: false 6 | resource_dir: "resources" 7 | push_to_remote_zoo: false 8 | push_to_local_zoo: true 9 | 10 | merger: 11 | exclude_param_regex: ['.*pre_classifier.*','.*classifier.*'] 12 | 13 | regmean_mean: true 14 | gram_n_example: 1000 15 | gram_version: "h_1000_0726_fix" 16 | 17 | 18 | seed: 1 19 | main_output_dir: 
'runs/glue-roberta_base/regmean' 20 | default_model_args: 21 | model_name: "{resource_dir}/roberta-base" 22 | version: 1 23 | zoo_filter: 24 | version: 1 25 | seed: 1 26 | do_lower_case: false 27 | per_device_train_batch_size: 16 28 | 29 | tokenizer: "{resource_dir}/roberta-base" 30 | model_type: roberta-base 31 | -------------------------------------------------------------------------------- /src/configs/exps/roberta-base/glue/roberta-base.yaml: -------------------------------------------------------------------------------- 1 | required_resources: 2 | roberta-base: "s3://ANONYMOUS/roberta-base" 3 | huggingface: "s3://ANONYMOUS/huggingface" 4 | remote_zoo_dir: "s3://ANONYMOUS/local_models_zoo" 5 | load_from_zoo_use_remote: false 6 | resource_dir: "resources" 7 | push_to_remote_zoo: false 8 | push_to_local_zoo: true 9 | 10 | seed: 1 11 | main_output_dir: 'runs/glue-roberta_base-hyp2/simple_avg' 12 | default_model_args: 13 | model_name: "{resource_dir}/roberta-base" 14 | version: "hyp2" 15 | zoo_filter: 16 | version: "hyp2" 17 | seed: 1 18 | do_lower_case: false 19 | per_device_train_batch_size: 16 20 | lr_scheduler_type: "polynomial" 21 | warmup_ratio: 0.06 22 | learning_rate: 1.0e-5 23 | num_train_epochs: 10.0 24 | #adam_beta1: 0.9 25 | #adam_beta2: 0.98 26 | #adam_epsilon: 1.0e-6 27 | #max_grad_norm: 0.0 28 | evaluation_strategy: "epoch" 29 | #weight_decay: 0.1 30 | load_best_model_at_end: true 31 | metric_for_best_model: "key_score" 32 | tokenizer: "{resource_dir}/roberta-base" 33 | model_type: roberta-base 34 | -------------------------------------------------------------------------------- /src/configs/exps/roberta-base/glue/roberta-coeff-50-fisher.yaml: -------------------------------------------------------------------------------- 1 | required_resources: 2 | roberta-base: "s3://ANONYMOUS/roberta-base" 3 | huggingface: "s3://ANONYMOUS/huggingface" 4 | remote_zoo_dir: "s3://ANONYMOUS/local_models_zoo" 5 | load_from_zoo_use_remote: false 6 | resource_dir: 
"resources" 7 | push_to_remote_zoo: false 8 | push_to_local_zoo: true 9 | 10 | evaluate_locals_before: false 11 | 12 | merger: 13 | exclude_param_regex: ['.*pre_classifier.*','.*classifier.*'] 14 | coeff_search_method: 'grid' 15 | n_trials: 51 16 | fisher_weighted: true 17 | fisher_n_example: 1000 18 | fisher_version: "h_1000_fix0708" 19 | 20 | 21 | seed: 2 22 | main_output_dir: 'runs/roberta-base/coeff_grid_search_new_51' 23 | output_dir_keys: ['seed'] 24 | default_model_args: 25 | model_name: "{resource_dir}/roberta-base" 26 | learning_rate: 2.0e-5 27 | num_train_epochs: 3.0 28 | version: 1 29 | zoo_filter: 30 | version: 1 31 | seed: 2 32 | do_lower_case: false 33 | per_device_train_batch_size: 16 34 | tokenizer: "{resource_dir}/roberta-base" 35 | model_type: roberta 36 | -------------------------------------------------------------------------------- /src/configs/exps/roberta-base/glue/roberta-coeff-50.yaml: -------------------------------------------------------------------------------- 1 | required_resources: 2 | roberta-base: "s3://ANONYMOUS/roberta-base" 3 | huggingface: "s3://ANONYMOUS/huggingface" 4 | remote_zoo_dir: "s3://ANONYMOUS/local_models_zoo" 5 | load_from_zoo_use_remote: false 6 | resource_dir: "resources" 7 | push_to_remote_zoo: false 8 | push_to_local_zoo: true 9 | 10 | evaluate_locals_before: false 11 | 12 | merger: 13 | exclude_param_regex: ['.*pre_classifier.*','.*classifier.*'] 14 | coeff_search_method: 'grid' 15 | n_trials: 51 16 | 17 | 18 | seed: 2 19 | main_output_dir: 'runs/roberta-base/coeff_grid_search_new_51' 20 | output_dir_keys: ['seed'] 21 | default_model_args: 22 | model_name: "{resource_dir}/roberta-base" 23 | learning_rate: 2.0e-5 24 | num_train_epochs: 3.0 25 | version: 1 26 | zoo_filter: 27 | version: 1 28 | seed: 2 29 | do_lower_case: false 30 | per_device_train_batch_size: 16 31 | tokenizer: "{resource_dir}/roberta-base" 32 | model_type: roberta 33 | 
-------------------------------------------------------------------------------- /src/configs/exps/roberta-base/ner/distilbert-base-ner-mtl.yaml: -------------------------------------------------------------------------------- 1 | # required_resources: 2 | # 3 | # partition_files: "s3://ANONYMOUS/partition_files" 4 | # roberta-base: "s3://ANONYMOUS/roberta-base" 5 | # huggingface: "s3://ANONYMOUS/huggingface" 6 | # emotion_splits: "s3://ANONYMOUS/emotion_splits" 7 | # remote_zoo_dir: "s3://ANONYMOUS/local_models_zoo" 8 | # load_from_zoo_use_remote: false 9 | #resource_dir: "resources" 10 | push_to_remote_zoo: false 11 | push_to_local_zoo: true 12 | 13 | evaluate_locals_ood: false 14 | 15 | seed: "{seed}" 16 | main_output_dir: 'runs/debug/mtl-new-seed{seed}' 17 | default_model_args: 18 | model_name: "{resource_dir}/distilbert-base-uncased" 19 | version: "hyp0812" 20 | zoo_filter: 21 | version: "hyp0812" 22 | seed: "{seed}" 23 | do_lower_case: false 24 | per_device_train_batch_size: 16 25 | lr_scheduler_type: "polynomial" 26 | warmup_ratio: 0.06 27 | learning_rate: 1.0e-5 28 | num_train_epochs: 20.0 29 | #adam_beta1: 0.9 30 | #adam_beta2: 0.98 31 | #adam_epsilon: 1.0e-6 32 | #max_grad_norm: 0.0 33 | save_strategy: "epoch" 34 | evaluation_strategy: "epoch" 35 | load_best_model_at_end: true 36 | metric_for_best_model: "key_score" 37 | tokenizer: "{resource_dir}/distilbert-base-uncased" 38 | model_type: distilbert 39 | -------------------------------------------------------------------------------- /src/configs/exps/roberta-base/ner/distilbert-base-ner.yaml: -------------------------------------------------------------------------------- 1 | # required_resources: 2 | # 3 | # partition_files: "s3://ANONYMOUS/partition_files" 4 | # roberta-base: "s3://ANONYMOUS/roberta-base" 5 | # huggingface: "s3://ANONYMOUS/huggingface" 6 | # emotion_splits: "s3://ANONYMOUS/emotion_splits" 7 | remote_zoo_dir: "s3://ANONYMOUS/local_models_zoo" 8 | # load_from_zoo_use_remote: false 9 | # 
resource_dir: "resources" 10 | # push_to_remote_zoo: false 11 | # push_to_local_zoo: true 12 | 13 | 14 | #evaluate_locals_ood_after_merge: true 15 | evaluate_locals_before: true 16 | evaluate_locals_after: false 17 | 18 | seed: "{seed}" 19 | main_output_dir: 'runs/ner_debug/distilbert-seed{seed}' 20 | default_model_args: 21 | model_name: "{resource_dir}/distilbert-base-uncased" 22 | version: "hyp0812" 23 | zoo_filter: 24 | version: "hyp0812" 25 | seed: "{seed}" 26 | do_lower_case: false 27 | per_device_train_batch_size: 16 28 | lr_scheduler_type: "polynomial" 29 | warmup_ratio: 0.06 30 | learning_rate: 1.0e-5 31 | num_train_epochs: 20.0 32 | #adam_beta1: 0.9 33 | #adam_beta2: 0.98 34 | #adam_epsilon: 1.0e-6 35 | #max_grad_norm: 0.0 36 | save_strategy: "epoch" 37 | evaluation_strategy: "steps" 38 | load_best_model_at_end: true 39 | eval_steps: 5 40 | metric_for_best_model: "key_score" 41 | tokenizer: "{resource_dir}/distilbert-base-uncased" 42 | model_type: distilbert 43 | -------------------------------------------------------------------------------- /src/configs/exps/roberta-base/ner/ood/roberta-base-ner-ensemble.yaml: -------------------------------------------------------------------------------- 1 | required_resources: 2 | roberta-base: "s3://ANONYMOUS/roberta-base" 3 | huggingface: "s3://ANONYMOUS/huggingface" 4 | ner: "s3://ANONYMOUS/ner" 5 | remote_zoo_dir: "s3://ANONYMOUS/local_models_zoo" 6 | load_from_zoo_use_remote: false 7 | resource_dir: "resources" 8 | push_to_remote_zoo: false 9 | push_to_local_zoo: true 10 | 11 | evaluate_ensemble_ood: true 12 | evaluate_locals_ood_before_merge: false 13 | evaluate_locals_ood_after_merge: false 14 | evaluate_locals_before: false 15 | evaluate_locals_after: false 16 | 17 | merger: 18 | enabled: false 19 | 20 | ensembler: 21 | enabled: true 22 | 23 | seed: "{seed}" 24 | main_output_dir: 'runs/ner-roberta_base/ensemble-ood-seed{seed}' 25 | default_model_args: 26 | model_name: "{resource_dir}/roberta-base" 27 | 
version: "hyp0812" 28 | zoo_filter: 29 | version: "hyp0812" 30 | seed: "{seed}" 31 | do_lower_case: false 32 | per_device_train_batch_size: 16 33 | lr_scheduler_type: "polynomial" 34 | warmup_ratio: 0.06 35 | learning_rate: 1.0e-5 36 | num_train_epochs: 20.0 37 | #adam_beta1: 0.9 38 | #adam_beta2: 0.98 39 | #adam_epsilon: 1.0e-6 40 | #max_grad_norm: 0.0 41 | save_strategy: "epoch" 42 | evaluation_strategy: "epoch" 43 | load_best_model_at_end: true 44 | metric_for_best_model: "key_score" 45 | tokenizer: "{resource_dir}/roberta-base" 46 | model_type: roberta-base 47 | -------------------------------------------------------------------------------- /src/configs/exps/roberta-base/ner/ood/roberta-base-ner-fisher.yaml: -------------------------------------------------------------------------------- 1 | required_resources: 2 | roberta-base: "s3://ANONYMOUS/roberta-base" 3 | huggingface: "s3://ANONYMOUS/huggingface" 4 | ner: "s3://ANONYMOUS/ner" 5 | remote_zoo_dir: "s3://ANONYMOUS/local_models_zoo" 6 | load_from_zoo_use_remote: false 7 | resource_dir: "resources" 8 | push_to_remote_zoo: false 9 | push_to_local_zoo: true 10 | 11 | merger: 12 | fisher_weighted: true 13 | fisher_n_example: 1000 14 | fisher_version: "h_1000_fix0910_emp" 15 | emp_fisher: true 16 | 17 | 18 | evaluate_locals_ood_after_merge: true 19 | evaluate_locals_before: false 20 | evaluate_locals_after: false 21 | 22 | seed: "{seed}" 23 | main_output_dir: 'runs/ner-roberta_base/fisher-emp-ood-seed{seed}' 24 | default_model_args: 25 | model_name: "{resource_dir}/roberta-base" 26 | version: "hyp0812" 27 | zoo_filter: 28 | version: "hyp0812" 29 | seed: "{seed}" 30 | do_lower_case: false 31 | per_device_train_batch_size: 16 32 | lr_scheduler_type: "polynomial" 33 | warmup_ratio: 0.06 34 | learning_rate: 1.0e-5 35 | num_train_epochs: 20.0 36 | #adam_beta1: 0.9 37 | #adam_beta2: 0.98 38 | #adam_epsilon: 1.0e-6 39 | #max_grad_norm: 0.0 40 | save_strategy: "epoch" 41 | evaluation_strategy: "epoch" 42 | 
load_best_model_at_end: true 43 | metric_for_best_model: "key_score" 44 | tokenizer: "{resource_dir}/roberta-base" 45 | model_type: roberta-base 46 | -------------------------------------------------------------------------------- /src/configs/exps/roberta-base/ner/ood/roberta-base-ner-mtl-ood.yaml: -------------------------------------------------------------------------------- 1 | required_resources: 2 | roberta-base: "s3://ANONYMOUS/roberta-base" 3 | huggingface: "s3://ANONYMOUS/huggingface" 4 | ner: "s3://ANONYMOUS/ner" 5 | remote_zoo_dir: "s3://ANONYMOUS/local_models_zoo" 6 | load_from_zoo_use_remote: false 7 | resource_dir: "resources" 8 | push_to_remote_zoo: false 9 | push_to_local_zoo: true 10 | 11 | 12 | evaluate_locals_ood_before_merge: true 13 | evaluate_locals_ood_after_merge: false 14 | evaluate_locals_before: false 15 | evaluate_locals_after: false 16 | 17 | load_from_checkpoint: true 18 | load_dir: "/job/notebooks/ModelMerge/runs/ner-roberta_base/mtl-seed{seed}/" 19 | 20 | merger: 21 | enabled: false 22 | 23 | seed: "{seed}" 24 | main_output_dir: 'runs/ner-roberta_base/mtl-ood-seed{seed}' 25 | default_model_args: 26 | model_name: "{resource_dir}/roberta-base" 27 | version: "hyp0812" 28 | zoo_filter: 29 | version: "hyp0812" 30 | seed: "{seed}" 31 | do_lower_case: false 32 | per_device_train_batch_size: 16 33 | lr_scheduler_type: "polynomial" 34 | warmup_ratio: 0.06 35 | learning_rate: 1.0e-5 36 | num_train_epochs: 20.0 37 | #adam_beta1: 0.9 38 | #adam_beta2: 0.98 39 | #adam_epsilon: 1.0e-6 40 | #max_grad_norm: 0.0 41 | save_strategy: "epoch" 42 | evaluation_strategy: "epoch" 43 | load_best_model_at_end: true 44 | metric_for_best_model: "key_score" 45 | tokenizer: "{resource_dir}/roberta-base" 46 | model_type: roberta 47 | -------------------------------------------------------------------------------- /src/configs/exps/roberta-base/ner/ood/roberta-base-ner-regmean.yaml: -------------------------------------------------------------------------------- 1 
| required_resources: 2 | roberta-base: "s3://ANONYMOUS/roberta-base" 3 | huggingface: "s3://ANONYMOUS/huggingface" 4 | ner: "s3://ANONYMOUS/ner" 5 | remote_zoo_dir: "s3://ANONYMOUS/local_models_zoo" 6 | load_from_zoo_use_remote: false 7 | resource_dir: "resources" 8 | push_to_remote_zoo: false 9 | push_to_local_zoo: true 10 | 11 | 12 | evaluate_locals_ood_before_merge: false 13 | evaluate_locals_ood_after_merge: true 14 | evaluate_locals_before: false 15 | evaluate_locals_after: false 16 | 17 | merger: 18 | #regmean_exclude_param_regex: ['.*classifier.*'] 19 | regmean_exclude_param_regex: [] 20 | regmean_mean: true 21 | gram_n_example: 1000 22 | gram_version: "h_1000_0726_fix_withclassifier" 23 | regmean_reduce_nondiag: 0.1 24 | 25 | seed: "{seed}" 26 | main_output_dir: 'runs/ner-roberta_base/regmean-ood-seed{seed}' 27 | default_model_args: 28 | model_name: "{resource_dir}/roberta-base" 29 | version: "hyp0812" 30 | zoo_filter: 31 | version: "hyp0812" 32 | seed: "{seed}" 33 | do_lower_case: false 34 | per_device_train_batch_size: 16 35 | lr_scheduler_type: "polynomial" 36 | warmup_ratio: 0.06 37 | learning_rate: 1.0e-5 38 | num_train_epochs: 20.0 39 | #adam_beta1: 0.9 40 | #adam_beta2: 0.98 41 | #adam_epsilon: 1.0e-6 42 | #max_grad_norm: 0.0 43 | save_strategy: "epoch" 44 | evaluation_strategy: "epoch" 45 | load_best_model_at_end: true 46 | metric_for_best_model: "key_score" 47 | tokenizer: "{resource_dir}/roberta-base" 48 | model_type: roberta-base 49 | -------------------------------------------------------------------------------- /src/configs/exps/roberta-base/ner/ood/roberta-base-ner.yaml: -------------------------------------------------------------------------------- 1 | required_resources: 2 | roberta-base: "s3://ANONYMOUS/roberta-base" 3 | huggingface: "s3://ANONYMOUS/huggingface" 4 | ner: "s3://ANONYMOUS/ner" 5 | remote_zoo_dir: "s3://ANONYMOUS/local_models_zoo" 6 | load_from_zoo_use_remote: false 7 | resource_dir: "resources" 8 | push_to_remote_zoo: 
false 9 | push_to_local_zoo: true 10 | 11 | 12 | evaluate_locals_ood_before_merge: true 13 | evaluate_locals_ood_after_merge: true 14 | evaluate_locals_before: false 15 | evaluate_locals_after: false 16 | 17 | seed: "{seed}" 18 | main_output_dir: 'runs/ner-roberta_base/simple_avg-ood-seed{seed}' 19 | default_model_args: 20 | model_name: "{resource_dir}/roberta-base" 21 | version: "hyp0812" 22 | zoo_filter: 23 | version: "hyp0812" 24 | seed: "{seed}" 25 | do_lower_case: false 26 | per_device_train_batch_size: 16 27 | lr_scheduler_type: "polynomial" 28 | warmup_ratio: 0.06 29 | learning_rate: 1.0e-5 30 | num_train_epochs: 20.0 31 | #adam_beta1: 0.9 32 | #adam_beta2: 0.98 33 | #adam_epsilon: 1.0e-6 34 | #max_grad_norm: 0.0 35 | save_strategy: "epoch" 36 | evaluation_strategy: "epoch" 37 | load_best_model_at_end: true 38 | metric_for_best_model: "key_score" 39 | tokenizer: "{resource_dir}/roberta-base" 40 | model_type: roberta-base 41 | -------------------------------------------------------------------------------- /src/configs/exps/roberta-base/ner/roberta-base-ner-fisher.yaml: -------------------------------------------------------------------------------- 1 | required_resources: 2 | roberta-base: "s3://ANONYMOUS/roberta-base" 3 | huggingface: "s3://ANONYMOUS/huggingface" 4 | ner: "s3://ANONYMOUS/ner" 5 | remote_zoo_dir: "s3://ANONYMOUS/local_models_zoo" 6 | load_from_zoo_use_remote: false 7 | resource_dir: "resources" 8 | push_to_remote_zoo: false 9 | push_to_local_zoo: true 10 | 11 | merger: 12 | fisher_weighted: true 13 | fisher_n_example: 1000 14 | fisher_version: "h_1000_fix0910_emp" 15 | emp_fisher: true 16 | 17 | 18 | evaluate_locals_ood_after_merge: true 19 | evaluate_locals_before: false 20 | evaluate_locals_after: false 21 | 22 | seed: "{seed}" 23 | main_output_dir: 'runs/ner-roberta_base/fisher-emp-seed{seed}' 24 | default_model_args: 25 | model_name: "{resource_dir}/roberta-base" 26 | version: "hyp0812" 27 | zoo_filter: 28 | version: "hyp0812" 29 | seed: 
"{seed}" 30 | do_lower_case: false 31 | per_device_train_batch_size: 16 32 | lr_scheduler_type: "polynomial" 33 | warmup_ratio: 0.06 34 | learning_rate: 1.0e-5 35 | num_train_epochs: 20.0 36 | #adam_beta1: 0.9 37 | #adam_beta2: 0.98 38 | #adam_epsilon: 1.0e-6 39 | #max_grad_norm: 0.0 40 | save_strategy: "epoch" 41 | evaluation_strategy: "epoch" 42 | load_best_model_at_end: true 43 | metric_for_best_model: "key_score" 44 | tokenizer: "{resource_dir}/roberta-base" 45 | model_type: roberta-base 46 | -------------------------------------------------------------------------------- /src/configs/exps/roberta-base/ner/roberta-base-ner-mtl.yaml: -------------------------------------------------------------------------------- 1 | required_resources: 2 | roberta-base: "s3://ANONYMOUS/roberta-base" 3 | huggingface: "s3://ANONYMOUS/huggingface" 4 | ner: "s3://ANONYMOUS/ner" 5 | remote_zoo_dir: "s3://ANONYMOUS/local_models_zoo" 6 | load_from_zoo_use_remote: false 7 | resource_dir: "resources" 8 | push_to_remote_zoo: false 9 | push_to_local_zoo: true 10 | 11 | evaluate_locals_ood_before_merge: true 12 | evaluate_locals_ood_after_merge: false 13 | evaluate_locals_before: false 14 | evaluate_locals_after: false 15 | 16 | merger: 17 | enabled: false 18 | 19 | seed: "{seed}" 20 | main_output_dir: 'runs/ner-roberta_base/mtl-seed{seed}' 21 | default_model_args: 22 | model_name: "{resource_dir}/roberta-base" 23 | version: "hyp0812" 24 | zoo_filter: 25 | version: "hyp0812" 26 | seed: "{seed}" 27 | do_lower_case: false 28 | per_device_train_batch_size: 16 29 | lr_scheduler_type: "polynomial" 30 | warmup_ratio: 0.06 31 | learning_rate: 1.0e-5 32 | num_train_epochs: 20.0 33 | #adam_beta1: 0.9 34 | #adam_beta2: 0.98 35 | #adam_epsilon: 1.0e-6 36 | #max_grad_norm: 0.0 37 | save_strategy: "epoch" 38 | evaluation_strategy: "epoch" 39 | load_best_model_at_end: true 40 | metric_for_best_model: "key_score" 41 | tokenizer: "{resource_dir}/roberta-base" 42 | model_type: roberta 43 | 
-------------------------------------------------------------------------------- /src/configs/exps/roberta-base/ner/roberta-base-ner-regmean.yaml: -------------------------------------------------------------------------------- 1 | required_resources: 2 | roberta-base: "s3://ANONYMOUS/roberta-base" 3 | huggingface: "s3://ANONYMOUS/huggingface" 4 | ner: "s3://ANONYMOUS/ner" 5 | remote_zoo_dir: "s3://ANONYMOUS/local_models_zoo" 6 | load_from_zoo_use_remote: false 7 | resource_dir: "resources" 8 | push_to_remote_zoo: false 9 | push_to_local_zoo: true 10 | 11 | 12 | evaluate_locals_ood_before_merge: false 13 | evaluate_locals_ood_after_merge: true 14 | evaluate_locals_before: false 15 | evaluate_locals_after: true 16 | 17 | merger: 18 | #regmean_exclude_param_regex: ['.*classifier.*'] 19 | regmean_exclude_param_regex: [] 20 | regmean_mean: true 21 | gram_n_example: 1000 22 | gram_version: "h_1000_0726_fix_withclassifier" 23 | regmean_reduce_nondiag: 0.1 24 | 25 | 26 | seed: "{seed}" 27 | main_output_dir: 'runs/ner-roberta_base/regmean-seed{seed}' 28 | default_model_args: 29 | model_name: "{resource_dir}/roberta-base" 30 | version: "hyp0812" 31 | zoo_filter: 32 | version: "hyp0812" 33 | seed: "{seed}" 34 | do_lower_case: false 35 | per_device_train_batch_size: 16 36 | lr_scheduler_type: "polynomial" 37 | warmup_ratio: 0.06 38 | learning_rate: 1.0e-5 39 | num_train_epochs: 20.0 40 | #adam_beta1: 0.9 41 | #adam_beta2: 0.98 42 | #adam_epsilon: 1.0e-6 43 | #max_grad_norm: 0.0 44 | save_strategy: "epoch" 45 | evaluation_strategy: "epoch" 46 | load_best_model_at_end: true 47 | metric_for_best_model: "key_score" 48 | tokenizer: "{resource_dir}/roberta-base" 49 | model_type: roberta-base 50 | -------------------------------------------------------------------------------- /src/configs/exps/roberta-base/ner/roberta-base-ner.yaml: -------------------------------------------------------------------------------- 1 | required_resources: 2 | roberta-base: 
"s3://ANONYMOUS/roberta-base" 3 | huggingface: "s3://ANONYMOUS/huggingface" 4 | ner: "s3://ANONYMOUS/ner" 5 | remote_zoo_dir: "s3://ANONYMOUS/local_models_zoo" 6 | load_from_zoo_use_remote: false 7 | resource_dir: "resources" 8 | push_to_remote_zoo: false 9 | push_to_local_zoo: true 10 | 11 | 12 | evaluate_locals_ood_before_merge: true 13 | evaluate_locals_ood_after_merge: true 14 | evaluate_locals_before: false 15 | evaluate_locals_after: false 16 | 17 | seed: "{seed}" 18 | main_output_dir: 'runs/ner-roberta_base/simple_avg-seed{seed}' 19 | default_model_args: 20 | model_name: "{resource_dir}/roberta-base" 21 | version: "hyp0812" 22 | zoo_filter: 23 | version: "hyp0812" 24 | seed: "{seed}" 25 | do_lower_case: false 26 | per_device_train_batch_size: 16 27 | lr_scheduler_type: "polynomial" 28 | warmup_ratio: 0.06 29 | learning_rate: 1.0e-5 30 | num_train_epochs: 20.0 31 | #adam_beta1: 0.9 32 | #adam_beta2: 0.98 33 | #adam_epsilon: 1.0e-6 34 | #max_grad_norm: 0.0 35 | save_strategy: "epoch" 36 | evaluation_strategy: "epoch" 37 | load_best_model_at_end: true 38 | metric_for_best_model: "key_score" 39 | tokenizer: "{resource_dir}/roberta-base" 40 | model_type: roberta-base 41 | -------------------------------------------------------------------------------- /src/configs/exps/roberta-base/ood/roberta-base-emotion-ensemble-ood.yaml: -------------------------------------------------------------------------------- 1 | required_resources: 2 | roberta-base: "s3://ANONYMOUS/roberta-base" 3 | huggingface: "s3://ANONYMOUS/huggingface" 4 | emotion_splits: "s3://ANONYMOUS/emotion_splits" 5 | remote_zoo_dir: "s3://ANONYMOUS/local_models_zoo" 6 | load_from_zoo_use_remote: false 7 | resource_dir: "resources" 8 | push_to_remote_zoo: false 9 | push_to_local_zoo: true 10 | 11 | evaluate_ensemble_locals: false 12 | evaluate_ensemble_ood: true 13 | evaluate_locals_ood_after_merge: true 14 | evaluate_locals_before: false 15 | evaluate_locals_after: false 16 | 17 | merger: 18 | enabled: 
false 19 | 20 | ensembler: 21 | enabled: true 22 | handle_missing_label: true 23 | 24 | seed: "{seed}" 25 | main_output_dir: 'runs/emotion-roberta_base/ensemble-ood-seed{seed}' 26 | default_model_args: 27 | model_name: "{resource_dir}/roberta-base" 28 | version: "hyp0812" 29 | zoo_filter: 30 | version: "hyp0812" 31 | seed: "{seed}" 32 | do_lower_case: false 33 | per_device_train_batch_size: 16 34 | lr_scheduler_type: "polynomial" 35 | warmup_ratio: 0.06 36 | learning_rate: 1.0e-5 37 | num_train_epochs: 20.0 38 | #adam_beta1: 0.9 39 | #adam_beta2: 0.98 40 | #adam_epsilon: 1.0e-6 41 | #max_grad_norm: 0.0 42 | save_strategy: "epoch" 43 | evaluation_strategy: "epoch" 44 | load_best_model_at_end: true 45 | metric_for_best_model: "key_score" 46 | tokenizer: "{resource_dir}/roberta-base" 47 | model_type: roberta-base 48 | -------------------------------------------------------------------------------- /src/configs/exps/roberta-base/ood/roberta-base-emotion-fisher-norm-ood.yaml: -------------------------------------------------------------------------------- 1 | required_resources: 2 | roberta-base: "s3://ANONYMOUS/roberta-base" 3 | huggingface: "s3://ANONYMOUS/huggingface" 4 | emotion_splits: "s3://ANONYMOUS/emotion_splits" 5 | remote_zoo_dir: "s3://ANONYMOUS/local_models_zoo" 6 | load_from_zoo_use_remote: false 7 | resource_dir: "resources" 8 | push_to_remote_zoo: false 9 | push_to_local_zoo: true 10 | 11 | merger: 12 | fisher_weighted: true 13 | fisher_n_example: 1000 14 | fisher_version: "h_1000_fix0708" 15 | fisher_normalize: "param" 16 | 17 | evaluate_locals_ood_after_merge: true 18 | evaluate_locals_before: false 19 | evaluate_locals_after: false 20 | 21 | seed: "{seed}" 22 | main_output_dir: 'runs/emotion-roberta_base/fisher-norm-ood10-seed{seed}' 23 | default_model_args: 24 | model_name: "{resource_dir}/roberta-base" 25 | version: "hyp0812" 26 | zoo_filter: 27 | version: "hyp0812" 28 | seed: "{seed}" 29 | do_lower_case: false 30 | per_device_train_batch_size: 16 
31 | lr_scheduler_type: "polynomial" 32 | warmup_ratio: 0.06 33 | learning_rate: 1.0e-5 34 | num_train_epochs: 20.0 35 | #adam_beta1: 0.9 36 | #adam_beta2: 0.98 37 | #adam_epsilon: 1.0e-6 38 | #max_grad_norm: 0.0 39 | save_strategy: "epoch" 40 | evaluation_strategy: "epoch" 41 | load_best_model_at_end: true 42 | metric_for_best_model: "key_score" 43 | tokenizer: "{resource_dir}/roberta-base" 44 | model_type: roberta-base 45 | -------------------------------------------------------------------------------- /src/configs/exps/roberta-base/ood/roberta-base-emotion-fisher-ood.yaml: -------------------------------------------------------------------------------- 1 | required_resources: 2 | roberta-base: "s3://ANONYMOUS/roberta-base" 3 | huggingface: "s3://ANONYMOUS/huggingface" 4 | emotion_splits: "s3://ANONYMOUS/emotion_splits" 5 | remote_zoo_dir: "s3://ANONYMOUS/local_models_zoo" 6 | load_from_zoo_use_remote: false 7 | resource_dir: "resources" 8 | push_to_remote_zoo: false 9 | push_to_local_zoo: true 10 | 11 | merger: 12 | fisher_weighted: true 13 | fisher_n_example: 1000 14 | fisher_version: "h_1000_fix0708" 15 | 16 | evaluate_locals_ood_after_merge: true 17 | evaluate_locals_before: false 18 | evaluate_locals_after: false 19 | 20 | seed: "{seed}" 21 | main_output_dir: 'runs/emotion-roberta_base/fisher-new-ood10-seed{seed}' 22 | default_model_args: 23 | model_name: "{resource_dir}/roberta-base" 24 | version: "hyp0812" 25 | zoo_filter: 26 | version: "hyp0812" 27 | seed: "{seed}" 28 | do_lower_case: false 29 | per_device_train_batch_size: 16 30 | lr_scheduler_type: "polynomial" 31 | warmup_ratio: 0.06 32 | learning_rate: 1.0e-5 33 | num_train_epochs: 20.0 34 | #adam_beta1: 0.9 35 | #adam_beta2: 0.98 36 | #adam_epsilon: 1.0e-6 37 | #max_grad_norm: 0.0 38 | save_strategy: "epoch" 39 | evaluation_strategy: "epoch" 40 | load_best_model_at_end: true 41 | metric_for_best_model: "key_score" 42 | tokenizer: "{resource_dir}/roberta-base" 43 | model_type: roberta-base 44 | 
-------------------------------------------------------------------------------- /src/configs/exps/roberta-base/ood/roberta-base-emotion-local-ood.yaml: -------------------------------------------------------------------------------- 1 | required_resources: 2 | roberta-base: "s3://ANONYMOUS/roberta-base" 3 | huggingface: "s3://ANONYMOUS/huggingface" 4 | emotion_splits: "s3://ANONYMOUS/emotion_splits" 5 | remote_zoo_dir: "s3://ANONYMOUS/local_models_zoo" 6 | load_from_zoo_use_remote: false 7 | resource_dir: "resources" 8 | push_to_remote_zoo: false 9 | push_to_local_zoo: true 10 | 11 | evaluate_locals_ood_before_merge: true 12 | evaluate_locals_ood_after_merge: false 13 | evaluate_locals_before: false 14 | evaluate_locals_after: false 15 | 16 | seed: "{seed}" 17 | main_output_dir: 'runs/emotion-roberta_base/local-ood10-seed{seed}' 18 | default_model_args: 19 | model_name: "{resource_dir}/roberta-base" 20 | version: "hyp0812" 21 | zoo_filter: 22 | version: "hyp0812" 23 | seed: "{seed}" 24 | do_lower_case: false 25 | per_device_train_batch_size: 16 26 | lr_scheduler_type: "polynomial" 27 | warmup_ratio: 0.06 28 | learning_rate: 1.0e-5 29 | num_train_epochs: 20.0 30 | #adam_beta1: 0.9 31 | #adam_beta2: 0.98 32 | #adam_epsilon: 1.0e-6 33 | #max_grad_norm: 0.0 34 | save_strategy: "epoch" 35 | evaluation_strategy: "epoch" 36 | load_best_model_at_end: true 37 | metric_for_best_model: "key_score" 38 | tokenizer: "{resource_dir}/roberta-base" 39 | model_type: roberta-base 40 | -------------------------------------------------------------------------------- /src/configs/exps/roberta-base/ood/roberta-base-emotion-mtl-ood.yaml: -------------------------------------------------------------------------------- 1 | required_resources: 2 | roberta-base: "s3://ANONYMOUS/roberta-base" 3 | huggingface: "s3://ANONYMOUS/huggingface" 4 | emotion_splits: "s3://ANONYMOUS/emotion_splits" 5 | remote_zoo_dir: "s3://ANONYMOUS/local_models_zoo" 6 | load_from_zoo_use_remote: false 7 | 
resource_dir: "resources" 8 | push_to_remote_zoo: false 9 | push_to_local_zoo: true 10 | 11 | evaluate_locals_ood_before_merge: true 12 | evaluate_locals_ood_after_merge: true 13 | evaluate_locals_before: false 14 | evaluate_locals_after: false 15 | 16 | load_from_checkpoint: true 17 | 18 | seed: "{seed}" 19 | main_output_dir: 'runs/emotion-roberta_base/mtl-new-ood-all-seed{seed}' 20 | load_dir: '/job/notebooks/ModelMerge/runs/emotion-roberta_base/mtl-new-seed{seed}' 21 | default_model_args: 22 | model_name: "{resource_dir}/roberta-base" 23 | version: "hyp0812" 24 | zoo_filter: 25 | version: "hyp0812" 26 | seed: "{seed}" 27 | do_lower_case: false 28 | per_device_train_batch_size: 16 29 | lr_scheduler_type: "polynomial" 30 | warmup_ratio: 0.06 31 | learning_rate: 2.0e-5 32 | num_train_epochs: 20.0 33 | #adam_beta1: 0.9 34 | #adam_beta2: 0.98 35 | #adam_epsilon: 1.0e-6 36 | #max_grad_norm: 0.0 37 | save_strategy: "epoch" 38 | evaluation_strategy: "epoch" 39 | load_best_model_at_end: true 40 | metric_for_best_model: "key_score" 41 | reweight_loss_schema: "sqrt" 42 | tokenizer: "{resource_dir}/roberta-base" 43 | model_type: roberta 44 | -------------------------------------------------------------------------------- /src/configs/exps/roberta-base/ood/roberta-base-emotion-ood.yaml: -------------------------------------------------------------------------------- 1 | required_resources: 2 | roberta-base: "s3://ANONYMOUS/roberta-base" 3 | huggingface: "s3://ANONYMOUS/huggingface" 4 | emotion_splits: "s3://ANONYMOUS/emotion_splits" 5 | remote_zoo_dir: "s3://ANONYMOUS/local_models_zoo" 6 | load_from_zoo_use_remote: false 7 | resource_dir: "resources" 8 | push_to_remote_zoo: false 9 | push_to_local_zoo: true 10 | 11 | evaluate_locals_ood_after_merge: true 12 | evaluate_locals_before: false 13 | evaluate_locals_after: false 14 | 15 | seed: "{seed}" 16 | main_output_dir: 'runs/emotion-roberta_base/simple_avg-new-ood10-seed{seed}' 17 | default_model_args: 18 | model_name: 
"{resource_dir}/roberta-base" 19 | version: "hyp0812" 20 | zoo_filter: 21 | version: "hyp0812" 22 | seed: "{seed}" 23 | do_lower_case: false 24 | per_device_train_batch_size: 16 25 | lr_scheduler_type: "polynomial" 26 | warmup_ratio: 0.06 27 | learning_rate: 1.0e-5 28 | num_train_epochs: 20.0 29 | #adam_beta1: 0.9 30 | #adam_beta2: 0.98 31 | #adam_epsilon: 1.0e-6 32 | #max_grad_norm: 0.0 33 | save_strategy: "epoch" 34 | evaluation_strategy: "epoch" 35 | load_best_model_at_end: true 36 | metric_for_best_model: "key_score" 37 | tokenizer: "{resource_dir}/roberta-base" 38 | model_type: roberta-base 39 | -------------------------------------------------------------------------------- /src/configs/exps/roberta-base/ood/roberta-base-emotion-partial-regmean-ood.yaml: -------------------------------------------------------------------------------- 1 | required_resources: 2 | roberta-base: "s3://ANONYMOUS/roberta-base" 3 | huggingface: "s3://ANONYMOUS/huggingface" 4 | emotion_splits: "s3://ANONYMOUS/emotion_splits" 5 | remote_zoo_dir: "s3://ANONYMOUS/local_models_zoo" 6 | load_from_zoo_use_remote: false 7 | resource_dir: "resources" 8 | push_to_remote_zoo: false 9 | push_to_local_zoo: true 10 | 11 | evaluate_locals_ood_after_merge: true 12 | evaluate_locals_before: false 13 | evaluate_locals_after: false 14 | 15 | merger: 16 | #regmean_exclude_param_regex: ['.*classifier.*'] 17 | exclude_param_regex: [".*layer.11.*", '.*classifier.*'] 18 | regmean_exclude_param_regex: [] 19 | regmean_mean: true 20 | gram_n_example: 1000 21 | gram_version: "h_1000_0726_fix_withclassifier" 22 | multi_label_head_special: false 23 | seed: "{seed}" 24 | main_output_dir: 'runs/emotion-roberta_base/regmean-partial-withclassifier-ood-seed{seed}' 25 | default_model_args: 26 | model_name: "{resource_dir}/roberta-base" 27 | version: "hyp0812" 28 | zoo_filter: 29 | version: "hyp0812" 30 | seed: "{seed}" 31 | do_lower_case: false 32 | per_device_train_batch_size: 16 33 | lr_scheduler_type: "polynomial" 
34 | warmup_ratio: 0.06 35 | learning_rate: 1.0e-5 36 | num_train_epochs: 20.0 37 | #adam_beta1: 0.9 38 | #adam_beta2: 0.98 39 | #adam_epsilon: 1.0e-6 40 | #max_grad_norm: 0.0 41 | save_strategy: "epoch" 42 | evaluation_strategy: "epoch" 43 | load_best_model_at_end: true 44 | metric_for_best_model: "key_score" 45 | tokenizer: "{resource_dir}/roberta-base" 46 | model_type: roberta-base 47 | -------------------------------------------------------------------------------- /src/configs/exps/roberta-base/ood/roberta-base-emotion-regmean-diag-ood.yaml: -------------------------------------------------------------------------------- 1 | required_resources: 2 | roberta-base: "s3://ANONYMOUS/roberta-base" 3 | huggingface: "s3://ANONYMOUS/huggingface" 4 | emotion_splits: "s3://ANONYMOUS/emotion_splits" 5 | remote_zoo_dir: "s3://ANONYMOUS/local_models_zoo" 6 | load_from_zoo_use_remote: false 7 | resource_dir: "resources" 8 | push_to_remote_zoo: false 9 | push_to_local_zoo: true 10 | 11 | evaluate_locals_ood_after_merge: true 12 | evaluate_locals_before: false 13 | evaluate_locals_after: false 14 | 15 | merger: 16 | #regmean_exclude_param_regex: ['.*classifier.*'] 17 | regmean_exclude_param_regex: [] 18 | regmean_mean: true 19 | gram_n_example: 1000 20 | gram_version: "h_1000_0726_fix_withclassifier" 21 | regmean_diag: true 22 | seed: "{seed}" 23 | main_output_dir: 'runs/emotion-roberta_base/regmean-new-withclassifier-diag-ood-seed{seed}' 24 | default_model_args: 25 | model_name: "{resource_dir}/roberta-base" 26 | version: "hyp0812" 27 | zoo_filter: 28 | version: "hyp0812" 29 | seed: "{seed}" 30 | do_lower_case: false 31 | per_device_train_batch_size: 16 32 | lr_scheduler_type: "polynomial" 33 | warmup_ratio: 0.06 34 | learning_rate: 1.0e-5 35 | num_train_epochs: 20.0 36 | #adam_beta1: 0.9 37 | #adam_beta2: 0.98 38 | #adam_epsilon: 1.0e-6 39 | #max_grad_norm: 0.0 40 | save_strategy: "epoch" 41 | evaluation_strategy: "epoch" 42 | load_best_model_at_end: true 43 | 
metric_for_best_model: "key_score" 44 | tokenizer: "{resource_dir}/roberta-base" 45 | model_type: roberta-base 46 | -------------------------------------------------------------------------------- /src/configs/exps/roberta-base/ood/roberta-base-emotion-regmean-ood.yaml: -------------------------------------------------------------------------------- 1 | required_resources: 2 | roberta-base: "s3://ANONYMOUS/roberta-base" 3 | huggingface: "s3://ANONYMOUS/huggingface" 4 | emotion_splits: "s3://ANONYMOUS/emotion_splits" 5 | remote_zoo_dir: "s3://ANONYMOUS/local_models_zoo" 6 | load_from_zoo_use_remote: false 7 | resource_dir: "resources" 8 | push_to_remote_zoo: false 9 | push_to_local_zoo: true 10 | 11 | evaluate_locals_ood_after_merge: true 12 | evaluate_locals_before: false 13 | evaluate_locals_after: false 14 | 15 | merger: 16 | #regmean_exclude_param_regex: ['.*classifier.*'] 17 | regmean_exclude_param_regex: [] 18 | regmean_mean: true 19 | gram_n_example: 1000 20 | gram_version: "h_1000_0726_fix_withclassifier" 21 | regmean_reduce_nondiag: 0.1 22 | 23 | seed: "{seed}" 24 | main_output_dir: 'runs/emotion-roberta_base/regmean-new-withclassifier-ood10-seed{seed}' 25 | default_model_args: 26 | model_name: "{resource_dir}/roberta-base" 27 | version: "hyp0812" 28 | zoo_filter: 29 | version: "hyp0812" 30 | seed: "{seed}" 31 | do_lower_case: false 32 | per_device_train_batch_size: 16 33 | lr_scheduler_type: "polynomial" 34 | warmup_ratio: 0.06 35 | learning_rate: 1.0e-5 36 | num_train_epochs: 20.0 37 | #adam_beta1: 0.9 38 | #adam_beta2: 0.98 39 | #adam_epsilon: 1.0e-6 40 | #max_grad_norm: 0.0 41 | save_strategy: "epoch" 42 | evaluation_strategy: "epoch" 43 | load_best_model_at_end: true 44 | metric_for_best_model: "key_score" 45 | tokenizer: "{resource_dir}/roberta-base" 46 | model_type: roberta-base 47 | -------------------------------------------------------------------------------- /src/configs/exps/roberta-base/ood/roberta-base-emotion-regmean-rw-ood.yaml: 
-------------------------------------------------------------------------------- 1 | required_resources: 2 | roberta-base: "s3://ANONYMOUS/roberta-base" 3 | huggingface: "s3://ANONYMOUS/huggingface" 4 | emotion_splits: "s3://ANONYMOUS/emotion_splits" 5 | remote_zoo_dir: "s3://ANONYMOUS/local_models_zoo" 6 | load_from_zoo_use_remote: false 7 | resource_dir: "resources" 8 | push_to_remote_zoo: false 9 | push_to_local_zoo: true 10 | 11 | evaluate_locals_ood_after_merge: true 12 | evaluate_locals_before: false 13 | evaluate_locals_after: false 14 | 15 | merger: 16 | #regmean_exclude_param_regex: ['.*classifier.*'] 17 | regmean_exclude_param_regex: [] 18 | regmean_mean: true 19 | gram_n_example: 1000 20 | gram_version: "h_1000_0726_fix_withclassifier" 21 | regmean_reduce_nondiag: 0.1 22 | 23 | seed: "{seed}" 24 | main_output_dir: 'runs/emotion-roberta_base/regmean-rw-withclassifier-ood-seed{seed}' 25 | default_model_args: 26 | model_name: "{resource_dir}/roberta-base" 27 | version: "hyp0812" 28 | zoo_filter: 29 | version: "hyp0812" 30 | seed: "{seed}" 31 | do_lower_case: false 32 | per_device_train_batch_size: 16 33 | lr_scheduler_type: "polynomial" 34 | warmup_ratio: 0.06 35 | learning_rate: 1.0e-5 36 | num_train_epochs: 20.0 37 | #adam_beta1: 0.9 38 | #adam_beta2: 0.98 39 | #adam_epsilon: 1.0e-6 40 | #max_grad_norm: 0.0 41 | save_strategy: "epoch" 42 | evaluation_strategy: "epoch" 43 | load_best_model_at_end: true 44 | metric_for_best_model: "key_score" 45 | tokenizer: "{resource_dir}/roberta-base" 46 | model_type: roberta-base 47 | -------------------------------------------------------------------------------- /src/configs/exps/roberta-base/roberta-base-emotion-ensemble.yaml: -------------------------------------------------------------------------------- 1 | required_resources: 2 | roberta-base: "s3://ANONYMOUS/roberta-base" 3 | huggingface: "s3://ANONYMOUS/huggingface" 4 | emotion_splits: "s3://ANONYMOUS/emotion_splits" 5 | remote_zoo_dir: 
"s3://ANONYMOUS/local_models_zoo" 6 | load_from_zoo_use_remote: false 7 | resource_dir: "resources" 8 | push_to_remote_zoo: false 9 | push_to_local_zoo: true 10 | 11 | #evaluate_ensemble_locals: true 12 | evaluate_ensemble_ood: true 13 | evaluate_locals_ood_after_merge: false 14 | evaluate_locals_before: false 15 | evaluate_locals_after: false 16 | 17 | merger: 18 | enabled: false 19 | 20 | ensembler: 21 | enabled: true 22 | handle_missing_label: true 23 | 24 | seed: "{seed}" 25 | main_output_dir: 'runs/emotion-roberta_base/ensemble-seed{seed}' 26 | default_model_args: 27 | model_name: "{resource_dir}/roberta-base" 28 | version: "hyp0812" 29 | zoo_filter: 30 | version: "hyp0812" 31 | seed: "{seed}" 32 | do_lower_case: false 33 | per_device_train_batch_size: 16 34 | lr_scheduler_type: "polynomial" 35 | warmup_ratio: 0.06 36 | learning_rate: 1.0e-5 37 | num_train_epochs: 20.0 38 | #adam_beta1: 0.9 39 | #adam_beta2: 0.98 40 | #adam_epsilon: 1.0e-6 41 | #max_grad_norm: 0.0 42 | save_strategy: "epoch" 43 | evaluation_strategy: "epoch" 44 | load_best_model_at_end: true 45 | metric_for_best_model: "key_score" 46 | tokenizer: "{resource_dir}/roberta-base" 47 | model_type: roberta-base 48 | -------------------------------------------------------------------------------- /src/configs/exps/roberta-base/roberta-base-emotion-fisher.yaml: -------------------------------------------------------------------------------- 1 | required_resources: 2 | roberta-base: "s3://ANONYMOUS/roberta-base" 3 | huggingface: "s3://ANONYMOUS/huggingface" 4 | emotion_splits: "s3://ANONYMOUS/emotion_splits" 5 | remote_zoo_dir: "s3://ANONYMOUS/local_models_zoo" 6 | load_from_zoo_use_remote: false 7 | resource_dir: "resources" 8 | push_to_remote_zoo: false 9 | push_to_local_zoo: true 10 | 11 | merger: 12 | fisher_weighted: true 13 | fisher_n_example: 1000 14 | fisher_version: "h_1000_fix0708" 15 | 16 | evaluate_locals_ood_after_merge: true 17 | evaluate_locals_before: true 18 | evaluate_locals_after: 
false 19 | 20 | seed: "{seed}" 21 | main_output_dir: 'runs/emotion-roberta_base/fisher-new-ood-seed{seed}' 22 | default_model_args: 23 | model_name: "{resource_dir}/roberta-base" 24 | version: "hyp0812" 25 | zoo_filter: 26 | version: "hyp0812" 27 | seed: "{seed}" 28 | do_lower_case: false 29 | per_device_train_batch_size: 16 30 | lr_scheduler_type: "polynomial" 31 | warmup_ratio: 0.06 32 | learning_rate: 1.0e-5 33 | num_train_epochs: 20.0 34 | #adam_beta1: 0.9 35 | #adam_beta2: 0.98 36 | #adam_epsilon: 1.0e-6 37 | #max_grad_norm: 0.0 38 | save_strategy: "epoch" 39 | evaluation_strategy: "epoch" 40 | load_best_model_at_end: true 41 | metric_for_best_model: "key_score" 42 | tokenizer: "{resource_dir}/roberta-base" 43 | model_type: roberta-base 44 | -------------------------------------------------------------------------------- /src/configs/exps/roberta-base/roberta-base-emotion-mtl.yaml: -------------------------------------------------------------------------------- 1 | required_resources: 2 | roberta-base: "s3://ANONYMOUS/roberta-base" 3 | huggingface: "s3://ANONYMOUS/huggingface" 4 | emotion_splits: "s3://ANONYMOUS/emotion_splits" 5 | remote_zoo_dir: "s3://ANONYMOUS/local_models_zoo" 6 | load_from_zoo_use_remote: false 7 | resource_dir: "resources" 8 | push_to_remote_zoo: false 9 | push_to_local_zoo: true 10 | 11 | evaluate_locals_ood_before_merge: true 12 | evaluate_locals_before: true 13 | evaluate_locals_after: false 14 | 15 | 16 | seed: "{seed}" 17 | main_output_dir: 'runs/emotion-roberta_base/mtl-greedy-seed{seed}' 18 | default_model_args: 19 | model_name: "{resource_dir}/roberta-base" 20 | version: "hyp0812" 21 | zoo_filter: 22 | version: "hyp0812" 23 | seed: "{seed}" 24 | do_lower_case: false 25 | per_device_train_batch_size: 16 26 | lr_scheduler_type: "polynomial" 27 | warmup_ratio: 0.06 28 | learning_rate: 2.0e-5 29 | num_train_epochs: 20.0 30 | #adam_beta1: 0.9 31 | #adam_beta2: 0.98 32 | #adam_epsilon: 1.0e-6 33 | #max_grad_norm: 0.0 34 | 
save_strategy: "epoch" 35 | evaluation_strategy: "epoch" 36 | load_best_model_at_end: true 37 | metric_for_best_model: "key_score" 38 | reweight_loss_schema: "sqrt" 39 | tokenizer: "{resource_dir}/roberta-base" 40 | model_type: roberta 41 | -------------------------------------------------------------------------------- /src/configs/exps/roberta-base/roberta-base-emotion-ot.yaml: -------------------------------------------------------------------------------- 1 | required_resources: 2 | roberta-base: "s3://ANONYMOUS/roberta-base" 3 | huggingface: "s3://ANONYMOUS/huggingface" 4 | emotion_splits: "s3://ANONYMOUS/emotion_splits" 5 | remote_zoo_dir: "s3://ANONYMOUS/local_models_zoo" 6 | load_from_zoo_use_remote: false 7 | resource_dir: "resources" 8 | push_to_remote_zoo: false 9 | push_to_local_zoo: true 10 | 11 | 12 | evaluate_locals_before: true 13 | evaluate_locals_after: true 14 | 15 | merger: 16 | algo: 'ot' 17 | ot_patterns: 18 | pattern0: 19 | ot_filter_regex: ['.*layer\.\d+$'] # roberta layer.2. 
20 | ot_lin1: 'intermediate.dense' 21 | ot_lin2: 'output.dense' 22 | 23 | 24 | seed: "{seed}" 25 | main_output_dir: 'runs/emotion-roberta_base/ot-withclassifier-seed{seed}' 26 | default_model_args: 27 | model_name: "{resource_dir}/roberta-base" 28 | version: "hyp0812" 29 | zoo_filter: 30 | version: "hyp0812" 31 | seed: "{seed}" 32 | do_lower_case: false 33 | per_device_train_batch_size: 16 34 | lr_scheduler_type: "polynomial" 35 | warmup_ratio: 0.06 36 | learning_rate: 1.0e-5 37 | num_train_epochs: 20.0 38 | #adam_beta1: 0.9 39 | #adam_beta2: 0.98 40 | #adam_epsilon: 1.0e-6 41 | #max_grad_norm: 0.0 42 | save_strategy: "epoch" 43 | evaluation_strategy: "epoch" 44 | load_best_model_at_end: true 45 | metric_for_best_model: "key_score" 46 | tokenizer: "{resource_dir}/roberta-base" 47 | model_type: roberta-base 48 | -------------------------------------------------------------------------------- /src/configs/exps/roberta-base/roberta-base-emotion-regmean.yaml: -------------------------------------------------------------------------------- 1 | required_resources: 2 | roberta-base: "s3://ANONYMOUS/roberta-base" 3 | huggingface: "s3://ANONYMOUS/huggingface" 4 | emotion_splits: "s3://ANONYMOUS/emotion_splits" 5 | remote_zoo_dir: "s3://ANONYMOUS/local_models_zoo" 6 | load_from_zoo_use_remote: false 7 | resource_dir: "resources" 8 | push_to_remote_zoo: false 9 | push_to_local_zoo: true 10 | 11 | 12 | evaluate_locals_before: true 13 | evaluate_locals_after: true 14 | 15 | merger: 16 | #regmean_exclude_param_regex: ['.*classifier.*'] 17 | regmean_exclude_param_regex: [] 18 | regmean_mean: true 19 | gram_n_example: 1000 20 | gram_version: "h_1000_0726_fix_withclassifier" 21 | regmean_reduce_nondiag: 0.1 22 | 23 | seed: "{seed}" 24 | main_output_dir: 'runs/emotion-roberta_base/regmean-rw-withclassifier-seed{seed}' 25 | default_model_args: 26 | model_name: "{resource_dir}/roberta-base" 27 | version: "hyp0812" 28 | zoo_filter: 29 | version: "hyp0812" 30 | seed: "{seed}" 31 | 
do_lower_case: false 32 | per_device_train_batch_size: 16 33 | lr_scheduler_type: "polynomial" 34 | warmup_ratio: 0.06 35 | learning_rate: 1.0e-5 36 | num_train_epochs: 20.0 37 | #adam_beta1: 0.9 38 | #adam_beta2: 0.98 39 | #adam_epsilon: 1.0e-6 40 | #max_grad_norm: 0.0 41 | save_strategy: "epoch" 42 | evaluation_strategy: "epoch" 43 | load_best_model_at_end: true 44 | metric_for_best_model: "key_score" 45 | tokenizer: "{resource_dir}/roberta-base" 46 | model_type: roberta-base 47 | -------------------------------------------------------------------------------- /src/configs/exps/roberta-base/roberta-base-emotion.yaml: -------------------------------------------------------------------------------- 1 | required_resources: 2 | roberta-base: "s3://ANONYMOUS/roberta-base" 3 | huggingface: "s3://ANONYMOUS/huggingface" 4 | emotion_splits: "s3://ANONYMOUS/emotion_splits" 5 | remote_zoo_dir: "s3://ANONYMOUS/local_models_zoo" 6 | load_from_zoo_use_remote: false 7 | resource_dir: "resources" 8 | push_to_remote_zoo: false 9 | push_to_local_zoo: true 10 | 11 | 12 | evaluate_locals_ood_after_merge: false 13 | evaluate_locals_before: true 14 | evaluate_locals_after: true 15 | 16 | seed: "{seed}" 17 | main_output_dir: 'runs/emotion-roberta_base/simple_avg-new-ood-seed{seed}' 18 | default_model_args: 19 | model_name: "{resource_dir}/roberta-base" 20 | version: "hyp0812" 21 | zoo_filter: 22 | version: "hyp0812" 23 | seed: "{seed}" 24 | do_lower_case: false 25 | per_device_train_batch_size: 16 26 | lr_scheduler_type: "polynomial" 27 | warmup_ratio: 0.06 28 | learning_rate: 1.0e-5 29 | num_train_epochs: 20.0 30 | #adam_beta1: 0.9 31 | #adam_beta2: 0.98 32 | #adam_epsilon: 1.0e-6 33 | #max_grad_norm: 0.0 34 | save_strategy: "epoch" 35 | evaluation_strategy: "epoch" 36 | load_best_model_at_end: true 37 | metric_for_best_model: "key_score" 38 | tokenizer: "{resource_dir}/roberta-base" 39 | model_type: roberta-base 40 | 
-------------------------------------------------------------------------------- /src/configs/exps/roberta-base/subset/rb-1k-fisher-whead.yaml: -------------------------------------------------------------------------------- 1 | required_resources: 2 | roberta-base: "s3://ANONYMOUS/roberta-base" 3 | huggingface: "s3://ANONYMOUS/huggingface" 4 | remote_zoo_dir: "s3://ANONYMOUS/local_models_zoo" 5 | load_from_zoo_use_remote: false 6 | resource_dir: "resources" 7 | push_to_remote_zoo: false 8 | push_to_local_zoo: true 9 | 10 | merger: 11 | coeff_search_method: null 12 | 13 | fisher_weighted: true 14 | fisher_n_example: 1000 15 | fisher_version: "h_1000_fix0726" 16 | exclude_param_regex: [] 17 | 18 | 19 | seed: "{seed}" 20 | main_output_dir: 'runs/glue-roberta-base/{partition_method}_1k_fisher/{dataset_name}/{seed}' 21 | 22 | default_model_args: 23 | model_name: "{resource_dir}/roberta-base" 24 | learning_rate: 1.0e-5 25 | num_train_epochs: 30.0 26 | version: "{partition_method}1k_0" 27 | zoo_filter: 28 | version: "{partition_method}1k_0" 29 | seed: "{seed}" 30 | num_train_epochs: 30.0 31 | do_lower_case: true 32 | per_device_train_batch_size: 16 33 | evaluation_strategy: "steps" 34 | eval_steps: 500 35 | 36 | 37 | tokenizer: "{resource_dir}/roberta-base" 38 | model_type: roberta 39 | -------------------------------------------------------------------------------- /src/configs/exps/roberta-base/subset/rb-1k-regmean-whead.yaml: -------------------------------------------------------------------------------- 1 | required_resources: 2 | roberta-base: "s3://ANONYMOUS/roberta-base" 3 | huggingface: "s3://ANONYMOUS/huggingface" 4 | remote_zoo_dir: "s3://ANONYMOUS/local_models_zoo" 5 | load_from_zoo_use_remote: false 6 | resource_dir: "resources" 7 | push_to_remote_zoo: false 8 | push_to_local_zoo: true 9 | 10 | merger: 11 | coeff_search_method: null 12 | fisher_weighted: false 13 | 14 | regmean_mean: true 15 | gram_n_example: 1000 16 | gram_version: "h_1000_0726_fix_whead" 
17 | exclude_param_regex: [] 18 | 19 | seed: "{seed}" 20 | main_output_dir: 'runs/glue-roberta-base/{partition_method}_1k_regmean/{dataset_name}/{seed}' 21 | 22 | default_model_args: 23 | model_name: "{resource_dir}/roberta-base" 24 | learning_rate: 1.0e-5 25 | num_train_epochs: 30.0 26 | version: "{partition_method}1k_0" 27 | zoo_filter: 28 | version: "{partition_method}1k_0" 29 | seed: "{seed}" 30 | num_train_epochs: 30.0 31 | do_lower_case: true 32 | per_device_train_batch_size: 16 33 | evaluation_strategy: "steps" 34 | eval_steps: 500 35 | 36 | 37 | tokenizer: "{resource_dir}/roberta-base" 38 | model_type: roberta 39 | -------------------------------------------------------------------------------- /src/configs/exps/roberta-base/subset/rb-1k-whead.yaml: -------------------------------------------------------------------------------- 1 | required_resources: 2 | roberta-base: "s3://ANONYMOUS/roberta-base" 3 | huggingface: "s3://ANONYMOUS/huggingface" 4 | remote_zoo_dir: "s3://ANONYMOUS/local_models_zoo" 5 | load_from_zoo_use_remote: false 6 | resource_dir: "resources" 7 | push_to_remote_zoo: false 8 | push_to_local_zoo: true 9 | 10 | merger: 11 | coeff_search_method: null 12 | fisher_weighted: false 13 | 14 | # regmean_mean: true 15 | # gram_n_example: 1000 16 | # gram_version: "h_1000_0726_fix" 17 | exclude_param_regex: [] 18 | 19 | seed: "{seed}" 20 | main_output_dir: 'runs/glue-roberta-base/{partition_method}_1k/{dataset_name}/{seed}' 21 | 22 | default_model_args: 23 | model_name: "{resource_dir}/roberta-base" 24 | learning_rate: 1.0e-5 25 | num_train_epochs: 30.0 26 | version: "{partition_method}1k_0" 27 | zoo_filter: 28 | version: "{partition_method}1k_0" 29 | seed: "{seed}" 30 | num_train_epochs: 30.0 31 | do_lower_case: true 32 | per_device_train_batch_size: 16 33 | evaluation_strategy: "steps" 34 | eval_steps: 500 35 | 36 | tokenizer: "{resource_dir}/roberta-base" 37 | model_type: roberta 38 | 
-------------------------------------------------------------------------------- /src/configs/exps/t5/ood/t5-base-emotion-ensemble-ood.yaml: -------------------------------------------------------------------------------- 1 | required_resources: 2 | t5-v1_1-base: "s3://ANONYMOUS/t5-v1_1-base" 3 | huggingface: "s3://ANONYMOUS/huggingface" 4 | emotion_splits: "s3://ANONYMOUS/emotion_splits" 5 | remote_zoo_dir: "s3://ANONYMOUS/local_models_zoo" 6 | load_from_zoo_use_remote: false 7 | resource_dir: "resources" 8 | push_to_remote_zoo: false 9 | push_to_local_zoo: true 10 | 11 | evaluate_ensemble_ood: true 12 | evaluate_locals_ood_before_merge: false 13 | evaluate_locals_ood_after_merge: false 14 | evaluate_locals_before: false 15 | evaluate_locals_after: false 16 | 17 | seq2seq: true 18 | 19 | resample_schema: sqrt 20 | 21 | merger: 22 | enabled: false 23 | 24 | debug: true 25 | 26 | ensembler: 27 | enabled: true 28 | handle_missing_label: false 29 | hard_ensemble: true 30 | seed: "{seed}" 31 | main_output_dir: 'runs/emotion-t5_base/ensemble-ood-seed{seed}' 32 | default_model_args: 33 | model_name: "{resource_dir}/t5-v1_1-base" 34 | version: "s50k" 35 | zoo_filter: 36 | version: "s50k" 37 | seed: "{seed}" 38 | do_lower_case: false 39 | per_device_train_batch_size: 16 40 | lr_scheduler_type: "polynomial" 41 | warmup_ratio: 0.06 42 | learning_rate: 1.0e-4 43 | max_steps: 50000 44 | #adam_beta1: 0.9 45 | #adam_beta2: 0.98 46 | #adam_epsilon: 1.0e-6 47 | #max_grad_norm: 0.0 48 | save_strategy: "steps" 49 | evaluation_strategy: "steps" 50 | eval_steps: 5000 51 | load_best_model_at_end: true 52 | metric_for_best_model: "key_score" 53 | generation_max_length: 4 54 | generation_num_beams: 1 55 | predict_with_generate: true 56 | include_inputs_for_metrics: true 57 | tokenizer: "{resource_dir}/t5-v1_1-base" 58 | model_type: t5 59 | -------------------------------------------------------------------------------- /src/configs/exps/t5/ood/t5-base-emotion-fisher-norm-ood.yaml: 
-------------------------------------------------------------------------------- 1 | required_resources: 2 | t5-v1_1-base: "s3://ANONYMOUS/t5-v1_1-base" 3 | huggingface: "s3://ANONYMOUS/huggingface" 4 | emotion_splits: "s3://ANONYMOUS/emotion_splits" 5 | remote_zoo_dir: "s3://ANONYMOUS/local_models_zoo" 6 | load_from_zoo_use_remote: false 7 | resource_dir: "resources" 8 | push_to_remote_zoo: false 9 | push_to_local_zoo: true 10 | 11 | resample_schema: sqrt 12 | 13 | evaluate_locals_ood_after_merge: true 14 | evaluate_locals_before: false 15 | evaluate_locals_after: false 16 | 17 | seq2seq: true 18 | 19 | merger: 20 | fisher_weighted: true 21 | fisher_n_example: 1000 22 | fisher_version: "h_1000_fix0708" 23 | emp_fisher: true 24 | fisher_normalize: "param" 25 | 26 | seed: "{seed}" 27 | main_output_dir: 'runs/emotion-t5_base/fisher-norm-ood-seed{seed}' 28 | default_model_args: 29 | model_name: "{resource_dir}/t5-v1_1-base" 30 | version: "s50k" 31 | zoo_filter: 32 | version: "s50k" 33 | seed: "{seed}" 34 | do_lower_case: false 35 | per_device_train_batch_size: 16 36 | lr_scheduler_type: "polynomial" 37 | warmup_ratio: 0.06 38 | learning_rate: 1.0e-4 39 | max_steps: 50000 40 | #adam_beta1: 0.9 41 | #adam_beta2: 0.98 42 | #adam_epsilon: 1.0e-6 43 | #max_grad_norm: 0.0 44 | save_strategy: "steps" 45 | evaluation_strategy: "steps" 46 | eval_steps: 5000 47 | load_best_model_at_end: true 48 | metric_for_best_model: "key_score" 49 | generation_max_length: 4 50 | generation_num_beams: 1 51 | predict_with_generate: true 52 | include_inputs_for_metrics: true 53 | tokenizer: "{resource_dir}/t5-v1_1-base" 54 | model_type: t5 55 | -------------------------------------------------------------------------------- /src/configs/exps/t5/ood/t5-base-emotion-fisher-ood.yaml: -------------------------------------------------------------------------------- 1 | required_resources: 2 | t5-v1_1-base: "s3://ANONYMOUS/t5-v1_1-base" 3 | huggingface: "s3://ANONYMOUS/huggingface" 4 | 
emotion_splits: "s3://ANONYMOUS/emotion_splits" 5 | remote_zoo_dir: "s3://ANONYMOUS/local_models_zoo" 6 | load_from_zoo_use_remote: false 7 | resource_dir: "resources" 8 | push_to_remote_zoo: false 9 | push_to_local_zoo: true 10 | 11 | resample_schema: sqrt 12 | 13 | evaluate_locals_ood_after_merge: true 14 | evaluate_locals_before: false 15 | evaluate_locals_after: false 16 | 17 | seq2seq: true 18 | 19 | merger: 20 | fisher_weighted: true 21 | fisher_n_example: 1000 22 | fisher_version: "h_1000_fix0708" 23 | emp_fisher: true 24 | 25 | seed: "{seed}" 26 | main_output_dir: 'runs/emotion-t5_base/fisher-ood-seed{seed}' 27 | default_model_args: 28 | model_name: "{resource_dir}/t5-v1_1-base" 29 | version: "s50k" 30 | zoo_filter: 31 | version: "s50k" 32 | seed: "{seed}" 33 | do_lower_case: false 34 | per_device_train_batch_size: 16 35 | lr_scheduler_type: "polynomial" 36 | warmup_ratio: 0.06 37 | learning_rate: 1.0e-4 38 | max_steps: 50000 39 | #adam_beta1: 0.9 40 | #adam_beta2: 0.98 41 | #adam_epsilon: 1.0e-6 42 | #max_grad_norm: 0.0 43 | save_strategy: "steps" 44 | evaluation_strategy: "steps" 45 | eval_steps: 5000 46 | load_best_model_at_end: true 47 | metric_for_best_model: "key_score" 48 | generation_max_length: 4 49 | generation_num_beams: 1 50 | predict_with_generate: true 51 | include_inputs_for_metrics: true 52 | tokenizer: "{resource_dir}/t5-v1_1-base" 53 | model_type: t5 54 | -------------------------------------------------------------------------------- /src/configs/exps/t5/ood/t5-base-emotion-mtl-ood.yaml: -------------------------------------------------------------------------------- 1 | required_resources: 2 | t5-v1_1-base: "s3://ANONYMOUS/t5-v1_1-base" 3 | huggingface: "s3://ANONYMOUS/huggingface" 4 | emotion_splits: "s3://ANONYMOUS/emotion_splits" 5 | remote_zoo_dir: "s3://ANONYMOUS/local_models_zoo" 6 | load_from_zoo_use_remote: false 7 | resource_dir: "resources" 8 | push_to_remote_zoo: false 9 | push_to_local_zoo: true 10 | 11 | 
evaluate_locals_ood_before_merge: true 12 | evaluate_locals_ood_after_merge: false 13 | evaluate_locals_before: false 14 | evaluate_locals_after: false 15 | 16 | seq2seq: true 17 | 18 | resample_schema: sqrt 19 | 20 | seed: "{seed}" 21 | 22 | merger: 23 | enabled: false 24 | 25 | load_from_checkpoint: true 26 | load_dir: '/job/notebooks/ModelMerge/runs/emotion-t5_base/mtl-new-seed{seed}' 27 | 28 | main_output_dir: 'runs/emotion-t5_base/mtl-ood-new-seed{seed}' 29 | default_model_args: 30 | model_name: "{resource_dir}/t5-v1_1-base" 31 | version: "s50k" 32 | zoo_filter: 33 | version: "s50k" 34 | seed: "{seed}" 35 | do_lower_case: false 36 | per_device_train_batch_size: 16 37 | lr_scheduler_type: "polynomial" 38 | warmup_ratio: 0.06 39 | learning_rate: 1.0e-4 40 | max_steps: 150000 41 | #adam_beta1: 0.9 42 | #adam_beta2: 0.98 43 | #adam_epsilon: 1.0e-6 44 | #max_grad_norm: 0.0 45 | save_strategy: "steps" 46 | evaluation_strategy: "steps" 47 | eval_steps: 10 48 | load_best_model_at_end: true 49 | metric_for_best_model: "key_score" 50 | generation_max_length: 4 51 | generation_num_beams: 1 52 | predict_with_generate: true 53 | include_inputs_for_metrics: true 54 | tokenizer: "{resource_dir}/t5-v1_1-base" 55 | model_type: t5 56 | -------------------------------------------------------------------------------- /src/configs/exps/t5/ood/t5-base-emotion-ood.yaml: -------------------------------------------------------------------------------- 1 | required_resources: 2 | t5-v1_1-base: "s3://ANONYMOUS/t5-v1_1-base" 3 | huggingface: "s3://ANONYMOUS/huggingface" 4 | emotion_splits: "s3://ANONYMOUS/emotion_splits" 5 | remote_zoo_dir: "s3://ANONYMOUS/local_models_zoo" 6 | load_from_zoo_use_remote: false 7 | resource_dir: "resources" 8 | push_to_remote_zoo: false 9 | push_to_local_zoo: true 10 | 11 | evaluate_locals_ood_before_merge: true 12 | evaluate_locals_ood_after_merge: true 13 | evaluate_locals_before: false 14 | evaluate_locals_after: false 15 | 16 | seq2seq: true 17 | 18 | 
resample_schema: sqrt 19 | 20 | seed: "{seed}" 21 | main_output_dir: 'runs/emotion-t5_base/simple_avg-new-seed{seed}' 22 | default_model_args: 23 | model_name: "{resource_dir}/t5-v1_1-base" 24 | version: "s50k" 25 | zoo_filter: 26 | version: "s50k" 27 | seed: "{seed}" 28 | do_lower_case: false 29 | per_device_train_batch_size: 16 30 | lr_scheduler_type: "polynomial" 31 | warmup_ratio: 0.06 32 | learning_rate: 1.0e-4 33 | max_steps: 50000 34 | #adam_beta1: 0.9 35 | #adam_beta2: 0.98 36 | #adam_epsilon: 1.0e-6 37 | #max_grad_norm: 0.0 38 | save_strategy: "steps" 39 | evaluation_strategy: "steps" 40 | eval_steps: 5000 41 | load_best_model_at_end: true 42 | metric_for_best_model: "key_score" 43 | generation_max_length: 4 44 | generation_num_beams: 1 45 | predict_with_generate: true 46 | include_inputs_for_metrics: true 47 | tokenizer: "{resource_dir}/t5-v1_1-base" 48 | model_type: t5 49 | -------------------------------------------------------------------------------- /src/configs/exps/t5/ood/t5-base-emotion-regmean-full-ood.yaml: -------------------------------------------------------------------------------- 1 | required_resources: 2 | t5-v1_1-base: "s3://ANONYMOUS/t5-v1_1-base" 3 | huggingface: "s3://ANONYMOUS/huggingface" 4 | emotion_splits: "s3://ANONYMOUS/emotion_splits" 5 | remote_zoo_dir: "s3://ANONYMOUS/local_models_zoo" 6 | load_from_zoo_use_remote: false 7 | resource_dir: "resources" 8 | push_to_remote_zoo: false 9 | push_to_local_zoo: true 10 | 11 | resample_schema: "sqrt" 12 | 13 | evaluate_locals_ood_after_merge: true 14 | evaluate_locals_before: false 15 | evaluate_locals_after: false 16 | 17 | seq2seq: true 18 | 19 | merger: 20 | regmean_exclude_param_regex: [] 21 | #regmean_exclude_param_regex: [] 22 | regmean_mean: true 23 | gram_n_example: 1000 24 | regmean_diag: true 25 | #gram_version: "h_1000_0726_fix_withclassifier" 26 | gram_version: "h_1000_0909_resample" 27 | seed: "{seed}" 28 | main_output_dir: 
'runs/emotion-t5_base/regmean-ood-nodechead-diag-full-resample-seed{seed}' 29 | default_model_args: 30 | model_name: "{resource_dir}/t5-v1_1-base" 31 | version: "s50k" 32 | zoo_filter: 33 | version: "s50k" 34 | seed: "{seed}" 35 | do_lower_case: false 36 | per_device_train_batch_size: 16 37 | lr_scheduler_type: "polynomial" 38 | warmup_ratio: 0.06 39 | learning_rate: 1.0e-4 40 | max_steps: 50000 41 | #adam_beta1: 0.9 42 | #adam_beta2: 0.98 43 | #adam_epsilon: 1.0e-6 44 | #max_grad_norm: 0.0 45 | save_strategy: "steps" 46 | evaluation_strategy: "steps" 47 | eval_steps: 5000 48 | load_best_model_at_end: true 49 | metric_for_best_model: "key_score" 50 | generation_max_length: 4 51 | generation_num_beams: 1 52 | predict_with_generate: true 53 | include_inputs_for_metrics: true 54 | tokenizer: "{resource_dir}/t5-v1_1-base" 55 | model_type: t5 56 | -------------------------------------------------------------------------------- /src/configs/exps/t5/ood/t5-base-emotion-regmean-ood.yaml: -------------------------------------------------------------------------------- 1 | required_resources: 2 | t5-v1_1-base: "s3://ANONYMOUS/t5-v1_1-base" 3 | huggingface: "s3://ANONYMOUS/huggingface" 4 | emotion_splits: "s3://ANONYMOUS/emotion_splits" 5 | remote_zoo_dir: "s3://ANONYMOUS/local_models_zoo" 6 | load_from_zoo_use_remote: false 7 | resource_dir: "resources" 8 | push_to_remote_zoo: false 9 | push_to_local_zoo: true 10 | 11 | resample_schema: "sqrt" 12 | 13 | evaluate_locals_ood_after_merge: true 14 | evaluate_locals_before: false 15 | evaluate_locals_after: false 16 | 17 | seq2seq: true 18 | 19 | merger: 20 | regmean_exclude_param_regex: [] 21 | #regmean_exclude_param_regex: [] 22 | regmean_mean: true 23 | gram_n_example: 1000 24 | #gram_version: "h_1000_0726_fix_withclassifier" 25 | gram_version: "h_1000_0909_resample" 26 | regmean_reduce_nondiag: 0.1 27 | 28 | seed: "{seed}" 29 | main_output_dir: 'runs/emotion-t5_base/regmean-ood-nodechead-diag-resample-seed{seed}' 30 | 
default_model_args: 31 | model_name: "{resource_dir}/t5-v1_1-base" 32 | version: "s50k" 33 | zoo_filter: 34 | version: "s50k" 35 | seed: "{seed}" 36 | do_lower_case: false 37 | per_device_train_batch_size: 16 38 | lr_scheduler_type: "polynomial" 39 | warmup_ratio: 0.06 40 | learning_rate: 1.0e-4 41 | max_steps: 50000 42 | #adam_beta1: 0.9 43 | #adam_beta2: 0.98 44 | #adam_epsilon: 1.0e-6 45 | #max_grad_norm: 0.0 46 | save_strategy: "steps" 47 | evaluation_strategy: "steps" 48 | eval_steps: 5000 49 | load_best_model_at_end: true 50 | metric_for_best_model: "key_score" 51 | generation_max_length: 4 52 | generation_num_beams: 1 53 | predict_with_generate: true 54 | include_inputs_for_metrics: true 55 | tokenizer: "{resource_dir}/t5-v1_1-base" 56 | model_type: t5 57 | -------------------------------------------------------------------------------- /src/configs/exps/t5/ood/t5-base-emotion-regmean-ood2.yaml: -------------------------------------------------------------------------------- 1 | required_resources: 2 | t5-v1_1-base: "s3://ANONYMOUS/t5-v1_1-base" 3 | huggingface: "s3://ANONYMOUS/huggingface" 4 | emotion_splits: "s3://ANONYMOUS/emotion_splits" 5 | remote_zoo_dir: "s3://ANONYMOUS/local_models_zoo" 6 | load_from_zoo_use_remote: false 7 | resource_dir: "resources" 8 | push_to_remote_zoo: false 9 | push_to_local_zoo: true 10 | 11 | # resample_schema: sqrt 12 | 13 | evaluate_locals_ood_after_merge: true 14 | evaluate_locals_before: false 15 | evaluate_locals_after: false 16 | 17 | seq2seq: true 18 | 19 | merger: 20 | regmean_exclude_param_regex: ['.*decoder.*','.*lm_head.*','.*shared.*','.*layer_norm.*','.*encoder.*'] 21 | #regmean_exclude_param_regex: [] 22 | regmean_mean: true 23 | gram_n_example: 1000 24 | #gram_version: "h_1000_0726_fix_withclassifier" 25 | gram_version: "h_1000_0909_noresample" 26 | seed: "{seed}" 27 | main_output_dir: 'runs/emotion-t5_base/regmean-ood2-nodechead-noresample-seed{seed}' 28 | default_model_args: 29 | model_name: 
"{resource_dir}/t5-v1_1-base" 30 | version: "s50k" 31 | zoo_filter: 32 | version: "s50k" 33 | seed: "{seed}" 34 | do_lower_case: false 35 | per_device_train_batch_size: 16 36 | lr_scheduler_type: "polynomial" 37 | warmup_ratio: 0.06 38 | learning_rate: 1.0e-4 39 | max_steps: 50000 40 | #adam_beta1: 0.9 41 | #adam_beta2: 0.98 42 | #adam_epsilon: 1.0e-6 43 | #max_grad_norm: 0.0 44 | save_strategy: "steps" 45 | evaluation_strategy: "steps" 46 | eval_steps: 5000 47 | load_best_model_at_end: true 48 | metric_for_best_model: "key_score" 49 | generation_max_length: 4 50 | generation_num_beams: 1 51 | predict_with_generate: true 52 | include_inputs_for_metrics: true 53 | tokenizer: "{resource_dir}/t5-v1_1-base" 54 | model_type: t5 55 | -------------------------------------------------------------------------------- /src/configs/exps/t5/ood/t5-base-emotion-regmean-rw-ood.yaml: -------------------------------------------------------------------------------- 1 | required_resources: 2 | t5-v1_1-base: "s3://ANONYMOUS/t5-v1_1-base" 3 | huggingface: "s3://ANONYMOUS/huggingface" 4 | emotion_splits: "s3://ANONYMOUS/emotion_splits" 5 | remote_zoo_dir: "s3://ANONYMOUS/local_models_zoo" 6 | load_from_zoo_use_remote: false 7 | resource_dir: "resources" 8 | push_to_remote_zoo: false 9 | push_to_local_zoo: true 10 | 11 | resample_schema: "sqrt" 12 | 13 | templates: 14 | rw: -1.0 15 | 16 | evaluate_locals_ood_after_merge: true 17 | evaluate_locals_before: false 18 | evaluate_locals_after: false 19 | 20 | seq2seq: true 21 | 22 | merger: 23 | regmean_exclude_param_regex: [] 24 | #regmean_exclude_param_regex: [] 25 | regmean_mean: true 26 | gram_n_example: 1000 27 | #gram_version: "h_1000_0726_fix_withclassifier" 28 | gram_version: "h_1000_0909_resample" 29 | regmean_reduce_nondiag: "{rw}" 30 | 31 | seed: "{seed}" 32 | main_output_dir: 'runs/emotion-t5_base/regmean-rw-{rw}-ood-resample-seed{seed}' 33 | default_model_args: 34 | model_name: "{resource_dir}/t5-v1_1-base" 35 | version: "s50k" 36 
| zoo_filter: 37 | version: "s50k" 38 | seed: "{seed}" 39 | do_lower_case: false 40 | per_device_train_batch_size: 16 41 | lr_scheduler_type: "polynomial" 42 | warmup_ratio: 0.06 43 | learning_rate: 1.0e-4 44 | max_steps: 50000 45 | #adam_beta1: 0.9 46 | #adam_beta2: 0.98 47 | #adam_epsilon: 1.0e-6 48 | #max_grad_norm: 0.0 49 | save_strategy: "steps" 50 | evaluation_strategy: "steps" 51 | eval_steps: 5000 52 | load_best_model_at_end: true 53 | metric_for_best_model: "key_score" 54 | generation_max_length: 4 55 | generation_num_beams: 1 56 | predict_with_generate: true 57 | include_inputs_for_metrics: true 58 | tokenizer: "{resource_dir}/t5-v1_1-base" 59 | model_type: t5 60 | -------------------------------------------------------------------------------- /src/configs/exps/t5/t5-base-emotion-debug.yaml: -------------------------------------------------------------------------------- 1 | # required_resources: 2 | # 3 | # partition_files: "s3://ANONYMOUS/partition_files" 4 | # t5-v1_1-base: "s3://ANONYMOUS/t5-v1_1-base" 5 | # huggingface: "s3://ANONYMOUS/huggingface" 6 | # emotion_splits: "s3://ANONYMOUS/emotion_splits" 7 | # remote_zoo_dir: "s3://ANONYMOUS/local_models_zoo" 8 | # load_from_zoo_use_remote: false 9 | # resource_dir: "resources" 10 | push_to_remote_zoo: false 11 | push_to_local_zoo: true 12 | 13 | debug: true 14 | 15 | 16 | evaluate_locals_ood_after_merge: true 17 | evaluate_locals_before: true 18 | evaluate_locals_after: false 19 | 20 | seq2seq: true 21 | resample_schema: sqrt 22 | 23 | seed: "{seed}" 24 | main_output_dir: 'runs/emotion-t5_base/simple_avg-debug-seed{seed}' 25 | default_model_args: 26 | model_name: "{resource_dir}/t5-v1_1-base" 27 | version: "hyp0812" 28 | zoo_filter: 29 | version: "hyp0812" 30 | seed: "{seed}" 31 | do_lower_case: false 32 | per_device_train_batch_size: 16 33 | lr_scheduler_type: "polynomial" 34 | warmup_ratio: 0.06 35 | learning_rate: 1.0e-4 36 | num_train_epochs: 3.0 37 | #adam_beta1: 0.9 38 | #adam_beta2: 0.98 39 | 
#adam_epsilon: 1.0e-6 40 | #max_grad_norm: 0.0 41 | save_strategy: "epoch" 42 | evaluation_strategy: "steps" 43 | eval_steps: 2 44 | load_best_model_at_end: true 45 | metric_for_best_model: "key_score" 46 | generation_max_length: 4 47 | generation_num_beams: 1 48 | predict_with_generate: true 49 | include_inputs_for_metrics: true 50 | tokenizer: "{resource_dir}/t5-v1_1-base" 51 | model_type: t5 52 | -------------------------------------------------------------------------------- /src/configs/exps/t5/t5-base-emotion-fisher.yaml: -------------------------------------------------------------------------------- 1 | required_resources: 2 | t5-v1_1-base: "s3://ANONYMOUS/t5-v1_1-base" 3 | huggingface: "s3://ANONYMOUS/huggingface" 4 | emotion_splits: "s3://ANONYMOUS/emotion_splits" 5 | remote_zoo_dir: "s3://ANONYMOUS/local_models_zoo" 6 | load_from_zoo_use_remote: false 7 | resource_dir: "resources" 8 | push_to_remote_zoo: false 9 | push_to_local_zoo: true 10 | 11 | resample_schema: sqrt 12 | 13 | evaluate_locals_ood_after_merge: false 14 | evaluate_locals_before: true 15 | evaluate_locals_after: true 16 | 17 | seq2seq: true 18 | 19 | merger: 20 | fisher_weighted: true 21 | fisher_n_example: 1000 22 | fisher_version: "h_1000_fix0708" 23 | emp_fisher: true 24 | 25 | seed: "{seed}" 26 | main_output_dir: 'runs/emotion-t5_base/fisher-seed{seed}' 27 | default_model_args: 28 | model_name: "{resource_dir}/t5-v1_1-base" 29 | version: "s50k" 30 | zoo_filter: 31 | version: "s50k" 32 | seed: "{seed}" 33 | do_lower_case: false 34 | per_device_train_batch_size: 16 35 | lr_scheduler_type: "polynomial" 36 | warmup_ratio: 0.06 37 | learning_rate: 1.0e-4 38 | max_steps: 50000 39 | #adam_beta1: 0.9 40 | #adam_beta2: 0.98 41 | #adam_epsilon: 1.0e-6 42 | #max_grad_norm: 0.0 43 | save_strategy: "steps" 44 | evaluation_strategy: "steps" 45 | eval_steps: 5000 46 | load_best_model_at_end: true 47 | metric_for_best_model: "key_score" 48 | generation_max_length: 4 49 | generation_num_beams: 1 50 | 
predict_with_generate: true 51 | include_inputs_for_metrics: true 52 | tokenizer: "{resource_dir}/t5-v1_1-base" 53 | model_type: t5 54 | -------------------------------------------------------------------------------- /src/configs/exps/t5/t5-base-emotion-mtl.yaml: -------------------------------------------------------------------------------- 1 | required_resources: 2 | t5-v1_1-base: "s3://ANONYMOUS/t5-v1_1-base" 3 | huggingface: "s3://ANONYMOUS/huggingface" 4 | emotion_splits: "s3://ANONYMOUS/emotion_splits" 5 | remote_zoo_dir: "s3://ANONYMOUS/local_models_zoo" 6 | load_from_zoo_use_remote: false 7 | resource_dir: "resources" 8 | push_to_remote_zoo: false 9 | push_to_local_zoo: true 10 | 11 | evaluate_locals_ood_after_merge: false 12 | evaluate_locals_before: true 13 | evaluate_locals_after: false 14 | 15 | seq2seq: true 16 | 17 | resample_schema: sqrt 18 | 19 | seed: "{seed}" 20 | main_output_dir: 'runs/emotion-t5_base/mtl-new-seed{seed}' 21 | default_model_args: 22 | model_name: "{resource_dir}/t5-v1_1-base" 23 | version: "s50k" 24 | zoo_filter: 25 | version: "s50k" 26 | seed: "{seed}" 27 | do_lower_case: false 28 | per_device_train_batch_size: 16 29 | lr_scheduler_type: "polynomial" 30 | warmup_ratio: 0.06 31 | learning_rate: 1.0e-4 32 | max_steps: 150000 33 | #adam_beta1: 0.9 34 | #adam_beta2: 0.98 35 | #adam_epsilon: 1.0e-6 36 | #max_grad_norm: 0.0 37 | save_strategy: "steps" 38 | evaluation_strategy: "steps" 39 | eval_steps: 10000 40 | load_best_model_at_end: true 41 | metric_for_best_model: "key_score" 42 | generation_max_length: 4 43 | generation_num_beams: 1 44 | predict_with_generate: true 45 | include_inputs_for_metrics: true 46 | tokenizer: "{resource_dir}/t5-v1_1-base" 47 | model_type: t5 48 | -------------------------------------------------------------------------------- /src/configs/exps/t5/t5-base-emotion-regmean-diag.yaml: -------------------------------------------------------------------------------- 1 | required_resources: 2 | t5-v1_1-base: 
"s3://ANONYMOUS/t5-v1_1-base" 3 | huggingface: "s3://ANONYMOUS/huggingface" 4 | emotion_splits: "s3://ANONYMOUS/emotion_splits" 5 | remote_zoo_dir: "s3://ANONYMOUS/local_models_zoo" 6 | load_from_zoo_use_remote: false 7 | resource_dir: "resources" 8 | push_to_remote_zoo: false 9 | push_to_local_zoo: true 10 | 11 | resample_schema: sqrt 12 | 13 | evaluate_locals_ood_after_merge: false 14 | evaluate_locals_before: true 15 | evaluate_locals_after: true 16 | 17 | seq2seq: true 18 | 19 | merger: 20 | regmean_exclude_param_regex: ['.*decoder.*','.*lm_head.*'] 21 | #regmean_exclude_param_regex: [] 22 | regmean_mean: true 23 | gram_n_example: 1000 24 | #gram_version: "h_1000_0726_fix_withclassifier" 25 | gram_version: "h_1000_0909_resample" 26 | regmean_diag: true 27 | seed: "{seed}" 28 | main_output_dir: 'runs/emotion-t5_base/regmean-nodeclmhead-resample-diag-seed{seed}' 29 | default_model_args: 30 | model_name: "{resource_dir}/t5-v1_1-base" 31 | version: "s50k" 32 | zoo_filter: 33 | version: "s50k" 34 | seed: "{seed}" 35 | do_lower_case: false 36 | per_device_train_batch_size: 16 37 | lr_scheduler_type: "polynomial" 38 | warmup_ratio: 0.06 39 | learning_rate: 1.0e-4 40 | max_steps: 50000 41 | #adam_beta1: 0.9 42 | #adam_beta2: 0.98 43 | #adam_epsilon: 1.0e-6 44 | #max_grad_norm: 0.0 45 | save_strategy: "steps" 46 | evaluation_strategy: "steps" 47 | eval_steps: 5000 48 | load_best_model_at_end: true 49 | metric_for_best_model: "key_score" 50 | generation_max_length: 4 51 | generation_num_beams: 1 52 | predict_with_generate: true 53 | include_inputs_for_metrics: true 54 | tokenizer: "{resource_dir}/t5-v1_1-base" 55 | model_type: t5 56 | -------------------------------------------------------------------------------- /src/configs/exps/t5/t5-base-emotion-regmean.yaml: -------------------------------------------------------------------------------- 1 | required_resources: 2 | t5-v1_1-base: "s3://ANONYMOUS/t5-v1_1-base" 3 | huggingface: "s3://ANONYMOUS/huggingface" 4 | 
emotion_splits: "s3://ANONYMOUS/emotion_splits" 5 | remote_zoo_dir: "s3://ANONYMOUS/local_models_zoo" 6 | load_from_zoo_use_remote: false 7 | resource_dir: "resources" 8 | push_to_remote_zoo: false 9 | push_to_local_zoo: true 10 | 11 | resample_schema: sqrt 12 | 13 | evaluate_locals_ood_after_merge: false 14 | evaluate_locals_before: false 15 | evaluate_locals_after: true 16 | 17 | seq2seq: true 18 | 19 | merger: 20 | regmean_exclude_param_regex: [] 21 | #regmean_exclude_param_regex: [] 22 | regmean_mean: true 23 | gram_n_example: 1000 24 | #gram_version: "h_1000_0726_fix_withclassifier" 25 | gram_version: "h_1000_0909_resample" 26 | regmean_reduce_nondiag: 0.1 27 | seed: "{seed}" 28 | main_output_dir: 'runs/emotion-t5_base/regmean-nondiag-rw-seed{seed}' 29 | default_model_args: 30 | model_name: "{resource_dir}/t5-v1_1-base" 31 | version: "s50k" 32 | zoo_filter: 33 | version: "s50k" 34 | seed: "{seed}" 35 | do_lower_case: false 36 | per_device_train_batch_size: 16 37 | lr_scheduler_type: "polynomial" 38 | warmup_ratio: 0.06 39 | learning_rate: 1.0e-4 40 | max_steps: 50000 41 | #adam_beta1: 0.9 42 | #adam_beta2: 0.98 43 | #adam_epsilon: 1.0e-6 44 | #max_grad_norm: 0.0 45 | save_strategy: "steps" 46 | evaluation_strategy: "steps" 47 | eval_steps: 5000 48 | load_best_model_at_end: true 49 | metric_for_best_model: "key_score" 50 | generation_max_length: 4 51 | generation_num_beams: 1 52 | predict_with_generate: true 53 | include_inputs_for_metrics: true 54 | tokenizer: "{resource_dir}/t5-v1_1-base" 55 | model_type: t5 56 | -------------------------------------------------------------------------------- /src/configs/exps/t5/t5-base-emotion.yaml: -------------------------------------------------------------------------------- 1 | required_resources: 2 | t5-v1_1-base: "s3://ANONYMOUS/t5-v1_1-base" 3 | huggingface: "s3://ANONYMOUS/huggingface" 4 | emotion_splits: "s3://ANONYMOUS/emotion_splits" 5 | remote_zoo_dir: "s3://ANONYMOUS/local_models_zoo" 6 | 
load_from_zoo_use_remote: false 7 | resource_dir: "resources" 8 | push_to_remote_zoo: false 9 | push_to_local_zoo: true 10 | 11 | evaluate_locals_ood_after_merge: false 12 | evaluate_locals_before: true 13 | evaluate_locals_after: true 14 | 15 | seq2seq: true 16 | 17 | resample_schema: sqrt 18 | 19 | seed: "{seed}" 20 | main_output_dir: 'runs/emotion-t5_base/simple_avg-new-seed{seed}' 21 | default_model_args: 22 | model_name: "{resource_dir}/t5-v1_1-base" 23 | version: "s50k" 24 | zoo_filter: 25 | version: "s50k" 26 | seed: "{seed}" 27 | do_lower_case: false 28 | per_device_train_batch_size: 16 29 | lr_scheduler_type: "polynomial" 30 | warmup_ratio: 0.06 31 | learning_rate: 1.0e-4 32 | max_steps: 50000 33 | #adam_beta1: 0.9 34 | #adam_beta2: 0.98 35 | #adam_epsilon: 1.0e-6 36 | #max_grad_norm: 0.0 37 | save_strategy: "steps" 38 | evaluation_strategy: "steps" 39 | eval_steps: 5000 40 | load_best_model_at_end: true 41 | metric_for_best_model: "key_score" 42 | generation_max_length: 4 43 | generation_num_beams: 1 44 | predict_with_generate: true 45 | include_inputs_for_metrics: true 46 | tokenizer: "{resource_dir}/t5-v1_1-base" 47 | model_type: t5 48 | -------------------------------------------------------------------------------- /src/configs/exps/t5/t5-base-local.yaml: -------------------------------------------------------------------------------- 1 | required_resources: 2 | t5-v1_1-base: "s3://ANONYMOUS/t5-v1_1-base" 3 | huggingface: "s3://ANONYMOUS/huggingface" 4 | emotion_splits: "s3://ANONYMOUS/emotion_splits" 5 | remote_zoo_dir: "s3://ANONYMOUS/local_models_zoo" 6 | load_from_zoo_use_remote: false 7 | resource_dir: "resources" 8 | push_to_remote_zoo: false 9 | push_to_local_zoo: true 10 | 11 | evaluate_locals_ood_after_merge: false 12 | evaluate_locals_before: true 13 | evaluate_locals_after: true 14 | 15 | seq2seq: true 16 | merger: 17 | enabled: false 18 | resample_schema: sqrt 19 | 20 | seed: "{seed}" 21 | main_output_dir: 
'runs/emotion-t5_base/simple_avg-seed{seed}/{postfix}' 22 | default_model_args: 23 | model_name: "{resource_dir}/t5-v1_1-base" 24 | version: "s50k" 25 | zoo_filter: 26 | version: "s50k" 27 | seed: "{seed}" 28 | do_lower_case: false 29 | per_device_train_batch_size: 16 30 | lr_scheduler_type: "polynomial" 31 | warmup_ratio: 0.06 32 | learning_rate: 1.0e-4 33 | max_steps: 50000 34 | #adam_beta1: 0.9 35 | #adam_beta2: 0.98 36 | #adam_epsilon: 1.0e-6 37 | #max_grad_norm: 0.0 38 | save_strategy: "steps" 39 | evaluation_strategy: "steps" 40 | eval_steps: 5000 41 | load_best_model_at_end: true 42 | metric_for_best_model: "key_score" 43 | generation_max_length: 4 44 | generation_num_beams: 1 45 | predict_with_generate: true 46 | include_inputs_for_metrics: true 47 | tokenizer: "{resource_dir}/t5-v1_1-base" 48 | model_type: t5 49 | -------------------------------------------------------------------------------- /src/data_manager/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Bloomberg Finance L.P. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
from .glue_data_manager import GLUEDataManager
from .emotion_data_manager import EmotionDataManager
from .ner_data_manager import NERDataManager
from .emotion_gen_data_manager import EmotionGenDataManager

# Registry of classification-style data managers, keyed by dataset name.
DM_CLASS_MAP = {
    "glue": GLUEDataManager,
    "emotion": EmotionDataManager,
    "ner": NERDataManager,
}

# Registry of sequence-to-sequence (generation) data managers.
DM_CLASS_MAP_GEN = {
    "emotion": EmotionGenDataManager,
}


def get_dm_class(dataset_name, seq2seq):
    """Resolve the data-manager class for *dataset_name*.

    Looks the name up in the generation registry when ``seq2seq`` is truthy,
    otherwise in the classification registry. An unknown dataset name raises
    ``KeyError``, exactly like a plain dict lookup.
    """
    registry = DM_CLASS_MAP_GEN if seq2seq else DM_CLASS_MAP
    return registry[dataset_name]
from torch.utils.data import ConcatDataset, Dataset


class SimpleDataManager:
    """Minimal base for data managers: holds a config/tokenizer pair.

    ``attributes`` and ``partitions`` start as ``None`` and are expected to
    be filled in by subclasses once data has been loaded.
    """

    def __init__(self, config, tokenizer):
        self.config = config
        self.tokenizer = tokenizer
        self.attributes = None
        self.partitions = None

    def join_datasets(self, datasets):
        """Concatenate several datasets into one torch ``ConcatDataset``."""
        return ConcatDataset(datasets)


class FeatureDataset(Dataset):
    """Thin ``Dataset`` wrapper around a pre-built sequence of features."""

    def __init__(self, features) -> None:
        super().__init__()
        self.features = features

    def __len__(self):
        return len(self.features)

    def __getitem__(self, i):
        return self.features[i]
import logging
from collections import Counter

import numpy as np


class Struct:
    """Lightweight attribute bag: ``Struct(a=1).a == 1``."""

    def __init__(self, **kwargs):
        for key, value in kwargs.items():
            setattr(self, key, value)


def most_common(lst):
    """Return the most frequent element of *lst*.

    Ties go to the earliest maximal element in *lst* order, because ``max``
    keeps the first item that attains the maximum key.
    """
    counts = Counter(lst)
    return max(lst, key=counts.get)


class Ensembler:
    # this is not a torch module
    def __init__(self, local_models):
        self.local_models = local_models

    def evaluate_ensemble(self, dataset):
        """Evaluate every local model on *dataset* and score the ensemble.

        Three combination modes, chosen by the first model's ensembler
        config: missing-label-aware averaging, hard (majority-vote)
        ensembling, or plain prediction averaging. Returns the metrics dict
        produced by the first model's ``compute_metrics`` function.
        """
        outputs = []
        label_freqs = []

        for local_model in self.local_models:
            _, output = local_model.evaluate(dataset, return_output=True)
            if self.local_models[0].config.ensembler.handle_missing_label:
                # Per-model label frequency; records which labels each local
                # model has actually seen.
                label_freqs.append(local_model.model_config.freq_dist)
            outputs.append(output)

        logging.info("Ensembling predictions of local models")

        if self.local_models[0].config.ensembler.handle_missing_label:
            logging.info("Handling missing label in some local models")
            # Sum predictions only over the label columns each model knows,
            # then divide by how many models contributed to each column.
            summed = np.zeros_like(outputs[0].predictions)
            contrib = np.zeros(summed.shape[1])
            for label_freq, output in zip(label_freqs, outputs):
                for label_id in label_freq:
                    summed[:, label_id] += output.predictions[:, label_id]
                    contrib[label_id] += 1
            contrib += 1e-10  # avoid division by zero for unseen labels
            preds = summed / np.tile(
                np.expand_dims(contrib, 0), (summed.shape[0], 1)
            )
        elif self.local_models[0].config.ensembler.hard_ensemble:
            # Majority vote over every prediction cell.
            voted = np.zeros_like(outputs[0].predictions)
            it = np.nditer(voted, flags=["multi_index"])
            for _ in it:
                votes = [output.predictions[it.multi_index] for output in outputs]
                voted[it.multi_index] = most_common(votes)
            preds = voted
        else:
            # Plain averaging of all local predictions.
            preds = np.mean(
                np.stack([output.predictions for output in outputs]), 0
            )

        ensemble_output = Struct(predictions=preds, label_ids=outputs[0].label_ids)

        if hasattr(outputs[0], "inputs"):
            ensemble_output.inputs = outputs[0].inputs

        # assume that compute metrics func are the same across local models
        met = self.local_models[0].trainer.compute_metrics(ensemble_output)
        return met
import re


def filter_params_to_merge(param_names, exclude_param_regex):
    """Return the names in *param_names* not matched by any exclude pattern.

    Args:
        param_names: iterable of parameter names (e.g. state_dict keys).
        exclude_param_regex: list of regex patterns; a name is dropped when
            any pattern matches at the start of the name (``re.match``).

    Returns:
        List of surviving names, in original order.
    """
    params_to_merge = []
    for name in param_names:
        # Generator form lets any() short-circuit on the first matching
        # pattern (the original built a full list of match results first).
        excluded = any(re.match(patt, name) for patt in exclude_param_regex)
        if not excluded:
            params_to_merge.append(name)
    return params_to_merge


def filter_modules_by_regex(base_module, include_patterns, include_type):
    """Collect named submodules of *base_module*, filtered by name and type.

    Args:
        base_module: torch module whose ``named_modules()`` is traversed.
        include_patterns: list of regex patterns; a falsy value accepts any
            name, otherwise at least one pattern must ``re.match`` the name.
        include_type: list of classes; a falsy value accepts any type,
            otherwise the module must be an instance of one of them.

    Returns:
        Dict mapping qualified module name -> module for entries passing
        both filters.
    """
    modules = {}
    for name, module in base_module.named_modules():
        valid_name = not include_patterns or any(
            re.match(patt, name) for patt in include_patterns
        )
        # isinstance accepts a tuple of classes, replacing the per-class scan.
        valid_type = not include_type or isinstance(module, tuple(include_type))
        if valid_type and valid_name:
            modules[name] = module
    return modules
import logging
import os


def transfer(dir1, dir2, **kwargs):
    """Push/pull a file or directory between local and remote storage.

    Placeholder hook: deployments must supply their own implementation.

    Raises:
        NotImplementedError: always, until a site-specific implementation
            is provided.
    """
    raise NotImplementedError(
        f"""
        Please implement your own transfer function taking {dir1} {dir2} {kwargs} to push/pull remote models and download resources from a service provider.
        Otherwise, please disable "load_from_zoo_use_remote" and "push_to_remote" in config to disable the remote model zoo feature and use local model zoo instead.
        Please also make sure all the files/folders required by "required_resources" are already present under "resource_dir".
        """
    )


def ropen(remote_filename):
    """Open a remote resource and return an IO handle.

    Placeholder hook: deployments must supply their own implementation.

    Raises:
        NotImplementedError: always, until a site-specific implementation
            is provided.
    """
    raise NotImplementedError(
        f"""
        Please implement your own ropen function taking {remote_filename} to create IO handles for remote resources
        Otherwise, please disable "load_from_zoo_use_remote" and "push_to_remote" in config to disable the remote model zoo feature and use local model zoo instead.
        """
    )


def ls(remote_dir):
    """List the files under a remote directory.

    Placeholder hook: deployments must supply their own implementation.

    Raises:
        NotImplementedError: always, until a site-specific implementation
            is provided.
    """
    raise NotImplementedError(
        f"""
        Please implement your own remote ls function taking {remote_dir} to list files in a remote directory
        Otherwise, please disable "load_from_zoo_use_remote" and "push_to_remote" in config to disable the remote model zoo feature and use local model zoo instead.
        """
    )


def check_and_get_remote_resources(config):
    """Download any resources from ``config.required_resources`` that are
    missing under ``config.resource_dir``.

    ``required_resources`` maps local sub-path -> remote path. Only runs
    when both ``required_resources`` and ``download_remote_resources`` are
    truthy; otherwise it is a no-op.
    """
    if config.required_resources and config.download_remote_resources:
        # Imported lazily so the heavyweight `datasets` dependency is only
        # required when remote resources are actually fetched.
        from datasets.config import HF_CACHE_HOME

        for l_pth, r_pth in vars(config.required_resources).items():
            save_pth = os.path.join(config.resource_dir, l_pth)
            if not os.path.exists(save_pth):
                logging.info("Starting to transfer {} to {}".format(r_pth, save_pth))
                transfer(r_pth, save_pth)
                # some ad-hoc processing for hf datasets cache
                if l_pth == "huggingface":
                    print("Also copying hf datasets cache to {}".format(HF_CACHE_HOME))
                    os.makedirs(HF_CACHE_HOME, exist_ok=True)
                    # NOTE(review): unquoted shell paths break on whitespace;
                    # consider shutil.copytree(..., dirs_exist_ok=True).
                    os.system("cp -r {}/* {}".format(save_pth, HF_CACHE_HOME))


def get_remote_config(s3_path, idx):
    """Fetch a remote YAML config to a local temp file and return its path.

    ``idx`` disambiguates temp files when several configs are fetched.
    """
    print("Getting config from {}".format(s3_path))
    config_path = "./tmp_{}.yaml".format(idx)
    transfer(s3_path, config_path, overwrite=True)
    return config_path


def upload_runs(config):
    """Mirror the local run output directory to the remote runs directory.

    Uploads ``config.main_output_dir`` under its parent path inside
    ``config.s3_runs_dir`` so the remote tree mirrors the local layout.
    """
    output_dir, remote_dir = config.main_output_dir, config.s3_runs_dir
    dst_dir = os.path.join(remote_dir, output_dir)

    # Strip a trailing slash, then drop the last path component so the
    # transfer recreates ``output_dir`` under the remote parent.
    # (endswith is IndexError-safe where the original dst_dir[-1] was not.)
    if dst_dir.endswith("/"):
        dst_dir = dst_dir[:-1]
    dst_dir = "/".join(dst_dir.split("/")[:-1])

    logging.info("Uploading files under {} to remote {}".format(output_dir, dst_dir))
    transfer(output_dir, dst_dir, overwrite=True)


def get_remote_args(s3_path, idx):
    """Fetch remote cmdline args to a local temp file and return its path.

    ``idx`` disambiguates temp files when several arg files are fetched.
    """
    print("Getting cmdline args from {}".format(s3_path))
    args_path = "./tmp_{}.args.txt".format(idx)
    transfer(s3_path, args_path, overwrite=True)
    return args_path
import random

import numpy as np
import torch
from transformers import (
    AutoConfig,
    AutoModelForQuestionAnswering,
    AutoModelForSeq2SeqLM,
    AutoModelForSequenceClassification,
    AutoModelForTokenClassification,
)


def set_seed(seed):
    """Make every RNG in the stack reproducible for the given seed.

    Seeds Python's ``random``, NumPy, and torch (CPU plus all CUDA
    devices), and pins cuDNN to deterministic, non-benchmarking kernels.
    """
    # Force cuDNN to pick deterministic algorithms and skip autotuning.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True
    # Seed every generator the training code may draw from.
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)