├── .gitattributes ├── ActiveIT ├── .gitignore ├── ActiveIT_README.ipynb ├── LICENSE ├── beaker_configs │ └── default_experiment.yaml ├── ds_configs │ ├── stage2.config │ ├── stage2_LinearDecay.config │ └── stage3.config ├── get_results.ipynb ├── my_scripts │ ├── TLAL │ │ ├── TLAL_base_pred_script.sh │ │ ├── TLAL_base_script.sh │ │ ├── TLAL_base_script_v1.sh │ │ ├── TLAL_base_script_v2.sh │ │ ├── TLAL_base_script_v3.sh │ │ ├── TLAL_base_script_v4.sh │ │ ├── TLAL_pipeline.py │ │ └── TLAL_utils.py │ └── write_cls_split_into_tasks.py ├── reproduce.sh ├── requirements.txt ├── src │ ├── compute_metrics.py │ ├── convert_data_to_s2s.py │ ├── ni_collator.py │ ├── ni_dataset.py │ ├── ni_trainer.py │ └── run_s2s.py ├── task_metadata.json ├── taskmap.py └── utils.py ├── README.md ├── imgs ├── Active-Instruction-Tuning.png ├── ExampleCurve.png ├── Prompt-Uncertainty-Full-Size.png ├── Task-Map.png └── TaskMapExample.png ├── prepare.sh └── reproduce_splits ├── README.md ├── TLAL_Exp0_all_10 ├── FCGRatioPI-NL-I10-TDTE-High-0.2-Mean-abs-bald_0 │ ├── dev_tasks.txt │ ├── excluded_tasks.txt │ ├── remain_tasks.txt │ ├── test_tasks.txt │ └── train_tasks.txt ├── FCGRatioPI-NL-I10-TDTE-High-0.2-Mean-abs-bald_1 │ ├── dev_tasks.txt │ ├── excluded_tasks.txt │ ├── remain_tasks.txt │ ├── test_tasks.txt │ └── train_tasks.txt ├── FCGRatioPI-NL-I10-TDTE-High-0.2-Mean-abs-bald_2 │ ├── dev_tasks.txt │ ├── excluded_tasks.txt │ ├── remain_tasks.txt │ ├── test_tasks.txt │ └── train_tasks.txt ├── FCGRatioPI-NL-I10-TDTE-High-0.2-Mean-abs-bald_3 │ ├── dev_tasks.txt │ ├── excluded_tasks.txt │ ├── remain_tasks.txt │ ├── test_tasks.txt │ └── train_tasks.txt ├── FCGRatioPI-NL-I10-TDTE-High-0.2-Mean-abs-bald_4 │ ├── dev_tasks.txt │ ├── excluded_tasks.txt │ ├── remain_tasks.txt │ ├── test_tasks.txt │ └── train_tasks.txt ├── FCGRatioPI-NL-I10-TDTE-High-0.2-Mean-abs-bald_5 │ ├── dev_tasks.txt │ ├── excluded_tasks.txt │ ├── remain_tasks.txt │ ├── test_tasks.txt │ └── train_tasks.txt ├── HighPerplexity_0 │ ├── 
dev_tasks.txt │ ├── excluded_tasks.txt │ ├── remain_tasks.txt │ ├── test_tasks.txt │ └── train_tasks.txt ├── HighPerplexity_1 │ ├── dev_tasks.txt │ ├── excluded_tasks.txt │ ├── remain_tasks.txt │ ├── test_tasks.txt │ └── train_tasks.txt ├── HighPerplexity_2 │ ├── dev_tasks.txt │ ├── excluded_tasks.txt │ ├── remain_tasks.txt │ ├── test_tasks.txt │ └── train_tasks.txt ├── HighPerplexity_3 │ ├── dev_tasks.txt │ ├── excluded_tasks.txt │ ├── remain_tasks.txt │ ├── test_tasks.txt │ └── train_tasks.txt ├── HighPerplexity_4 │ ├── dev_tasks.txt │ ├── excluded_tasks.txt │ ├── remain_tasks.txt │ ├── test_tasks.txt │ └── train_tasks.txt ├── HighPerplexity_5 │ ├── dev_tasks.txt │ ├── excluded_tasks.txt │ ├── remain_tasks.txt │ ├── test_tasks.txt │ └── train_tasks.txt ├── LowPerplexity_0 │ ├── dev_tasks.txt │ ├── excluded_tasks.txt │ ├── remain_tasks.txt │ ├── test_tasks.txt │ └── train_tasks.txt ├── LowPerplexity_1 │ ├── dev_tasks.txt │ ├── excluded_tasks.txt │ ├── remain_tasks.txt │ ├── test_tasks.txt │ └── train_tasks.txt ├── LowPerplexity_2 │ ├── dev_tasks.txt │ ├── excluded_tasks.txt │ ├── remain_tasks.txt │ ├── test_tasks.txt │ └── train_tasks.txt ├── LowPerplexity_3 │ ├── dev_tasks.txt │ ├── excluded_tasks.txt │ ├── remain_tasks.txt │ ├── test_tasks.txt │ └── train_tasks.txt ├── LowPerplexity_4 │ ├── dev_tasks.txt │ ├── excluded_tasks.txt │ ├── remain_tasks.txt │ ├── test_tasks.txt │ └── train_tasks.txt ├── LowPerplexity_5 │ ├── dev_tasks.txt │ ├── excluded_tasks.txt │ ├── remain_tasks.txt │ ├── test_tasks.txt │ └── train_tasks.txt ├── Random_0 │ ├── dev_tasks.txt │ ├── excluded_tasks.txt │ ├── remain_tasks.txt │ ├── test_tasks.txt │ └── train_tasks.txt ├── Random_1 │ ├── dev_tasks.txt │ ├── excluded_tasks.txt │ ├── remain_tasks.txt │ ├── test_tasks.txt │ └── train_tasks.txt ├── Random_2 │ ├── dev_tasks.txt │ ├── excluded_tasks.txt │ ├── remain_tasks.txt │ ├── test_tasks.txt │ └── train_tasks.txt ├── Random_3 │ ├── dev_tasks.txt │ ├── excluded_tasks.txt │ ├── 
remain_tasks.txt │ ├── test_tasks.txt │ └── train_tasks.txt ├── Random_4 │ ├── dev_tasks.txt │ ├── excluded_tasks.txt │ ├── remain_tasks.txt │ ├── test_tasks.txt │ └── train_tasks.txt ├── Random_5 │ ├── dev_tasks.txt │ ├── excluded_tasks.txt │ ├── remain_tasks.txt │ ├── test_tasks.txt │ └── train_tasks.txt └── init │ ├── dev_tasks.txt │ ├── excluded_tasks.txt │ ├── remain_tasks.txt │ ├── test_tasks.txt │ └── train_tasks.txt ├── TLAL_Exp0_all_20 ├── FCGRatioPI-NL-I10-TDTE-High-0.2-Mean-abs-bald_0 │ ├── dev_tasks.txt │ ├── excluded_tasks.txt │ ├── remain_tasks.txt │ ├── test_tasks.txt │ └── train_tasks.txt ├── FCGRatioPI-NL-I10-TDTE-High-0.2-Mean-abs-bald_1 │ ├── dev_tasks.txt │ ├── excluded_tasks.txt │ ├── remain_tasks.txt │ ├── test_tasks.txt │ └── train_tasks.txt ├── FCGRatioPI-NL-I10-TDTE-High-0.2-Mean-abs-bald_2 │ ├── dev_tasks.txt │ ├── excluded_tasks.txt │ ├── remain_tasks.txt │ ├── test_tasks.txt │ └── train_tasks.txt ├── FCGRatioPI-NL-I10-TDTE-High-0.2-Mean-abs-bald_3 │ ├── dev_tasks.txt │ ├── excluded_tasks.txt │ ├── remain_tasks.txt │ ├── test_tasks.txt │ └── train_tasks.txt ├── FCGRatioPI-NL-I10-TDTE-High-0.2-Mean-abs-bald_4 │ ├── dev_tasks.txt │ ├── excluded_tasks.txt │ ├── remain_tasks.txt │ ├── test_tasks.txt │ └── train_tasks.txt ├── FCGRatioPI-NL-I10-TDTE-High-0.2-Mean-abs-bald_5 │ ├── dev_tasks.txt │ ├── excluded_tasks.txt │ ├── remain_tasks.txt │ ├── test_tasks.txt │ └── train_tasks.txt ├── HighPerplexity_0 │ ├── dev_tasks.txt │ ├── excluded_tasks.txt │ ├── remain_tasks.txt │ ├── test_tasks.txt │ └── train_tasks.txt ├── HighPerplexity_1 │ ├── dev_tasks.txt │ ├── excluded_tasks.txt │ ├── remain_tasks.txt │ ├── test_tasks.txt │ └── train_tasks.txt ├── HighPerplexity_2 │ ├── dev_tasks.txt │ ├── excluded_tasks.txt │ ├── remain_tasks.txt │ ├── test_tasks.txt │ └── train_tasks.txt ├── HighPerplexity_3 │ ├── dev_tasks.txt │ ├── excluded_tasks.txt │ ├── remain_tasks.txt │ ├── test_tasks.txt │ └── train_tasks.txt ├── HighPerplexity_4 │ ├── dev_tasks.txt │ 
├── excluded_tasks.txt │ ├── remain_tasks.txt │ ├── test_tasks.txt │ └── train_tasks.txt ├── HighPerplexity_5 │ ├── dev_tasks.txt │ ├── excluded_tasks.txt │ ├── remain_tasks.txt │ ├── test_tasks.txt │ └── train_tasks.txt ├── LowPerplexity_0 │ ├── dev_tasks.txt │ ├── excluded_tasks.txt │ ├── remain_tasks.txt │ ├── test_tasks.txt │ └── train_tasks.txt ├── LowPerplexity_1 │ ├── dev_tasks.txt │ ├── excluded_tasks.txt │ ├── remain_tasks.txt │ ├── test_tasks.txt │ └── train_tasks.txt ├── LowPerplexity_2 │ ├── dev_tasks.txt │ ├── excluded_tasks.txt │ ├── remain_tasks.txt │ ├── test_tasks.txt │ └── train_tasks.txt ├── LowPerplexity_3 │ ├── dev_tasks.txt │ ├── excluded_tasks.txt │ ├── remain_tasks.txt │ ├── test_tasks.txt │ └── train_tasks.txt ├── LowPerplexity_4 │ ├── dev_tasks.txt │ ├── excluded_tasks.txt │ ├── remain_tasks.txt │ ├── test_tasks.txt │ └── train_tasks.txt ├── LowPerplexity_5 │ ├── dev_tasks.txt │ ├── excluded_tasks.txt │ ├── remain_tasks.txt │ ├── test_tasks.txt │ └── train_tasks.txt ├── Random_0 │ ├── dev_tasks.txt │ ├── excluded_tasks.txt │ ├── remain_tasks.txt │ ├── test_tasks.txt │ └── train_tasks.txt ├── Random_1 │ ├── dev_tasks.txt │ ├── excluded_tasks.txt │ ├── remain_tasks.txt │ ├── test_tasks.txt │ └── train_tasks.txt ├── Random_2 │ ├── dev_tasks.txt │ ├── excluded_tasks.txt │ ├── remain_tasks.txt │ ├── test_tasks.txt │ └── train_tasks.txt ├── Random_3 │ ├── dev_tasks.txt │ ├── excluded_tasks.txt │ ├── remain_tasks.txt │ ├── test_tasks.txt │ └── train_tasks.txt ├── Random_4 │ ├── dev_tasks.txt │ ├── excluded_tasks.txt │ ├── remain_tasks.txt │ ├── test_tasks.txt │ └── train_tasks.txt ├── Random_5 │ ├── dev_tasks.txt │ ├── excluded_tasks.txt │ ├── remain_tasks.txt │ ├── test_tasks.txt │ └── train_tasks.txt └── init │ ├── dev_tasks.txt │ ├── excluded_tasks.txt │ ├── remain_tasks.txt │ ├── test_tasks.txt │ └── train_tasks.txt ├── TLAL_Exp0_all_30 ├── FCGRatioPI-NL-I10-TDTE-High-0.2-Mean-abs-bald_0 │ ├── dev_tasks.txt │ ├── excluded_tasks.txt │ ├── 
remain_tasks.txt │ ├── test_tasks.txt │ └── train_tasks.txt ├── FCGRatioPI-NL-I10-TDTE-High-0.2-Mean-abs-bald_1 │ ├── dev_tasks.txt │ ├── excluded_tasks.txt │ ├── remain_tasks.txt │ ├── test_tasks.txt │ └── train_tasks.txt ├── FCGRatioPI-NL-I10-TDTE-High-0.2-Mean-abs-bald_2 │ ├── dev_tasks.txt │ ├── excluded_tasks.txt │ ├── remain_tasks.txt │ ├── test_tasks.txt │ └── train_tasks.txt ├── FCGRatioPI-NL-I10-TDTE-High-0.2-Mean-abs-bald_3 │ ├── dev_tasks.txt │ ├── excluded_tasks.txt │ ├── remain_tasks.txt │ ├── test_tasks.txt │ └── train_tasks.txt ├── FCGRatioPI-NL-I10-TDTE-High-0.2-Mean-abs-bald_4 │ ├── dev_tasks.txt │ ├── excluded_tasks.txt │ ├── remain_tasks.txt │ ├── test_tasks.txt │ └── train_tasks.txt ├── FCGRatioPI-NL-I10-TDTE-High-0.2-Mean-abs-bald_5 │ ├── dev_tasks.txt │ ├── excluded_tasks.txt │ ├── remain_tasks.txt │ ├── test_tasks.txt │ └── train_tasks.txt ├── HighPerplexity_0 │ ├── dev_tasks.txt │ ├── excluded_tasks.txt │ ├── remain_tasks.txt │ ├── test_tasks.txt │ └── train_tasks.txt ├── HighPerplexity_1 │ ├── dev_tasks.txt │ ├── excluded_tasks.txt │ ├── remain_tasks.txt │ ├── test_tasks.txt │ └── train_tasks.txt ├── HighPerplexity_2 │ ├── dev_tasks.txt │ ├── excluded_tasks.txt │ ├── remain_tasks.txt │ ├── test_tasks.txt │ └── train_tasks.txt ├── HighPerplexity_3 │ ├── dev_tasks.txt │ ├── excluded_tasks.txt │ ├── remain_tasks.txt │ ├── test_tasks.txt │ └── train_tasks.txt ├── HighPerplexity_4 │ ├── dev_tasks.txt │ ├── excluded_tasks.txt │ ├── remain_tasks.txt │ ├── test_tasks.txt │ └── train_tasks.txt ├── HighPerplexity_5 │ ├── dev_tasks.txt │ ├── excluded_tasks.txt │ ├── remain_tasks.txt │ ├── test_tasks.txt │ └── train_tasks.txt ├── LowPerplexity_0 │ ├── dev_tasks.txt │ ├── excluded_tasks.txt │ ├── remain_tasks.txt │ ├── test_tasks.txt │ └── train_tasks.txt ├── LowPerplexity_1 │ ├── dev_tasks.txt │ ├── excluded_tasks.txt │ ├── remain_tasks.txt │ ├── test_tasks.txt │ └── train_tasks.txt ├── LowPerplexity_2 │ ├── dev_tasks.txt │ ├── excluded_tasks.txt │ ├── 
remain_tasks.txt │ ├── test_tasks.txt │ └── train_tasks.txt ├── LowPerplexity_3 │ ├── dev_tasks.txt │ ├── excluded_tasks.txt │ ├── remain_tasks.txt │ ├── test_tasks.txt │ └── train_tasks.txt ├── LowPerplexity_4 │ ├── dev_tasks.txt │ ├── excluded_tasks.txt │ ├── remain_tasks.txt │ ├── test_tasks.txt │ └── train_tasks.txt ├── LowPerplexity_5 │ ├── dev_tasks.txt │ ├── excluded_tasks.txt │ ├── remain_tasks.txt │ ├── test_tasks.txt │ └── train_tasks.txt ├── Random_0 │ ├── dev_tasks.txt │ ├── excluded_tasks.txt │ ├── remain_tasks.txt │ ├── test_tasks.txt │ └── train_tasks.txt ├── Random_1 │ ├── dev_tasks.txt │ ├── excluded_tasks.txt │ ├── remain_tasks.txt │ ├── test_tasks.txt │ └── train_tasks.txt ├── Random_2 │ ├── dev_tasks.txt │ ├── excluded_tasks.txt │ ├── remain_tasks.txt │ ├── test_tasks.txt │ └── train_tasks.txt ├── Random_3 │ ├── dev_tasks.txt │ ├── excluded_tasks.txt │ ├── remain_tasks.txt │ ├── test_tasks.txt │ └── train_tasks.txt ├── Random_4 │ ├── dev_tasks.txt │ ├── excluded_tasks.txt │ ├── remain_tasks.txt │ ├── test_tasks.txt │ └── train_tasks.txt ├── Random_5 │ ├── dev_tasks.txt │ ├── excluded_tasks.txt │ ├── remain_tasks.txt │ ├── test_tasks.txt │ └── train_tasks.txt └── init │ ├── dev_tasks.txt │ ├── excluded_tasks.txt │ ├── remain_tasks.txt │ ├── test_tasks.txt │ └── train_tasks.txt ├── TLAL_Exp0_all_40 ├── FCGRatioPI-NL-I10-TDTE-High-0.2-Mean-abs-bald_0 │ ├── dev_tasks.txt │ ├── excluded_tasks.txt │ ├── remain_tasks.txt │ ├── test_tasks.txt │ └── train_tasks.txt ├── FCGRatioPI-NL-I10-TDTE-High-0.2-Mean-abs-bald_1 │ ├── dev_tasks.txt │ ├── excluded_tasks.txt │ ├── remain_tasks.txt │ ├── test_tasks.txt │ └── train_tasks.txt ├── FCGRatioPI-NL-I10-TDTE-High-0.2-Mean-abs-bald_2 │ ├── dev_tasks.txt │ ├── excluded_tasks.txt │ ├── remain_tasks.txt │ ├── test_tasks.txt │ └── train_tasks.txt ├── FCGRatioPI-NL-I10-TDTE-High-0.2-Mean-abs-bald_3 │ ├── dev_tasks.txt │ ├── excluded_tasks.txt │ ├── remain_tasks.txt │ ├── test_tasks.txt │ └── train_tasks.txt ├── 
FCGRatioPI-NL-I10-TDTE-High-0.2-Mean-abs-bald_4 │ ├── dev_tasks.txt │ ├── excluded_tasks.txt │ ├── remain_tasks.txt │ ├── test_tasks.txt │ └── train_tasks.txt ├── FCGRatioPI-NL-I10-TDTE-High-0.2-Mean-abs-bald_5 │ ├── dev_tasks.txt │ ├── excluded_tasks.txt │ ├── remain_tasks.txt │ ├── test_tasks.txt │ └── train_tasks.txt ├── HighPerplexity_0 │ ├── dev_tasks.txt │ ├── excluded_tasks.txt │ ├── remain_tasks.txt │ ├── test_tasks.txt │ └── train_tasks.txt ├── HighPerplexity_1 │ ├── dev_tasks.txt │ ├── excluded_tasks.txt │ ├── remain_tasks.txt │ ├── test_tasks.txt │ └── train_tasks.txt ├── HighPerplexity_2 │ ├── dev_tasks.txt │ ├── excluded_tasks.txt │ ├── remain_tasks.txt │ ├── test_tasks.txt │ └── train_tasks.txt ├── HighPerplexity_3 │ ├── dev_tasks.txt │ ├── excluded_tasks.txt │ ├── remain_tasks.txt │ ├── test_tasks.txt │ └── train_tasks.txt ├── HighPerplexity_4 │ ├── dev_tasks.txt │ ├── excluded_tasks.txt │ ├── remain_tasks.txt │ ├── test_tasks.txt │ └── train_tasks.txt ├── HighPerplexity_5 │ ├── dev_tasks.txt │ ├── excluded_tasks.txt │ ├── remain_tasks.txt │ ├── test_tasks.txt │ └── train_tasks.txt ├── LowPerplexity_0 │ ├── dev_tasks.txt │ ├── excluded_tasks.txt │ ├── remain_tasks.txt │ ├── test_tasks.txt │ └── train_tasks.txt ├── LowPerplexity_1 │ ├── dev_tasks.txt │ ├── excluded_tasks.txt │ ├── remain_tasks.txt │ ├── test_tasks.txt │ └── train_tasks.txt ├── LowPerplexity_2 │ ├── dev_tasks.txt │ ├── excluded_tasks.txt │ ├── remain_tasks.txt │ ├── test_tasks.txt │ └── train_tasks.txt ├── LowPerplexity_3 │ ├── dev_tasks.txt │ ├── excluded_tasks.txt │ ├── remain_tasks.txt │ ├── test_tasks.txt │ └── train_tasks.txt ├── LowPerplexity_4 │ ├── dev_tasks.txt │ ├── excluded_tasks.txt │ ├── remain_tasks.txt │ ├── test_tasks.txt │ └── train_tasks.txt ├── LowPerplexity_5 │ ├── dev_tasks.txt │ ├── excluded_tasks.txt │ ├── remain_tasks.txt │ ├── test_tasks.txt │ └── train_tasks.txt ├── Random_0 │ ├── dev_tasks.txt │ ├── excluded_tasks.txt │ ├── remain_tasks.txt │ ├── 
test_tasks.txt │ └── train_tasks.txt ├── Random_1 │ ├── dev_tasks.txt │ ├── excluded_tasks.txt │ ├── remain_tasks.txt │ ├── test_tasks.txt │ └── train_tasks.txt ├── Random_2 │ ├── dev_tasks.txt │ ├── excluded_tasks.txt │ ├── remain_tasks.txt │ ├── test_tasks.txt │ └── train_tasks.txt ├── Random_3 │ ├── dev_tasks.txt │ ├── excluded_tasks.txt │ ├── remain_tasks.txt │ ├── test_tasks.txt │ └── train_tasks.txt ├── Random_4 │ ├── dev_tasks.txt │ ├── excluded_tasks.txt │ ├── remain_tasks.txt │ ├── test_tasks.txt │ └── train_tasks.txt ├── Random_5 │ ├── dev_tasks.txt │ ├── excluded_tasks.txt │ ├── remain_tasks.txt │ ├── test_tasks.txt │ └── train_tasks.txt └── init │ ├── dev_tasks.txt │ ├── excluded_tasks.txt │ ├── remain_tasks.txt │ ├── test_tasks.txt │ └── train_tasks.txt ├── TLAL_Exp0_all_50 ├── FCGRatioPI-NL-I10-TDTE-High-0.2-Mean-abs-bald_0 │ ├── dev_tasks.txt │ ├── excluded_tasks.txt │ ├── remain_tasks.txt │ ├── test_tasks.txt │ └── train_tasks.txt ├── FCGRatioPI-NL-I10-TDTE-High-0.2-Mean-abs-bald_1 │ ├── dev_tasks.txt │ ├── excluded_tasks.txt │ ├── remain_tasks.txt │ ├── test_tasks.txt │ └── train_tasks.txt ├── FCGRatioPI-NL-I10-TDTE-High-0.2-Mean-abs-bald_2 │ ├── dev_tasks.txt │ ├── excluded_tasks.txt │ ├── remain_tasks.txt │ ├── test_tasks.txt │ └── train_tasks.txt ├── FCGRatioPI-NL-I10-TDTE-High-0.2-Mean-abs-bald_3 │ ├── dev_tasks.txt │ ├── excluded_tasks.txt │ ├── remain_tasks.txt │ ├── test_tasks.txt │ └── train_tasks.txt ├── FCGRatioPI-NL-I10-TDTE-High-0.2-Mean-abs-bald_4 │ ├── dev_tasks.txt │ ├── excluded_tasks.txt │ ├── remain_tasks.txt │ ├── test_tasks.txt │ └── train_tasks.txt ├── FCGRatioPI-NL-I10-TDTE-High-0.2-Mean-abs-bald_5 │ ├── dev_tasks.txt │ ├── excluded_tasks.txt │ ├── remain_tasks.txt │ ├── test_tasks.txt │ └── train_tasks.txt ├── HighPerplexity_0 │ ├── dev_tasks.txt │ ├── excluded_tasks.txt │ ├── remain_tasks.txt │ ├── test_tasks.txt │ └── train_tasks.txt ├── HighPerplexity_1 │ ├── dev_tasks.txt │ ├── excluded_tasks.txt │ ├── remain_tasks.txt │ 
├── test_tasks.txt │ └── train_tasks.txt ├── HighPerplexity_2 │ ├── dev_tasks.txt │ ├── excluded_tasks.txt │ ├── remain_tasks.txt │ ├── test_tasks.txt │ └── train_tasks.txt ├── HighPerplexity_3 │ ├── dev_tasks.txt │ ├── excluded_tasks.txt │ ├── remain_tasks.txt │ ├── test_tasks.txt │ └── train_tasks.txt ├── HighPerplexity_4 │ ├── dev_tasks.txt │ ├── excluded_tasks.txt │ ├── remain_tasks.txt │ ├── test_tasks.txt │ └── train_tasks.txt ├── HighPerplexity_5 │ ├── dev_tasks.txt │ ├── excluded_tasks.txt │ ├── remain_tasks.txt │ ├── test_tasks.txt │ └── train_tasks.txt ├── LowPerplexity_0 │ ├── dev_tasks.txt │ ├── excluded_tasks.txt │ ├── remain_tasks.txt │ ├── test_tasks.txt │ └── train_tasks.txt ├── LowPerplexity_1 │ ├── dev_tasks.txt │ ├── excluded_tasks.txt │ ├── remain_tasks.txt │ ├── test_tasks.txt │ └── train_tasks.txt ├── LowPerplexity_2 │ ├── dev_tasks.txt │ ├── excluded_tasks.txt │ ├── remain_tasks.txt │ ├── test_tasks.txt │ └── train_tasks.txt ├── LowPerplexity_3 │ ├── dev_tasks.txt │ ├── excluded_tasks.txt │ ├── remain_tasks.txt │ ├── test_tasks.txt │ └── train_tasks.txt ├── LowPerplexity_4 │ ├── dev_tasks.txt │ ├── excluded_tasks.txt │ ├── remain_tasks.txt │ ├── test_tasks.txt │ └── train_tasks.txt ├── LowPerplexity_5 │ ├── dev_tasks.txt │ ├── excluded_tasks.txt │ ├── remain_tasks.txt │ ├── test_tasks.txt │ └── train_tasks.txt ├── Random_0 │ ├── dev_tasks.txt │ ├── excluded_tasks.txt │ ├── remain_tasks.txt │ ├── test_tasks.txt │ └── train_tasks.txt ├── Random_1 │ ├── dev_tasks.txt │ ├── excluded_tasks.txt │ ├── remain_tasks.txt │ ├── test_tasks.txt │ └── train_tasks.txt ├── Random_2 │ ├── dev_tasks.txt │ ├── excluded_tasks.txt │ ├── remain_tasks.txt │ ├── test_tasks.txt │ └── train_tasks.txt ├── Random_3 │ ├── dev_tasks.txt │ ├── excluded_tasks.txt │ ├── remain_tasks.txt │ ├── test_tasks.txt │ └── train_tasks.txt ├── Random_4 │ ├── dev_tasks.txt │ ├── excluded_tasks.txt │ ├── remain_tasks.txt │ ├── test_tasks.txt │ └── train_tasks.txt ├── Random_5 │ ├── 
dev_tasks.txt │ ├── excluded_tasks.txt │ ├── remain_tasks.txt │ ├── test_tasks.txt │ └── train_tasks.txt └── init │ ├── dev_tasks.txt │ ├── excluded_tasks.txt │ ├── remain_tasks.txt │ ├── test_tasks.txt │ └── train_tasks.txt ├── default_dev ├── dev_tasks.txt ├── excluded_tasks.txt ├── test_tasks.txt └── train_tasks.txt └── xlingual ├── excluded_tasks.txt ├── test_tasks.txt └── train_tasks.txt /.gitattributes: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PlusLabNLP/Active-IT/5807f2b3146810149345051a0e49d6d539617ef5/.gitattributes -------------------------------------------------------------------------------- /ActiveIT/.gitignore: -------------------------------------------------------------------------------- 1 | # data 2 | data/ 3 | beaker_configs/ni_*.yaml 4 | backup/ 5 | 6 | # Byte-compiled / optimized / DLL files 7 | __pycache__/ 8 | *.py[cod] 9 | *$py.class 10 | 11 | # C extensions 12 | *.so 13 | 14 | # Distribution / packaging 15 | .Python 16 | build/ 17 | develop-eggs/ 18 | dist/ 19 | downloads/ 20 | eggs/ 21 | .eggs/ 22 | lib/ 23 | lib64/ 24 | parts/ 25 | sdist/ 26 | var/ 27 | wheels/ 28 | pip-wheel-metadata/ 29 | share/python-wheels/ 30 | *.egg-info/ 31 | .installed.cfg 32 | *.egg 33 | MANIFEST 34 | 35 | # PyInstaller 36 | # Usually these files are written by a python script from a template 37 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
38 | *.manifest 39 | *.spec 40 | 41 | # Installer logs 42 | pip-log.txt 43 | pip-delete-this-directory.txt 44 | 45 | # Unit test / coverage reports 46 | htmlcov/ 47 | .tox/ 48 | .nox/ 49 | .coverage 50 | .coverage.* 51 | .cache 52 | nosetests.xml 53 | coverage.xml 54 | *.cover 55 | *.py,cover 56 | .hypothesis/ 57 | .pytest_cache/ 58 | 59 | # Translations 60 | *.mo 61 | *.pot 62 | 63 | # Django stuff: 64 | *.log 65 | local_settings.py 66 | db.sqlite3 67 | db.sqlite3-journal 68 | 69 | # Flask stuff: 70 | instance/ 71 | .webassets-cache 72 | 73 | # Scrapy stuff: 74 | .scrapy 75 | 76 | # Sphinx documentation 77 | docs/_build/ 78 | 79 | # PyBuilder 80 | target/ 81 | 82 | # Jupyter Notebook 83 | .ipynb_checkpoints 84 | 85 | # IPython 86 | profile_default/ 87 | ipython_config.py 88 | 89 | # pyenv 90 | .python-version 91 | 92 | # pipenv 93 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 94 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 95 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 96 | # install all needed dependencies. 97 | #Pipfile.lock 98 | 99 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 100 | __pypackages__/ 101 | 102 | # Celery stuff 103 | celerybeat-schedule 104 | celerybeat.pid 105 | 106 | # SageMath parsed files 107 | *.sage.py 108 | 109 | # Environments 110 | .env 111 | .venv 112 | env/ 113 | venv/ 114 | ENV/ 115 | env.bak/ 116 | venv.bak/ 117 | 118 | # Spyder project settings 119 | .spyderproject 120 | .spyproject 121 | 122 | # Rope project settings 123 | .ropeproject 124 | 125 | # mkdocs documentation 126 | /site 127 | 128 | # mypy 129 | .mypy_cache/ 130 | .dmypy.json 131 | dmypy.json 132 | 133 | # Pyre type checker 134 | .pyre/ 135 | 136 | # Ignore output 137 | output/ 138 | src/rouge 139 | wandb/ 140 | natural-instructions/ -------------------------------------------------------------------------------- /ActiveIT/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Po-Nien Kung 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /ActiveIT/beaker_configs/default_experiment.yaml: -------------------------------------------------------------------------------- 1 | version: v2-alpha 2 | description: natural instructions 3 | tasks: 4 | - name: instruct 5 | image: 6 | beaker: Yizhongw03/ni-exp 7 | command: [ 8 | # deepspeed 9 | "deepspeed", "--master_port", 10086, 10 | # what to do 11 | "src/run_s2s.py", 12 | "--do_train", "--do_predict", 13 | "--predict_with_generate", 14 | # model 15 | "--model_name_or_path", "google/t5-xl-lm-adapt", 16 | "--max_source_length", 1024, 17 | "--max_target_length", 128, 18 | "--generation_max_length", 128, 19 | "--max_num_instances_per_task", 100, 20 | "--max_num_instances_per_eval_task", 100, 21 | "--add_task_name", False, 22 | "--add_task_definition", True, 23 | "--num_pos_examples", 2, 24 | "--num_neg_examples", 0, 25 | "--add_explanation", False, 26 | "--tk_instruct", False, 27 | # path 28 | "--data_dir", "/data/splits/default", 29 | "--task_dir", "/data/tasks", 30 | "--output_dir", "/output/", 31 | "--overwrite_output_dir", 32 | "--cache_dir", "./cache/", 33 | "--overwrite_cache", 34 | # training 35 | "--per_device_train_batch_size", 1, 36 | "--per_device_eval_batch_size", 2, 37 | "--gradient_accumulation_steps", 2, 38 | "--learning_rate", 5e-5, 39 | # "--max_steps", 10000, 40 | "--num_train_epochs", 2, 41 | "--lr_scheduler_type", "constant", 42 | "--warmup_steps", 0, 43 | "--logging_strategy", "steps", 44 | "--logging_steps", 500, 45 | "--evaluation_strategy", "no", 46 | "--save_strategy", "steps", 47 | "--save_steps", 2500, 48 | # deepspeed 49 | "--deepspeed", "ds_configs/stage2.config", 50 | 
"--bf16", 51 | # log 52 | "--disable_tqdm", True, 53 | "--report_to", "wandb", 54 | "--run_name", "t5-experiment" 55 | ] 56 | envVars: 57 | - name: CUDA_DEVICE_ORDER 58 | value: PCI_BUS_ID 59 | - name: TRANSFORMERS_CACHE 60 | value: ./cache/ 61 | - name: WANDB_PROJECT 62 | value: NaturalInstruction 63 | - name: WANDB_WATCH 64 | value: false 65 | - name: WANDB_LOG_MODEL 66 | value: false 67 | datasets: 68 | - mountPath: /data 69 | source: 70 | beaker: Yizhongw03/natural_instructions_release_0501 71 | result: 72 | # Beaker will capture anything that's written to this location and store it in the results 73 | # dataset. 74 | path: /output 75 | resources: 76 | gpuCount: 8 77 | context: 78 | cluster: ai2/mosaic-cirrascale 79 | priority: normal -------------------------------------------------------------------------------- /ActiveIT/ds_configs/stage2.config: -------------------------------------------------------------------------------- 1 | { 2 | "bfloat16": { 3 | "enabled": "auto" 4 | }, 5 | "fp16": { 6 | "enabled": "auto", 7 | "loss_scale": 0, 8 | "loss_scale_window": 1000, 9 | "initial_scale_power": 16, 10 | "hysteresis": 2, 11 | "min_loss_scale": 1 12 | }, 13 | "optimizer": { 14 | "type": "AdamW", 15 | "params": { 16 | "lr": "auto", 17 | "betas": "auto", 18 | "eps": "auto", 19 | "weight_decay": "auto" 20 | } 21 | }, 22 | "scheduler": { 23 | "type": "WarmupLR", 24 | "params": { 25 | "warmup_min_lr": "auto", 26 | "warmup_max_lr": "auto", 27 | "warmup_num_steps": "auto" 28 | } 29 | }, 30 | "zero_optimization": { 31 | "stage": 1, 32 | "offload_optimizer": { 33 | "device": "cpu", 34 | "pin_memory": true 35 | }, 36 | "allgather_partitions": true, 37 | "allgather_bucket_size": 2e8, 38 | "overlap_comm": true, 39 | "reduce_scatter": true, 40 | "reduce_bucket_size": 2e8, 41 | "contiguous_gradients": true 42 | }, 43 | "gradient_accumulation_steps": "auto", 44 | "gradient_clipping": "auto", 45 | "train_batch_size": "auto", 46 | "train_micro_batch_size_per_gpu": "auto", 47 | 
"steps_per_print": 1e5 48 | } -------------------------------------------------------------------------------- /ActiveIT/ds_configs/stage2_LinearDecay.config: -------------------------------------------------------------------------------- 1 | { 2 | "bfloat16": { 3 | "enabled": "auto" 4 | }, 5 | "fp16": { 6 | "enabled": "auto", 7 | "loss_scale": 0, 8 | "loss_scale_window": 1000, 9 | "initial_scale_power": 16, 10 | "hysteresis": 2, 11 | "min_loss_scale": 1 12 | }, 13 | "optimizer": { 14 | "type": "AdamW", 15 | "params": { 16 | "lr": "auto", 17 | "betas": "auto", 18 | "eps": "auto", 19 | "weight_decay": "auto" 20 | } 21 | }, 22 | "scheduler": { 23 | "type": "WarmupDecayLR", 24 | "params": { 25 | "total_num_steps":"auto", 26 | "warmup_min_lr": "auto", 27 | "warmup_max_lr": "auto", 28 | "warmup_num_steps": "auto" 29 | } 30 | }, 31 | "zero_optimization": { 32 | "stage": 2, 33 | "offload_optimizer": { 34 | "device": "cpu", 35 | "pin_memory": true 36 | }, 37 | "allgather_partitions": true, 38 | "allgather_bucket_size": 2e8, 39 | "overlap_comm": true, 40 | "reduce_scatter": true, 41 | "reduce_bucket_size": 2e8, 42 | "contiguous_gradients": true 43 | }, 44 | "gradient_accumulation_steps": "auto", 45 | "gradient_clipping": "auto", 46 | "train_batch_size": "auto", 47 | "train_micro_batch_size_per_gpu": "auto", 48 | "steps_per_print": 1e5 49 | } -------------------------------------------------------------------------------- /ActiveIT/ds_configs/stage3.config: -------------------------------------------------------------------------------- 1 | { 2 | "bfloat16": { 3 | "enabled": false 4 | }, 5 | "fp16": { 6 | "enabled": "auto", 7 | "loss_scale": 0, 8 | "loss_scale_window": 1000, 9 | "initial_scale_power": 16, 10 | "hysteresis": 2, 11 | "min_loss_scale": 1 12 | }, 13 | "optimizer": { 14 | "type": "AdamW", 15 | "params": { 16 | "lr": "auto", 17 | "betas": "auto", 18 | "eps": "auto", 19 | "weight_decay": "auto" 20 | } 21 | }, 22 | "scheduler": { 23 | "type": "WarmupLR", 24 | 
"params": { 25 | "warmup_min_lr": "auto", 26 | "warmup_max_lr": "auto", 27 | "warmup_num_steps": "auto" 28 | } 29 | }, 30 | "zero_optimization": { 31 | "stage": 3, 32 | "offload_optimizer": { 33 | "device": "none", 34 | "pin_memory": true 35 | }, 36 | "offload_param": { 37 | "device": "none", 38 | "pin_memory": true 39 | }, 40 | "overlap_comm": true, 41 | "contiguous_gradients": true, 42 | "sub_group_size": 1e9, 43 | "reduce_bucket_size": "auto", 44 | "stage3_prefetch_bucket_size": "auto", 45 | "stage3_param_persistence_threshold": "auto", 46 | "stage3_max_live_parameters": 1e9, 47 | "stage3_max_reuse_distance": 1e9, 48 | "stage3_gather_fp16_weights_on_model_save": true 49 | }, 50 | "gradient_accumulation_steps": "auto", 51 | "gradient_clipping": "auto", 52 | "steps_per_print": 1e5, 53 | "train_batch_size": "auto", 54 | "train_micro_batch_size_per_gpu": "auto", 55 | "wall_clock_breakdown": false 56 | } 57 | -------------------------------------------------------------------------------- /ActiveIT/my_scripts/TLAL/TLAL_base_pred_script.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -x 3 | export TORCH_EXTENSIONS_DIR=/home/ponienkung # refer to https://github.com/pytorch/pytorch/issues/34238 4 | export CUDA_DEVICE_ORDER="PCI_BUS_ID" 5 | export CUDA_VISIBLE_DEVICES="$1" 6 | export DataDir=$2 7 | export run_name=$3 8 | export additional_args=$4 9 | 10 | # export TRANSFORMERS_CACHE=/home/yizhongw/.cache/huggingface 11 | export data_repo="../natural-instructions/" 12 | export SEED=42 13 | # export run_name="${SplitName}_${AL_TYPE}_${AL_ITER}_tk_BS${ExpectBatchSize}_${InstNumPerTask}_lr1e-4_large_def_pos2" 14 | # export model_name_or_path="allenai/tk-instruct-small-def-pos" 15 | export model_name_or_path=output/my_experiment/TLAL/${run_name} 16 | # export model_name_or_path="output/my_experiment/My_tk_BS128_200_lr1e-4_large_label_pos" 17 | # rm -rf ~/.cache/huggingface/datasets/ni_dataset/ 18 | 19 | 20 | 
port=$(shuf -i25000-30000 -n1) 21 | 22 | deepspeed --master_port $port src/run_s2s.py \ 23 | --do_predict \ 24 | --predict_with_generate \ 25 | --model_name_or_path ${model_name_or_path} \ 26 | --max_source_length 1024 \ 27 | --max_target_length 128 \ 28 | --generation_max_length 128 \ 29 | --max_num_instances_per_task 0 \ 30 | --max_num_instances_per_eval_task 0 \ 31 | --add_task_name False \ 32 | --add_task_definition True \ 33 | --num_pos_examples 2 \ 34 | --num_neg_examples 0 \ 35 | --add_explanation False \ 36 | --tk_instruct False \ 37 | --data_dir $DataDir \ 38 | --task_dir ${data_repo}tasks \ 39 | --output_dir output/my_experiment/TLAL/${run_name} \ 40 | --overwrite_output_dir \ 41 | --overwrite_cache \ 42 | --per_device_eval_batch_size 64 \ 43 | --deepspeed ds_configs/stage3.config \ 44 | --bf16 \ 45 | --seed $SEED \ 46 | --pred_remain \ 47 | ${additional_args} \ 48 | --run_name ${run_name} 49 | -------------------------------------------------------------------------------- /ActiveIT/my_scripts/TLAL/TLAL_base_script.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -x 3 | export TORCH_EXTENSIONS_DIR=/home/ponienkung # refer to https://github.com/pytorch/pytorch/issues/34238 4 | export CUDA_DEVICE_ORDER="PCI_BUS_ID" 5 | export CUDA_VISIBLE_DEVICES="$1" 6 | export DataDir=$2 7 | export run_name=$3 8 | 9 | # export TRANSFORMERS_CACHE=/home/yizhongw/.cache/huggingface 10 | export data_repo="../natural-instructions/" 11 | 12 | export InstNumPerTask="100" 13 | export SEED=42 14 | export ExpectBatchSize=128 15 | export PerDeviceTrainBatchSize=4 16 | # export run_name="${SplitName}_${AL_TYPE}_${AL_ITER}_tk_BS${ExpectBatchSize}_${InstNumPerTask}_lr1e-4_large_def_pos2" 17 | # export model_name_or_path="allenai/tk-instruct-small-def-pos" 18 | export model_name_or_path="google/t5-large-lm-adapt" 19 | # export model_name_or_path="output/my_experiment/My_tk_BS128_200_lr1e-4_large_label_pos" 20 | 21 | # Auto cal 
grad_accum for batch 128 22 | ceildiv(){ echo $((($1+$2-1)/$2)); } 23 | GPU_NUM=$(ceildiv ${#CUDA_VISIBLE_DEVICES} 2) 24 | GradAccum=$((ExpectBatchSize / PerDeviceTrainBatchSize / GPU_NUM)) 25 | echo "$GradAccum" 26 | 27 | port=$(shuf -i25000-30000 -n1) 28 | 29 | deepspeed --master_port $port src/run_s2s.py \ 30 | --do_train \ 31 | --do_predict \ 32 | --predict_with_generate \ 33 | --model_name_or_path ${model_name_or_path} \ 34 | --max_source_length 1024 \ 35 | --max_target_length 128 \ 36 | --generation_max_length 128 \ 37 | --max_num_instances_per_task $InstNumPerTask \ 38 | --max_num_instances_per_eval_task 100 \ 39 | --max_num_instances_per_test_task 100 \ 40 | --add_task_name False \ 41 | --add_task_definition True \ 42 | --num_pos_examples 2 \ 43 | --num_neg_examples 0 \ 44 | --add_explanation False \ 45 | --tk_instruct False \ 46 | --data_dir $DataDir \ 47 | --task_dir ${data_repo}tasks \ 48 | --output_dir output/my_experiment/TLAL/${run_name} \ 49 | --overwrite_output_dir \ 50 | --overwrite_cache \ 51 | --per_device_train_batch_size $PerDeviceTrainBatchSize \ 52 | --per_device_eval_batch_size 64 \ 53 | --gradient_accumulation_steps $GradAccum \ 54 | --learning_rate 1e-04 \ 55 | --num_train_epochs 8 \ 56 | --lr_scheduler_type constant \ 57 | --warmup_steps 100 \ 58 | --logging_strategy steps \ 59 | --logging_steps 10 \ 60 | --evaluation_strategy epoch \ 61 | --save_strategy epoch \ 62 | --deepspeed ds_configs/stage2.config \ 63 | --bf16 \ 64 | --load_best_model_at_end \ 65 | --metric_for_best_model rougeL_for_train \ 66 | --save_total_limit 1 \ 67 | --seed $SEED \ 68 | --run_name ${run_name} 69 | -------------------------------------------------------------------------------- /ActiveIT/my_scripts/TLAL/TLAL_base_script_v1.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -x 3 | export TORCH_EXTENSIONS_DIR=/home/ponienkung # refer to https://github.com/pytorch/pytorch/issues/34238 4 | export 
CUDA_DEVICE_ORDER="PCI_BUS_ID" 5 | export CUDA_VISIBLE_DEVICES="$1" 6 | export DataDir=$2 7 | export run_name=$3 8 | 9 | # export TRANSFORMERS_CACHE=/home/yizhongw/.cache/huggingface 10 | export data_repo="../natural-instructions/" 11 | 12 | export InstNumPerTask="200" 13 | export SEED=42 14 | export ExpectBatchSize=128 15 | export PerDeviceTrainBatchSize=8 16 | # export run_name="${SplitName}_${AL_TYPE}_${AL_ITER}_tk_BS${ExpectBatchSize}_${InstNumPerTask}_lr1e-4_large_def_pos2" 17 | # export model_name_or_path="allenai/tk-instruct-small-def-pos" 18 | export model_name_or_path="google/t5-large-lm-adapt" 19 | # export model_name_or_path="output/my_experiment/My_tk_BS128_200_lr1e-4_large_label_pos" 20 | 21 | # Auto cal grad_accum for batch 128 22 | ceildiv(){ echo $((($1+$2-1)/$2)); } 23 | GPU_NUM=$(ceildiv ${#CUDA_VISIBLE_DEVICES} 2) 24 | GradAccum=$((ExpectBatchSize / PerDeviceTrainBatchSize / GPU_NUM)) 25 | echo "$GradAccum" 26 | 27 | port=$(shuf -i25000-30000 -n1) 28 | 29 | # No example, short source length, larger original batch size, 100 to 200 instances, less epochs 10 to 4 30 | 31 | deepspeed --master_port $port src/run_s2s.py \ 32 | --do_train \ 33 | --do_predict \ 34 | --predict_with_generate \ 35 | --model_name_or_path ${model_name_or_path} \ 36 | --max_source_length 512 \ 37 | --max_target_length 128 \ 38 | --generation_max_length 128 \ 39 | --max_num_instances_per_task $InstNumPerTask \ 40 | --max_num_instances_per_eval_task 20 \ 41 | --max_num_instances_per_test_task 100 \ 42 | --add_task_name False \ 43 | --add_task_definition True \ 44 | --num_pos_examples 0 \ 45 | --num_neg_examples 0 \ 46 | --add_explanation False \ 47 | --tk_instruct False \ 48 | --data_dir $DataDir \ 49 | --task_dir ${data_repo}tasks \ 50 | --output_dir output/my_experiment/TLAL/${run_name} \ 51 | --overwrite_output_dir \ 52 | --overwrite_cache \ 53 | --per_device_train_batch_size $PerDeviceTrainBatchSize \ 54 | --per_device_eval_batch_size 128 \ 55 | --gradient_accumulation_steps 
$GradAccum \ 56 | --learning_rate 1e-04 \ 57 | --num_train_epochs 4 \ 58 | --lr_scheduler_type constant \ 59 | --warmup_steps 50 \ 60 | --logging_strategy steps \ 61 | --logging_steps 10 \ 62 | --evaluation_strategy epoch \ 63 | --save_strategy epoch \ 64 | --deepspeed ds_configs/stage2.config \ 65 | --bf16 \ 66 | --load_best_model_at_end \ 67 | --metric_for_best_model rougeL_for_train \ 68 | --save_total_limit 1 \ 69 | --seed $SEED \ 70 | --run_name ${run_name} 71 | 72 | -------------------------------------------------------------------------------- /ActiveIT/my_scripts/TLAL/TLAL_base_script_v2.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -x 3 | export TORCH_EXTENSIONS_DIR=/home/ponienkung # refer to https://github.com/pytorch/pytorch/issues/34238 4 | export CUDA_DEVICE_ORDER="PCI_BUS_ID" 5 | export CUDA_VISIBLE_DEVICES="$1" 6 | export DataDir=$2 7 | export run_name=$3 8 | 9 | # export TRANSFORMERS_CACHE=/home/yizhongw/.cache/huggingface 10 | export data_repo="../natural-instructions/" 11 | 12 | export InstNumPerTask="200" 13 | export SEED=42 14 | export ExpectBatchSize=128 15 | export PerDeviceTrainBatchSize=4 16 | # export run_name="${SplitName}_${AL_TYPE}_${AL_ITER}_tk_BS${ExpectBatchSize}_${InstNumPerTask}_lr1e-4_large_def_pos2" 17 | # export model_name_or_path="allenai/tk-instruct-small-def-pos" 18 | export model_name_or_path="google/t5-large-lm-adapt" 19 | # export model_name_or_path="output/my_experiment/My_tk_BS128_200_lr1e-4_large_label_pos" 20 | 21 | # Auto cal grad_accum for batch 128 22 | ceildiv(){ echo $((($1+$2-1)/$2)); } 23 | GPU_NUM=$(ceildiv ${#CUDA_VISIBLE_DEVICES} 2) 24 | GradAccum=$((ExpectBatchSize / PerDeviceTrainBatchSize / GPU_NUM)) 25 | echo "$GradAccum" 26 | 27 | port=$(shuf -i25000-30000 -n1) 28 | 29 | # Add examples, long source length, smaller original batch size, more eval instances 20 --> 100 30 | 31 | deepspeed --master_port $port src/run_s2s.py \ 32 | --do_train \ 33 
| --do_predict \ 34 | --predict_with_generate \ 35 | --model_name_or_path ${model_name_or_path} \ 36 | --max_source_length 1024 \ 37 | --max_target_length 128 \ 38 | --generation_max_length 128 \ 39 | --max_num_instances_per_task $InstNumPerTask \ 40 | --max_num_instances_per_eval_task 100 \ 41 | --max_num_instances_per_test_task 100 \ 42 | --add_task_name False \ 43 | --add_task_definition True \ 44 | --num_pos_examples 2 \ 45 | --num_neg_examples 0 \ 46 | --add_explanation False \ 47 | --tk_instruct False \ 48 | --data_dir $DataDir \ 49 | --task_dir ${data_repo}tasks \ 50 | --output_dir output/my_experiment/TLAL/${run_name} \ 51 | --overwrite_output_dir \ 52 | --overwrite_cache \ 53 | --per_device_train_batch_size $PerDeviceTrainBatchSize \ 54 | --per_device_eval_batch_size 64 \ 55 | --gradient_accumulation_steps $GradAccum \ 56 | --learning_rate 1e-04 \ 57 | --num_train_epochs 4 \ 58 | --lr_scheduler_type constant \ 59 | --warmup_steps 50 \ 60 | --logging_strategy steps \ 61 | --logging_steps 10 \ 62 | --evaluation_strategy epoch \ 63 | --save_strategy epoch \ 64 | --deepspeed ds_configs/stage2.config \ 65 | --bf16 \ 66 | --load_best_model_at_end \ 67 | --metric_for_best_model rougeL_for_train \ 68 | --save_total_limit 1 \ 69 | --seed $SEED \ 70 | --run_name ${run_name} 71 | 72 | -------------------------------------------------------------------------------- /ActiveIT/my_scripts/TLAL/TLAL_base_script_v3.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -x 3 | export TORCH_EXTENSIONS_DIR=/home/ponienkung # refer to https://github.com/pytorch/pytorch/issues/34238 4 | export CUDA_DEVICE_ORDER="PCI_BUS_ID" 5 | export CUDA_VISIBLE_DEVICES="$1" 6 | export DataDir=$2 7 | export run_name=$3 8 | 9 | # export TRANSFORMERS_CACHE=/home/yizhongw/.cache/huggingface 10 | export data_repo="../natural-instructions/" 11 | 12 | export InstNumPerTask="200" 13 | export SEED=42 14 | export ExpectBatchSize=128 15 | export 
PerDeviceTrainBatchSize=4 16 | # export run_name="${SplitName}_${AL_TYPE}_${AL_ITER}_tk_BS${ExpectBatchSize}_${InstNumPerTask}_lr1e-4_large_def_pos2" 17 | # export model_name_or_path="allenai/tk-instruct-small-def-pos" 18 | export model_name_or_path="google/t5-large-lm-adapt" 19 | # export model_name_or_path="output/my_experiment/My_tk_BS128_200_lr1e-4_large_label_pos" 20 | 21 | # Auto cal grad_accum for batch 128 22 | ceildiv(){ echo $((($1+$2-1)/$2)); } 23 | GPU_NUM=$(ceildiv ${#CUDA_VISIBLE_DEVICES} 2) 24 | GradAccum=$((ExpectBatchSize / PerDeviceTrainBatchSize / GPU_NUM)) 25 | echo "$GradAccum" 26 | 27 | port=$(shuf -i25000-30000 -n1) 28 | 29 | # Two positive examples, long source length (1024), per-device train batch size 4, 50 eval instances per task, learning rate 5e-05, 6 epochs 30 | 31 | deepspeed --master_port $port src/run_s2s.py \ 32 | --do_train \ 33 | --do_predict \ 34 | --predict_with_generate \ 35 | --model_name_or_path ${model_name_or_path} \ 36 | --max_source_length 1024 \ 37 | --max_target_length 128 \ 38 | --generation_max_length 128 \ 39 | --max_num_instances_per_task $InstNumPerTask \ 40 | --max_num_instances_per_eval_task 50 \ 41 | --max_num_instances_per_test_task 100 \ 42 | --add_task_name False \ 43 | --add_task_definition True \ 44 | --num_pos_examples 2 \ 45 | --num_neg_examples 0 \ 46 | --add_explanation False \ 47 | --tk_instruct False \ 48 | --data_dir $DataDir \ 49 | --task_dir ${data_repo}tasks \ 50 | --output_dir output/my_experiment/TLAL/${run_name} \ 51 | --overwrite_output_dir \ 52 | --overwrite_cache \ 53 | --per_device_train_batch_size $PerDeviceTrainBatchSize \ 54 | --per_device_eval_batch_size 64 \ 55 | --gradient_accumulation_steps $GradAccum \ 56 | --learning_rate 5e-05 \ 57 | --num_train_epochs 6 \ 58 | --lr_scheduler_type constant \ 59 | --warmup_steps 50 \ 60 | --logging_strategy steps \ 61 | --logging_steps 10 \ 62 | --evaluation_strategy epoch \ 63 | --save_strategy epoch \ 64 | --deepspeed ds_configs/stage2.config \ 65 | --bf16 \ 66 | 
--load_best_model_at_end \ 67 | --metric_for_best_model rougeL_for_train \ 68 | --save_total_limit 1 \ 69 | --seed $SEED \ 70 | --run_name ${run_name} 71 | 72 | -------------------------------------------------------------------------------- /ActiveIT/my_scripts/TLAL/TLAL_base_script_v4.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -x 3 | export TORCH_EXTENSIONS_DIR=/home/ponienkung # refer to https://github.com/pytorch/pytorch/issues/34238 4 | export CUDA_DEVICE_ORDER="PCI_BUS_ID" 5 | export CUDA_VISIBLE_DEVICES="$1" 6 | export DataDir=$2 7 | export run_name=$3 8 | 9 | # export TRANSFORMERS_CACHE=/home/yizhongw/.cache/huggingface 10 | export data_repo="../natural-instructions/" 11 | 12 | export InstNumPerTask="200" 13 | export SEED=42 14 | export ExpectBatchSize=128 15 | export PerDeviceTrainBatchSize=4 16 | # export run_name="${SplitName}_${AL_TYPE}_${AL_ITER}_tk_BS${ExpectBatchSize}_${InstNumPerTask}_lr1e-4_large_def_pos2" 17 | # export model_name_or_path="allenai/tk-instruct-small-def-pos" 18 | export model_name_or_path="google/t5-large-lm-adapt" 19 | # export model_name_or_path="output/my_experiment/My_tk_BS128_200_lr1e-4_large_label_pos" 20 | 21 | # Auto cal grad_accum for batch 128 22 | ceildiv(){ echo $((($1+$2-1)/$2)); } 23 | GPU_NUM=$(ceildiv ${#CUDA_VISIBLE_DEVICES} 2) 24 | GradAccum=$((ExpectBatchSize / PerDeviceTrainBatchSize / GPU_NUM)) 25 | echo "$GradAccum" 26 | 27 | port=$(shuf -i25000-30000 -n1) 28 | 29 | deepspeed --master_port $port src/run_s2s.py \ 30 | --do_train \ 31 | --do_predict \ 32 | --predict_with_generate \ 33 | --model_name_or_path ${model_name_or_path} \ 34 | --max_source_length 1024 \ 35 | --max_target_length 128 \ 36 | --generation_max_length 128 \ 37 | --max_num_instances_per_task $InstNumPerTask \ 38 | --max_num_instances_per_eval_task 50 \ 39 | --max_num_instances_per_test_task 100 \ 40 | --add_task_name False \ 41 | --add_task_definition True \ 42 | 
--num_pos_examples 2 \ 43 | --num_neg_examples 0 \ 44 | --add_explanation False \ 45 | --tk_instruct False \ 46 | --data_dir $DataDir \ 47 | --task_dir ${data_repo}tasks \ 48 | --output_dir output/my_experiment/TLAL/${run_name} \ 49 | --overwrite_output_dir \ 50 | --overwrite_cache \ 51 | --per_device_train_batch_size $PerDeviceTrainBatchSize \ 52 | --per_device_eval_batch_size 64 \ 53 | --gradient_accumulation_steps $GradAccum \ 54 | --learning_rate 5e-05 \ 55 | --num_train_epochs 8 \ 56 | --lr_scheduler_type constant \ 57 | --warmup_steps 50 \ 58 | --logging_strategy steps \ 59 | --logging_steps 10 \ 60 | --evaluation_strategy epoch \ 61 | --save_strategy epoch \ 62 | --deepspeed ds_configs/stage2.config \ 63 | --bf16 \ 64 | --load_best_model_at_end \ 65 | --metric_for_best_model rougeL_for_train_cls \ 66 | --save_total_limit 1 \ 67 | --seed $SEED \ 68 | --run_name ${run_name} 69 | 70 | -------------------------------------------------------------------------------- /ActiveIT/my_scripts/write_cls_split_into_tasks.py: -------------------------------------------------------------------------------- 1 | import json 2 | from tqdm import tqdm 3 | import os 4 | 5 | def load_file(task_name): 6 | task_file = os.path.join("../natural-instructions/tasks", task_name) 7 | return json.load(open(task_file, "r")) 8 | def write_file(task_name, new_data): 9 | task_file = os.path.join("../natural-instructions/tasks", task_name) 10 | with open(task_file, "w") as F: 11 | json.dump(new_data, F, indent = 4) 12 | 13 | meta_data_file = "task_metadata.json" 14 | meta_data = json.load(open(meta_data_file, "r")) 15 | 16 | print("Writing Split and CLS_GEN tag into task file. 
We only have this information for EN tasks.") 17 | for task in tqdm(meta_data.keys()): 18 | if "Split" in meta_data[task].keys(): 19 | Split = meta_data[task]["Split"] 20 | else: 21 | Split = None 22 | if "CLS_GEN" in meta_data[task].keys(): 23 | CLS_GEN = meta_data[task]["CLS_GEN"] 24 | else: 25 | CLS_GEN = None 26 | if (CLS_GEN is not None) or (Split is not None): 27 | task_data = load_file(task) 28 | if Split is not None: 29 | task_data['Split'] = Split 30 | if CLS_GEN is not None: 31 | task_data['CLS_GEN'] = CLS_GEN 32 | write_file(task, task_data) 33 | 34 | print("Done...") -------------------------------------------------------------------------------- /ActiveIT/reproduce.sh: -------------------------------------------------------------------------------- 1 | export GPUS="0,1,2,3" 2 | 3 | # Important note: for reproducibility, we add the arg --no_update_task_list, which stops the task list from being updated and instead uses the task lists obtained from our experiments. 4 | # We do this because task selection can have huge variance for a single run. By using this argument, you can get performance closer to our experimental results. 5 | # If you remove this argument, you should still get similar results when averaging over multiple (5) random seeds. This can take time though. 
6 | 7 | # If not reproducing, make sure to remove --no_update_task_list args 8 | 9 | # Baseline: Random Sampling 10 | python3 my_scripts/TLAL/TLAL_pipeline.py --AL_type Random --gpus $GPUS --split_dir ../natural-instructions/splits/TLAL_Exp0_all_10 --max_iter 5 --fix_cls_gen_ratio 0.356 --base_script my_scripts/TLAL/TLAL_base_script_v4.sh --no_update_task_list 11 | # python3 my_scripts/TLAL/TLAL_pipeline.py --AL_type Random --gpus $GPUS --split_dir ../natural-instructions/splits/TLAL_Exp0_all_20 --max_iter 5 --fix_cls_gen_ratio 0.356 --base_script my_scripts/TLAL/TLAL_base_script_v4.sh --no_update_task_list 12 | # python3 my_scripts/TLAL/TLAL_pipeline.py --AL_type Random --gpus $GPUS --split_dir ../natural-instructions/splits/TLAL_Exp0_all_30 --max_iter 5 --fix_cls_gen_ratio 0.356 --base_script my_scripts/TLAL/TLAL_base_script_v4.sh --no_update_task_list 13 | # python3 my_scripts/TLAL/TLAL_pipeline.py --AL_type Random --gpus $GPUS --split_dir ../natural-instructions/splits/TLAL_Exp0_all_40 --max_iter 5 --fix_cls_gen_ratio 0.356 --base_script my_scripts/TLAL/TLAL_base_script_v4.sh --no_update_task_list 14 | # python3 my_scripts/TLAL/TLAL_pipeline.py --AL_type Random --gpus $GPUS --split_dir ../natural-instructions/splits/TLAL_Exp0_all_50 --max_iter 5 --fix_cls_gen_ratio 0.356 --base_script my_scripts/TLAL/TLAL_base_script_v4.sh --no_update_task_list 15 | 16 | # # Baseline: Low Perplexity 17 | python3 my_scripts/TLAL/TLAL_pipeline.py --AL_type LowPerplexity --gpus $GPUS --split_dir ../natural-instructions/splits/TLAL_Exp0_all_10 --max_iter 5 --fix_cls_gen_ratio 0.356 --base_script my_scripts/TLAL/TLAL_base_script_v4.sh --no_update_task_list 18 | # python3 my_scripts/TLAL/TLAL_pipeline.py --AL_type LowPerplexity --gpus $GPUS --split_dir ../natural-instructions/splits/TLAL_Exp0_all_20 --max_iter 5 --fix_cls_gen_ratio 0.356 --base_script my_scripts/TLAL/TLAL_base_script_v4.sh --no_update_task_list 19 | # python3 my_scripts/TLAL/TLAL_pipeline.py --AL_type LowPerplexity 
--gpus $GPUS --split_dir ../natural-instructions/splits/TLAL_Exp0_all_30 --max_iter 5 --fix_cls_gen_ratio 0.356 --base_script my_scripts/TLAL/TLAL_base_script_v4.sh --no_update_task_list 20 | # python3 my_scripts/TLAL/TLAL_pipeline.py --AL_type LowPerplexity --gpus $GPUS --split_dir ../natural-instructions/splits/TLAL_Exp0_all_40 --max_iter 5 --fix_cls_gen_ratio 0.356 --base_script my_scripts/TLAL/TLAL_base_script_v4.sh --no_update_task_list 21 | # python3 my_scripts/TLAL/TLAL_pipeline.py --AL_type LowPerplexity --gpus $GPUS --split_dir ../natural-instructions/splits/TLAL_Exp0_all_50 --max_iter 5 --fix_cls_gen_ratio 0.356 --base_script my_scripts/TLAL/TLAL_base_script_v4.sh --no_update_task_list 22 | 23 | # # Baseline: High Perplexity 24 | python3 my_scripts/TLAL/TLAL_pipeline.py --AL_type HighPerplexity --gpus $GPUS --split_dir ../natural-instructions/splits/TLAL_Exp0_all_10 --max_iter 5 --fix_cls_gen_ratio 0.356 --base_script my_scripts/TLAL/TLAL_base_script_v4.sh --no_update_task_list 25 | # python3 my_scripts/TLAL/TLAL_pipeline.py --AL_type HighPerplexity --gpus $GPUS --split_dir ../natural-instructions/splits/TLAL_Exp0_all_20 --max_iter 5 --fix_cls_gen_ratio 0.356 --base_script my_scripts/TLAL/TLAL_base_script_v4.sh --no_update_task_list 26 | # python3 my_scripts/TLAL/TLAL_pipeline.py --AL_type HighPerplexity --gpus $GPUS --split_dir ../natural-instructions/splits/TLAL_Exp0_all_30 --max_iter 5 --fix_cls_gen_ratio 0.356 --base_script my_scripts/TLAL/TLAL_base_script_v4.sh --no_update_task_list 27 | # python3 my_scripts/TLAL/TLAL_pipeline.py --AL_type HighPerplexity --gpus $GPUS --split_dir ../natural-instructions/splits/TLAL_Exp0_all_40 --max_iter 5 --fix_cls_gen_ratio 0.356 --base_script my_scripts/TLAL/TLAL_base_script_v4.sh --no_update_task_list 28 | # python3 my_scripts/TLAL/TLAL_pipeline.py --AL_type HighPerplexity --gpus $GPUS --split_dir ../natural-instructions/splits/TLAL_Exp0_all_50 --max_iter 5 --fix_cls_gen_ratio 0.356 --base_script 
my_scripts/TLAL/TLAL_base_script_v4.sh --no_update_task_list 29 | 30 | # # Proposed: Prompt Uncertainty 31 | python3 my_scripts/TLAL/TLAL_pipeline.py --AL_type FCGRatioPI-NL-I10-TDTE-High-0.2-Mean-abs-bald --gpus $GPUS --split_dir ../natural-instructions/splits/TLAL_Exp0_all_10 --max_iter 5 --fix_cls_gen_ratio 0.356 --base_script my_scripts/TLAL/TLAL_base_script_v4.sh --perturb_num 10 --no_update_task_list 32 | # python3 my_scripts/TLAL/TLAL_pipeline.py --AL_type FCGRatioPI-NL-I10-TDTE-High-0.2-Mean-abs-bald --gpus $GPUS --split_dir ../natural-instructions/splits/TLAL_Exp0_all_20 --max_iter 5 --fix_cls_gen_ratio 0.356 --base_script my_scripts/TLAL/TLAL_base_script_v4.sh --perturb_num 10 --no_update_task_list 33 | # python3 my_scripts/TLAL/TLAL_pipeline.py --AL_type FCGRatioPI-NL-I10-TDTE-High-0.2-Mean-abs-bald --gpus $GPUS --split_dir ../natural-instructions/splits/TLAL_Exp0_all_30 --max_iter 5 --fix_cls_gen_ratio 0.356 --base_script my_scripts/TLAL/TLAL_base_script_v4.sh --perturb_num 10 --no_update_task_list 34 | # python3 my_scripts/TLAL/TLAL_pipeline.py --AL_type FCGRatioPI-NL-I10-TDTE-High-0.2-Mean-abs-bald --gpus $GPUS --split_dir ../natural-instructions/splits/TLAL_Exp0_all_40 --max_iter 5 --fix_cls_gen_ratio 0.356 --base_script my_scripts/TLAL/TLAL_base_script_v4.sh --perturb_num 10 --no_update_task_list 35 | # python3 my_scripts/TLAL/TLAL_pipeline.py --AL_type FCGRatioPI-NL-I10-TDTE-High-0.2-Mean-abs-bald --gpus $GPUS --split_dir ../natural-instructions/splits/TLAL_Exp0_all_50 --max_iter 5 --fix_cls_gen_ratio 0.356 --base_script my_scripts/TLAL/TLAL_base_script_v4.sh --perturb_num 10 --no_update_task_list 36 | -------------------------------------------------------------------------------- /ActiveIT/requirements.txt: -------------------------------------------------------------------------------- 1 | transformers==4.18.0 2 | datasets==2.6.1 3 | git+https://github.com/microsoft/DeepSpeed.git@9f7126fc10a1f231c695db11632cbcb84f1af4d3 4 | fairscale==0.4.5 5 | 
ipython 6 | nltk 7 | tensorboard 8 | tqdm 9 | rouge_score 10 | wandb==0.12.10 11 | sentencepiece==0.1.96 12 | seaborn -------------------------------------------------------------------------------- /ActiveIT/src/convert_data_to_s2s.py: -------------------------------------------------------------------------------- 1 | ''' 2 | This script is used for converting our json data into input/output format and save in tsv file. 3 | This is used to training the T5-11B model on TPU. 4 | ''' 5 | 6 | import os 7 | import json 8 | import glob 9 | import tqdm 10 | import pandas as pd 11 | from transformers import HfArgumentParser, GPT2TokenizerFast 12 | from run_s2s import DataTrainingArguments 13 | from datasets import load_dataset 14 | from ni_collator import DataCollatorForNI 15 | from dataclasses import dataclass, field 16 | from nltk import sent_tokenize 17 | 18 | @dataclass 19 | class CustomizedArguments: 20 | output_dir: str = field( 21 | default="data/text2text/", metadata={"help": "The directory for saving splits."} 22 | ) 23 | 24 | if __name__ == "__main__": 25 | parser = HfArgumentParser((DataTrainingArguments, CustomizedArguments)) 26 | args, customized_args = parser.parse_args_into_dataclasses() 27 | raw_datasets = load_dataset( 28 | "src/ni_dataset.py", 29 | data_dir=args.data_dir, 30 | task_dir=args.task_dir, 31 | max_num_instances_per_task=args.max_num_instances_per_task, 32 | max_num_instances_per_eval_task=args.max_num_instances_per_eval_task 33 | ) 34 | 35 | tokenizer = GPT2TokenizerFast.from_pretrained("gpt2") 36 | data_collator = DataCollatorForNI( 37 | tokenizer, 38 | model=None, 39 | padding="max_length" if args.pad_to_max_length else "longest", 40 | max_source_length=args.max_source_length, 41 | max_target_length=args.max_target_length, 42 | add_task_definition=args.add_task_definition, 43 | num_pos_examples=args.num_pos_examples, 44 | num_neg_examples=args.num_neg_examples, 45 | add_explanation=args.add_explanation, 46 | text_only=True 47 | ) 48 | 49 | 
os.makedirs(customized_args.output_dir, exist_ok=True) 50 | 51 | for split in ["train", "test"]: 52 | with open(os.path.join(customized_args.output_dir, f"{split}.tsv"), "w") as fout1, \ 53 | open(os.path.join(customized_args.output_dir, f"{split}_examples.jsonl"), "w") as fout2: 54 | for example in tqdm.tqdm(raw_datasets[split]): 55 | encoded_example = data_collator([example]) 56 | fout1.write( 57 | " ".join(encoded_example["inputs"][0].split()) + "\t" + " ".join(encoded_example["labels"][0].split()) + "\n" 58 | ) 59 | example["s2s_input"] = " ".join(encoded_example["inputs"][0].split()) 60 | example["s2s_output"] = " ".join(encoded_example["labels"][0].split()) 61 | fout2.write(json.dumps(example) + "\n") 62 | -------------------------------------------------------------------------------- /imgs/Active-Instruction-Tuning.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PlusLabNLP/Active-IT/5807f2b3146810149345051a0e49d6d539617ef5/imgs/Active-Instruction-Tuning.png -------------------------------------------------------------------------------- /imgs/ExampleCurve.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PlusLabNLP/Active-IT/5807f2b3146810149345051a0e49d6d539617ef5/imgs/ExampleCurve.png -------------------------------------------------------------------------------- /imgs/Prompt-Uncertainty-Full-Size.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PlusLabNLP/Active-IT/5807f2b3146810149345051a0e49d6d539617ef5/imgs/Prompt-Uncertainty-Full-Size.png -------------------------------------------------------------------------------- /imgs/Task-Map.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PlusLabNLP/Active-IT/5807f2b3146810149345051a0e49d6d539617ef5/imgs/Task-Map.png 
-------------------------------------------------------------------------------- /imgs/TaskMapExample.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PlusLabNLP/Active-IT/5807f2b3146810149345051a0e49d6d539617ef5/imgs/TaskMapExample.png -------------------------------------------------------------------------------- /prepare.sh: -------------------------------------------------------------------------------- 1 | # Prepare Data 2 | # git clone https://github.com/allenai/natural-instructions.git 3 | cp -r reproduce_splits/* natural-instructions/splits/ 4 | 5 | # Add tags into data 6 | python3 my_scripts/write_cls_split_into_tasks.py 7 | 8 | # Create output dir 9 | mkdir -p ActiveIT/output/my_experiment/TLAL/ 10 | 11 | -------------------------------------------------------------------------------- /reproduce_splits/README.md: -------------------------------------------------------------------------------- 1 | # Evaluation Setup of Natural Instruction V2 2 | 3 | Here we describe the evaluation setup used in [our paper](https://arxiv.org/abs/2204.07705) which can be used for reproducing our experiments or extending them. The target setup of the study is a cross-task generalization, i.e., training on a subset of tasks and evaluating on the remaining unseen ones. To do so, we split out tasks into two subsets: one for training and one for testing. Users can use the training tasks for any modeling purpose, while the testing tasks should be used to evaluate and compare different models. 4 | 5 | Moreover, because of the multilingual nature of Natural Instruction V2, we are able to evaluate the model’s generalization to unseen tasks not only in English but also in other languages. Therefore, we create two evaluation tracks: one for cross-task generalization within English, and the other for cross-lingual cross-task generalization. 
6 | 7 | ## English Track 8 | 9 | The [`default`](default) folder contains our splits for the English-only tasks (i.e., both task input and task output are in English). Each line in the `.txt` file corresponds to one task in our [task folder](../tasks/). 10 | 11 | The `train_tasks.txt` should be used for your modeling, from which you can also set apart some tasks for validation if needed. 12 | 13 | The `test_tasks.txt` contains the tasks for evaluation. We manually selected 12 categories of tasks for evaluation. They cover diverse varieties, such as those at word, sentence, and document levels, in both classification and generation format. **Note:** You are supposed to use them only for reporting the performance. When you get the predictions, you are welcome to submit your predictions to our leaderboard (We will release the submission instructions soon)! 14 | 15 | The `excluded_tasks.txt` contains all the tasks that are excluded from both training and testing (e.g., non-English tasks for the default track). Specifically, we exclude tasks that are sourced from the same dataset as any testing task, in order to avoid potential data leakage. 16 | 17 | ## Cross-lingual Track 18 | 19 | The cross-lingual track is mainly used for testing whether a model can even follow instructions to do a new task in other languages (i.e., cross-lingual cross-task generalization). To facilitate this goal, we include only non-English tasks in the `test_tasks.txt`, which also belong to the 12 evaluation categories. Some non-English tasks in other categories (e.g., translation) are added to the `train_tasks.txt`. 20 | 21 | ## Testing Instances 22 | 23 | Since we have 119 / 35 test tasks for the English / xlingual tracks respectively and each task might have at most 6.5K instances, evaluating on all of these instances would be very slow and is unnecessary. 24 | 25 | For an efficient evaluation, we selected 100 instances from each task to do the testing. 
These instances are also selected with label balancing if possible (valid mainly for classification tasks). See [`src/reorder_instances_for_testing.py`](src/reorder_instances_for_testing.py) for details. For reproducibility, these 100 instances are put at the beginning of the `Instances` field. We will only evaluate the model's performance on these testing instances. You can get them by slicing the `Instances` list: 26 | 27 | ```python 28 | test_instances = task_json["Instances"][:100] 29 | ``` 30 | -------------------------------------------------------------------------------- /reproduce_splits/TLAL_Exp0_all_10/FCGRatioPI-NL-I10-TDTE-High-0.2-Mean-abs-bald_0/train_tasks.txt: -------------------------------------------------------------------------------- 1 | task283_dream_incorrect_answer_generation 2 | task1366_healthfact_classification 3 | task091_all_elements_from_index_i_to_j 4 | task299_storycloze_sentence_generation 5 | task505_count_all_numerical_elements_in_list 6 | task1714_convai3_sentence_generation 7 | task1188_count_max_freq_char 8 | task514_argument_consequence_classification 9 | task1730_personachat_choose_next 10 | task1419_mathqa_gain 11 | task368_synthetic_even_or_odd_calculation 12 | task756_find_longert_substring_and_return_all_unique_alphabets_in_it 13 | task710_mmmlu_answer_generation_high_school_statistics 14 | task1583_bless_meronym_classification 15 | task1389_hellaswag_completion 16 | task860_prost_mcq_generation 17 | task615_moviesqa_answer_generation 18 | task1347_glue_sts-b_similarity_classification 19 | task1520_qa_srl_answer_generation 20 | task142_odd-man-out_classification_no_category 21 | task874_opus_xhosanavy_sr 22 | task350_winomt_classification_gender_identifiability_pro 23 | task593_sciq_explanation_generation 24 | task715_mmmlu_answer_generation_international_law 25 | task888_reviews_classification 26 | task137_detoxifying-lms_classification_toxicity 27 | task149_afs_argument_quality_death_penalty 28 | 
task026_drop_question_generation 29 | task1483_chemical_extraction_chemprot_dataset 30 | task454_swag_incorrect_answer_generation 31 | task320_stereoset_classification_race 32 | task207_max_element_lists 33 | task733_mmmlu_answer_generation_security_studies 34 | task1421_mathqa_other 35 | task1508_wordnet_antonyms 36 | task311_race_question_generation 37 | task683_online_privacy_policy_text_purpose_answer_generation 38 | task875_emotion_classification 39 | task1147_country_currency 40 | task243_count_elements_in_set_intersection 41 | task301_record_question_generation 42 | task672_amazon_and_yelp_summarization_dataset_summarization 43 | task589_amazonfood_summary_text_generation 44 | task145_afs_argument_similarity_death_penalty 45 | task160_replace_letter_in_a_sentence 46 | task870_msmarco_answer_generation 47 | task1149_item_check_edible 48 | task1332_check_leap_year 49 | task126_scan_structured_text_generation_command_action_all 50 | task1501_dstc3_answer_generation 51 | task665_mmmlu_answer_generation_anatomy 52 | task316_crows-pairs_classification_stereotype 53 | task1296_wiki_hop_question_answering 54 | task834_mathdataset_classification 55 | task684_online_privacy_policy_text_information_type_generation 56 | task228_arc_answer_generation_easy 57 | task627_xlwic_word_with_same_meaning_sentence_generation 58 | task821_protoqa_question_generation 59 | task515_senteval_odd_word_out 60 | task286_olid_offense_judgment 61 | task488_extract_all_alphabetical_elements_from_list_in_order 62 | task162_count_words_starting_with_letter 63 | task104_semeval_2019_task10_closed_vocabulary_mathematical_answer_generation 64 | task1190_add_integer_to_list 65 | task130_scan_structured_text_generation_command_action_long 66 | task161_count_words_containing_letter 67 | task022_cosmosqa_passage_inappropriate_binary 68 | task1725_civil_comments_severtoxicity_classification 69 | -------------------------------------------------------------------------------- 
/reproduce_splits/TLAL_Exp0_all_10/FCGRatioPI-NL-I10-TDTE-High-0.2-Mean-abs-bald_1/train_tasks.txt: -------------------------------------------------------------------------------- 1 | task283_dream_incorrect_answer_generation 2 | task1366_healthfact_classification 3 | task091_all_elements_from_index_i_to_j 4 | task299_storycloze_sentence_generation 5 | task505_count_all_numerical_elements_in_list 6 | task1714_convai3_sentence_generation 7 | task1188_count_max_freq_char 8 | task514_argument_consequence_classification 9 | task1730_personachat_choose_next 10 | task1419_mathqa_gain 11 | task368_synthetic_even_or_odd_calculation 12 | task756_find_longert_substring_and_return_all_unique_alphabets_in_it 13 | task710_mmmlu_answer_generation_high_school_statistics 14 | task1583_bless_meronym_classification 15 | task1389_hellaswag_completion 16 | task860_prost_mcq_generation 17 | task615_moviesqa_answer_generation 18 | task1347_glue_sts-b_similarity_classification 19 | task1520_qa_srl_answer_generation 20 | task142_odd-man-out_classification_no_category 21 | task874_opus_xhosanavy_sr 22 | task350_winomt_classification_gender_identifiability_pro 23 | task593_sciq_explanation_generation 24 | task715_mmmlu_answer_generation_international_law 25 | task888_reviews_classification 26 | task137_detoxifying-lms_classification_toxicity 27 | task149_afs_argument_quality_death_penalty 28 | task026_drop_question_generation 29 | task1483_chemical_extraction_chemprot_dataset 30 | task454_swag_incorrect_answer_generation 31 | task320_stereoset_classification_race 32 | task207_max_element_lists 33 | task733_mmmlu_answer_generation_security_studies 34 | task1421_mathqa_other 35 | task1508_wordnet_antonyms 36 | task311_race_question_generation 37 | task683_online_privacy_policy_text_purpose_answer_generation 38 | task875_emotion_classification 39 | task1147_country_currency 40 | task243_count_elements_in_set_intersection 41 | task301_record_question_generation 42 | 
task672_amazon_and_yelp_summarization_dataset_summarization 43 | task589_amazonfood_summary_text_generation 44 | task145_afs_argument_similarity_death_penalty 45 | task160_replace_letter_in_a_sentence 46 | task870_msmarco_answer_generation 47 | task1149_item_check_edible 48 | task1332_check_leap_year 49 | task126_scan_structured_text_generation_command_action_all 50 | task1501_dstc3_answer_generation 51 | task665_mmmlu_answer_generation_anatomy 52 | task316_crows-pairs_classification_stereotype 53 | task1296_wiki_hop_question_answering 54 | task834_mathdataset_classification 55 | task684_online_privacy_policy_text_information_type_generation 56 | task228_arc_answer_generation_easy 57 | task627_xlwic_word_with_same_meaning_sentence_generation 58 | task821_protoqa_question_generation 59 | task515_senteval_odd_word_out 60 | task286_olid_offense_judgment 61 | task488_extract_all_alphabetical_elements_from_list_in_order 62 | task162_count_words_starting_with_letter 63 | task104_semeval_2019_task10_closed_vocabulary_mathematical_answer_generation 64 | task1190_add_integer_to_list 65 | task130_scan_structured_text_generation_command_action_long 66 | task161_count_words_containing_letter 67 | task022_cosmosqa_passage_inappropriate_binary 68 | task1725_civil_comments_severtoxicity_classification 69 | task143_odd-man-out_classification_generate_category 70 | task854_hippocorpus_classification 71 | task046_miscellaneous_question_typing 72 | task1361_movierationales_classification 73 | task1500_dstc3_classification 74 | task523_find_if_numbers_or_alphabets_are_more_in_list 75 | task1418_bless_semantic_relation_classification 76 | task1089_check_monotonic_array 77 | task317_crows-pairs_classification_stereotype_type 78 | task309_race_answer_generation 79 | task212_logic2text_classification 80 | task1354_sent_comp_classification 81 | task521_trivia_question_classification 82 | task027_drop_answer_type_generation 83 | task1720_civil_comments_toxicity_classification 84 | 
task1289_trec_classification 85 | task1721_civil_comments_obscenity_classification 86 | task298_storycloze_correct_end_classification 87 | task767_craigslist_bargains_classification 88 | task923_event2mind_classifier 89 | task900_freebase_qa_category_classification 90 | task1191_food_veg_nonveg 91 | task284_imdb_classification 92 | task1505_root09_semantic_relation_classification 93 | task170_hotpotqa_answer_generation 94 | task1322_country_government_type 95 | task598_cuad_answer_generation 96 | task399_semeval_2018_task1_tweet_sadness_detection 97 | task599_cuad_question_generation 98 | task597_cuad_answer_generation 99 | task522_news_editorial_summary 100 | task347_hybridqa_incorrect_answer_generation 101 | task1291_multi_news_summarization 102 | task194_duorc_answer_generation 103 | task1427_country_region_in_world 104 | task182_duorc_question_generation 105 | task369_synthetic_remove_odds 106 | task370_synthetic_remove_divisible_by_3 107 | task1192_food_flavor_profile 108 | task631_dbpedia_14_incorrect_answer_generation 109 | task028_drop_answer_generation 110 | task1607_ethos_text_classification 111 | task302_record_classification 112 | task388_torque_token_classification 113 | task1553_cnn_dailymail_summarization 114 | task1368_healthfact_sentence_generation 115 | task372_synthetic_palindrome_numbers 116 | task405_narrativeqa_question_generation 117 | task382_hybridqa_answer_generation 118 | task375_classify_type_of_sentence_in_debate 119 | task517_emo_classify_emotion_of_dialogue 120 | task573_air_dialogue_classification 121 | task097_conala_remove_duplicates 122 | task469_mrqa_answer_generation 123 | task141_odd-man-out_classification_category 124 | task276_enhanced_wsc_classification 125 | task1704_ljspeech_textmodification 126 | task165_mcscript_question_answering_commonsense 127 | task269_csrg_counterfactual_story_generation 128 | task1451_drug_dose_extraction 129 | task1498_24hour_to_12hour_clock 130 | task1669_md_gender_bias_text_modification 131 | 
task191_hotpotqa_question_generation 132 | task1339_peixian_equity_evaluation_corpus_text_completion 133 | task1292_yelp_review_full_text_categorization 134 | task381_boolq_question_generation 135 | task183_rhyme_generation 136 | task082_babi_t1_single_supporting_fact_question_generation 137 | -------------------------------------------------------------------------------- /reproduce_splits/TLAL_Exp0_all_10/FCGRatioPI-NL-I10-TDTE-High-0.2-Mean-abs-bald_5/train_tasks.txt: -------------------------------------------------------------------------------- 1 | task283_dream_incorrect_answer_generation 2 | task1366_healthfact_classification 3 | task091_all_elements_from_index_i_to_j 4 | task299_storycloze_sentence_generation 5 | task505_count_all_numerical_elements_in_list 6 | task1714_convai3_sentence_generation 7 | task1188_count_max_freq_char 8 | task514_argument_consequence_classification 9 | task1730_personachat_choose_next 10 | task1419_mathqa_gain 11 | task368_synthetic_even_or_odd_calculation 12 | task756_find_longert_substring_and_return_all_unique_alphabets_in_it 13 | task710_mmmlu_answer_generation_high_school_statistics 14 | task1583_bless_meronym_classification 15 | task1389_hellaswag_completion 16 | task860_prost_mcq_generation 17 | task615_moviesqa_answer_generation 18 | task1347_glue_sts-b_similarity_classification 19 | task1520_qa_srl_answer_generation 20 | task142_odd-man-out_classification_no_category 21 | task874_opus_xhosanavy_sr 22 | task350_winomt_classification_gender_identifiability_pro 23 | task593_sciq_explanation_generation 24 | task715_mmmlu_answer_generation_international_law 25 | task888_reviews_classification 26 | task137_detoxifying-lms_classification_toxicity 27 | task149_afs_argument_quality_death_penalty 28 | task026_drop_question_generation 29 | task1483_chemical_extraction_chemprot_dataset 30 | task454_swag_incorrect_answer_generation 31 | task320_stereoset_classification_race 32 | task207_max_element_lists 33 | 
task733_mmmlu_answer_generation_security_studies 34 | task1421_mathqa_other 35 | task1508_wordnet_antonyms 36 | task311_race_question_generation 37 | task683_online_privacy_policy_text_purpose_answer_generation 38 | task875_emotion_classification 39 | task1147_country_currency 40 | task243_count_elements_in_set_intersection 41 | task301_record_question_generation 42 | task672_amazon_and_yelp_summarization_dataset_summarization 43 | task589_amazonfood_summary_text_generation 44 | task145_afs_argument_similarity_death_penalty 45 | task160_replace_letter_in_a_sentence 46 | task870_msmarco_answer_generation 47 | task1149_item_check_edible 48 | task1332_check_leap_year 49 | task126_scan_structured_text_generation_command_action_all 50 | task1501_dstc3_answer_generation 51 | task665_mmmlu_answer_generation_anatomy 52 | task316_crows-pairs_classification_stereotype 53 | task1296_wiki_hop_question_answering 54 | task834_mathdataset_classification 55 | task684_online_privacy_policy_text_information_type_generation 56 | task228_arc_answer_generation_easy 57 | task627_xlwic_word_with_same_meaning_sentence_generation 58 | task821_protoqa_question_generation 59 | task515_senteval_odd_word_out 60 | task286_olid_offense_judgment 61 | task488_extract_all_alphabetical_elements_from_list_in_order 62 | task162_count_words_starting_with_letter 63 | task104_semeval_2019_task10_closed_vocabulary_mathematical_answer_generation 64 | task1190_add_integer_to_list 65 | task130_scan_structured_text_generation_command_action_long 66 | task161_count_words_containing_letter 67 | task022_cosmosqa_passage_inappropriate_binary 68 | task1725_civil_comments_severtoxicity_classification 69 | -------------------------------------------------------------------------------- /reproduce_splits/TLAL_Exp0_all_10/HighPerplexity_0/train_tasks.txt: -------------------------------------------------------------------------------- 1 | task283_dream_incorrect_answer_generation 2 | task1366_healthfact_classification 
3 | task091_all_elements_from_index_i_to_j 4 | task299_storycloze_sentence_generation 5 | task505_count_all_numerical_elements_in_list 6 | task1714_convai3_sentence_generation 7 | task1188_count_max_freq_char 8 | task514_argument_consequence_classification 9 | task1730_personachat_choose_next 10 | task1419_mathqa_gain 11 | task368_synthetic_even_or_odd_calculation 12 | task756_find_longert_substring_and_return_all_unique_alphabets_in_it 13 | task710_mmmlu_answer_generation_high_school_statistics 14 | task1583_bless_meronym_classification 15 | task1389_hellaswag_completion 16 | task860_prost_mcq_generation 17 | task615_moviesqa_answer_generation 18 | task1347_glue_sts-b_similarity_classification 19 | task1520_qa_srl_answer_generation 20 | task142_odd-man-out_classification_no_category 21 | task874_opus_xhosanavy_sr 22 | task350_winomt_classification_gender_identifiability_pro 23 | task593_sciq_explanation_generation 24 | task715_mmmlu_answer_generation_international_law 25 | task888_reviews_classification 26 | task137_detoxifying-lms_classification_toxicity 27 | task149_afs_argument_quality_death_penalty 28 | task026_drop_question_generation 29 | task1483_chemical_extraction_chemprot_dataset 30 | task454_swag_incorrect_answer_generation 31 | task320_stereoset_classification_race 32 | task207_max_element_lists 33 | task733_mmmlu_answer_generation_security_studies 34 | task1421_mathqa_other 35 | task1508_wordnet_antonyms 36 | task311_race_question_generation 37 | task683_online_privacy_policy_text_purpose_answer_generation 38 | task875_emotion_classification 39 | task1147_country_currency 40 | task243_count_elements_in_set_intersection 41 | task301_record_question_generation 42 | task672_amazon_and_yelp_summarization_dataset_summarization 43 | task589_amazonfood_summary_text_generation 44 | task145_afs_argument_similarity_death_penalty 45 | task160_replace_letter_in_a_sentence 46 | task870_msmarco_answer_generation 47 | task1149_item_check_edible 48 | 
task1332_check_leap_year 49 | task126_scan_structured_text_generation_command_action_all 50 | task1501_dstc3_answer_generation 51 | task665_mmmlu_answer_generation_anatomy 52 | task316_crows-pairs_classification_stereotype 53 | task1296_wiki_hop_question_answering 54 | task834_mathdataset_classification 55 | task684_online_privacy_policy_text_information_type_generation 56 | task228_arc_answer_generation_easy 57 | task627_xlwic_word_with_same_meaning_sentence_generation 58 | task821_protoqa_question_generation 59 | task515_senteval_odd_word_out 60 | task286_olid_offense_judgment 61 | task488_extract_all_alphabetical_elements_from_list_in_order 62 | task162_count_words_starting_with_letter 63 | task104_semeval_2019_task10_closed_vocabulary_mathematical_answer_generation 64 | task1190_add_integer_to_list 65 | task130_scan_structured_text_generation_command_action_long 66 | task161_count_words_containing_letter 67 | task022_cosmosqa_passage_inappropriate_binary 68 | task1725_civil_comments_severtoxicity_classification 69 | -------------------------------------------------------------------------------- /reproduce_splits/TLAL_Exp0_all_10/LowPerplexity_0/train_tasks.txt: -------------------------------------------------------------------------------- 1 | task283_dream_incorrect_answer_generation 2 | task1366_healthfact_classification 3 | task091_all_elements_from_index_i_to_j 4 | task299_storycloze_sentence_generation 5 | task505_count_all_numerical_elements_in_list 6 | task1714_convai3_sentence_generation 7 | task1188_count_max_freq_char 8 | task514_argument_consequence_classification 9 | task1730_personachat_choose_next 10 | task1419_mathqa_gain 11 | task368_synthetic_even_or_odd_calculation 12 | task756_find_longert_substring_and_return_all_unique_alphabets_in_it 13 | task710_mmmlu_answer_generation_high_school_statistics 14 | task1583_bless_meronym_classification 15 | task1389_hellaswag_completion 16 | task860_prost_mcq_generation 17 | 
task615_moviesqa_answer_generation 18 | task1347_glue_sts-b_similarity_classification 19 | task1520_qa_srl_answer_generation 20 | task142_odd-man-out_classification_no_category 21 | task874_opus_xhosanavy_sr 22 | task350_winomt_classification_gender_identifiability_pro 23 | task593_sciq_explanation_generation 24 | task715_mmmlu_answer_generation_international_law 25 | task888_reviews_classification 26 | task137_detoxifying-lms_classification_toxicity 27 | task149_afs_argument_quality_death_penalty 28 | task026_drop_question_generation 29 | task1483_chemical_extraction_chemprot_dataset 30 | task454_swag_incorrect_answer_generation 31 | task320_stereoset_classification_race 32 | task207_max_element_lists 33 | task733_mmmlu_answer_generation_security_studies 34 | task1421_mathqa_other 35 | task1508_wordnet_antonyms 36 | task311_race_question_generation 37 | task683_online_privacy_policy_text_purpose_answer_generation 38 | task875_emotion_classification 39 | task1147_country_currency 40 | task243_count_elements_in_set_intersection 41 | task301_record_question_generation 42 | task672_amazon_and_yelp_summarization_dataset_summarization 43 | task589_amazonfood_summary_text_generation 44 | task145_afs_argument_similarity_death_penalty 45 | task160_replace_letter_in_a_sentence 46 | task870_msmarco_answer_generation 47 | task1149_item_check_edible 48 | task1332_check_leap_year 49 | task126_scan_structured_text_generation_command_action_all 50 | task1501_dstc3_answer_generation 51 | task665_mmmlu_answer_generation_anatomy 52 | task316_crows-pairs_classification_stereotype 53 | task1296_wiki_hop_question_answering 54 | task834_mathdataset_classification 55 | task684_online_privacy_policy_text_information_type_generation 56 | task228_arc_answer_generation_easy 57 | task627_xlwic_word_with_same_meaning_sentence_generation 58 | task821_protoqa_question_generation 59 | task515_senteval_odd_word_out 60 | task286_olid_offense_judgment 61 | 
task488_extract_all_alphabetical_elements_from_list_in_order 62 | task162_count_words_starting_with_letter 63 | task104_semeval_2019_task10_closed_vocabulary_mathematical_answer_generation 64 | task1190_add_integer_to_list 65 | task130_scan_structured_text_generation_command_action_long 66 | task161_count_words_containing_letter 67 | task022_cosmosqa_passage_inappropriate_binary 68 | task1725_civil_comments_severtoxicity_classification 69 | -------------------------------------------------------------------------------- /reproduce_splits/TLAL_Exp0_all_10/Random_0/train_tasks.txt: -------------------------------------------------------------------------------- 1 | task283_dream_incorrect_answer_generation 2 | task1366_healthfact_classification 3 | task091_all_elements_from_index_i_to_j 4 | task299_storycloze_sentence_generation 5 | task505_count_all_numerical_elements_in_list 6 | task1714_convai3_sentence_generation 7 | task1188_count_max_freq_char 8 | task514_argument_consequence_classification 9 | task1730_personachat_choose_next 10 | task1419_mathqa_gain 11 | task368_synthetic_even_or_odd_calculation 12 | task756_find_longert_substring_and_return_all_unique_alphabets_in_it 13 | task710_mmmlu_answer_generation_high_school_statistics 14 | task1583_bless_meronym_classification 15 | task1389_hellaswag_completion 16 | task860_prost_mcq_generation 17 | task615_moviesqa_answer_generation 18 | task1347_glue_sts-b_similarity_classification 19 | task1520_qa_srl_answer_generation 20 | task142_odd-man-out_classification_no_category 21 | task874_opus_xhosanavy_sr 22 | task350_winomt_classification_gender_identifiability_pro 23 | task593_sciq_explanation_generation 24 | task715_mmmlu_answer_generation_international_law 25 | task888_reviews_classification 26 | task137_detoxifying-lms_classification_toxicity 27 | task149_afs_argument_quality_death_penalty 28 | task026_drop_question_generation 29 | task1483_chemical_extraction_chemprot_dataset 30 | 
task454_swag_incorrect_answer_generation 31 | task320_stereoset_classification_race 32 | task207_max_element_lists 33 | task733_mmmlu_answer_generation_security_studies 34 | task1421_mathqa_other 35 | task1508_wordnet_antonyms 36 | task311_race_question_generation 37 | task683_online_privacy_policy_text_purpose_answer_generation 38 | task875_emotion_classification 39 | task1147_country_currency 40 | task243_count_elements_in_set_intersection 41 | task301_record_question_generation 42 | task672_amazon_and_yelp_summarization_dataset_summarization 43 | task589_amazonfood_summary_text_generation 44 | task145_afs_argument_similarity_death_penalty 45 | task160_replace_letter_in_a_sentence 46 | task870_msmarco_answer_generation 47 | task1149_item_check_edible 48 | task1332_check_leap_year 49 | task126_scan_structured_text_generation_command_action_all 50 | task1501_dstc3_answer_generation 51 | task665_mmmlu_answer_generation_anatomy 52 | task316_crows-pairs_classification_stereotype 53 | task1296_wiki_hop_question_answering 54 | task834_mathdataset_classification 55 | task684_online_privacy_policy_text_information_type_generation 56 | task228_arc_answer_generation_easy 57 | task627_xlwic_word_with_same_meaning_sentence_generation 58 | task821_protoqa_question_generation 59 | task515_senteval_odd_word_out 60 | task286_olid_offense_judgment 61 | task488_extract_all_alphabetical_elements_from_list_in_order 62 | task162_count_words_starting_with_letter 63 | task104_semeval_2019_task10_closed_vocabulary_mathematical_answer_generation 64 | task1190_add_integer_to_list 65 | task130_scan_structured_text_generation_command_action_long 66 | task161_count_words_containing_letter 67 | task022_cosmosqa_passage_inappropriate_binary 68 | task1725_civil_comments_severtoxicity_classification 69 | -------------------------------------------------------------------------------- /reproduce_splits/TLAL_Exp0_all_10/Random_1/train_tasks.txt: 
-------------------------------------------------------------------------------- 1 | task283_dream_incorrect_answer_generation 2 | task1366_healthfact_classification 3 | task091_all_elements_from_index_i_to_j 4 | task299_storycloze_sentence_generation 5 | task505_count_all_numerical_elements_in_list 6 | task1714_convai3_sentence_generation 7 | task1188_count_max_freq_char 8 | task514_argument_consequence_classification 9 | task1730_personachat_choose_next 10 | task1419_mathqa_gain 11 | task368_synthetic_even_or_odd_calculation 12 | task756_find_longert_substring_and_return_all_unique_alphabets_in_it 13 | task710_mmmlu_answer_generation_high_school_statistics 14 | task1583_bless_meronym_classification 15 | task1389_hellaswag_completion 16 | task860_prost_mcq_generation 17 | task615_moviesqa_answer_generation 18 | task1347_glue_sts-b_similarity_classification 19 | task1520_qa_srl_answer_generation 20 | task142_odd-man-out_classification_no_category 21 | task874_opus_xhosanavy_sr 22 | task350_winomt_classification_gender_identifiability_pro 23 | task593_sciq_explanation_generation 24 | task715_mmmlu_answer_generation_international_law 25 | task888_reviews_classification 26 | task137_detoxifying-lms_classification_toxicity 27 | task149_afs_argument_quality_death_penalty 28 | task026_drop_question_generation 29 | task1483_chemical_extraction_chemprot_dataset 30 | task454_swag_incorrect_answer_generation 31 | task320_stereoset_classification_race 32 | task207_max_element_lists 33 | task733_mmmlu_answer_generation_security_studies 34 | task1421_mathqa_other 35 | task1508_wordnet_antonyms 36 | task311_race_question_generation 37 | task683_online_privacy_policy_text_purpose_answer_generation 38 | task875_emotion_classification 39 | task1147_country_currency 40 | task243_count_elements_in_set_intersection 41 | task301_record_question_generation 42 | task672_amazon_and_yelp_summarization_dataset_summarization 43 | task589_amazonfood_summary_text_generation 44 | 
task145_afs_argument_similarity_death_penalty 45 | task160_replace_letter_in_a_sentence 46 | task870_msmarco_answer_generation 47 | task1149_item_check_edible 48 | task1332_check_leap_year 49 | task126_scan_structured_text_generation_command_action_all 50 | task1501_dstc3_answer_generation 51 | task665_mmmlu_answer_generation_anatomy 52 | task316_crows-pairs_classification_stereotype 53 | task1296_wiki_hop_question_answering 54 | task834_mathdataset_classification 55 | task684_online_privacy_policy_text_information_type_generation 56 | task228_arc_answer_generation_easy 57 | task627_xlwic_word_with_same_meaning_sentence_generation 58 | task821_protoqa_question_generation 59 | task515_senteval_odd_word_out 60 | task286_olid_offense_judgment 61 | task488_extract_all_alphabetical_elements_from_list_in_order 62 | task162_count_words_starting_with_letter 63 | task104_semeval_2019_task10_closed_vocabulary_mathematical_answer_generation 64 | task1190_add_integer_to_list 65 | task130_scan_structured_text_generation_command_action_long 66 | task161_count_words_containing_letter 67 | task022_cosmosqa_passage_inappropriate_binary 68 | task1725_civil_comments_severtoxicity_classification 69 | task227_clariq_classification 70 | task1289_trec_classification 71 | task493_review_polarity_classification 72 | task1210_atomic_classification_madeupof 73 | task327_jigsaw_classification_toxic 74 | task664_mmmlu_answer_generation_abstract_algebra 75 | task143_odd-man-out_classification_generate_category 76 | task566_circa_classification 77 | task854_hippocorpus_classification 78 | task1089_check_monotonic_array 79 | task317_crows-pairs_classification_stereotype_type 80 | task456_matres_intention_classification 81 | task706_mmmlu_answer_generation_high_school_mathematics 82 | task400_paws_paraphrase_classification 83 | task065_timetravel_consistent_sentence_classification 84 | task1201_atomic_classification_xintent 85 | task833_poem_sentiment_classification 86 | 
task1168_brown_coarse_pos_tagging 87 | task196_sentiment140_answer_generation 88 | task355_casino_classification_negotiation_other_need 89 | task358_casino_classification_negotiation_uv_part 90 | task1720_civil_comments_toxicity_classification 91 | task1727_wiqa_what_is_the_effect 92 | task274_overruling_legal_classification 93 | task077_splash_explanation_to_sql 94 | task166_clariq_sentence_generation 95 | task125_conala_pair_differences 96 | task275_enhanced_wsc_paraphrase_generation 97 | task1498_24hour_to_12hour_clock 98 | task574_air_dialogue_sentence_generation 99 | task927_yelp_negative_to_positive_style_transfer 100 | task1383_quarel_write_incorrect_answer 101 | task1310_amazonreview_rating_classification 102 | task367_synthetic_remove_floats 103 | task649_race_blank_question_generation 104 | task1487_organism_substance_extraction_anem_dataset 105 | task461_qasper_question_generation 106 | task045_miscellaneous_sentence_paraphrasing 107 | task155_count_nouns_verbs 108 | task631_dbpedia_14_incorrect_answer_generation 109 | task382_hybridqa_answer_generation 110 | task675_google_wellformed_query_sentence_generation 111 | task375_classify_type_of_sentence_in_debate 112 | task597_cuad_answer_generation 113 | task1445_closest_integers 114 | task1509_evalution_antonyms 115 | task1150_delete_max_min 116 | task072_abductivenli_answer_generation 117 | task088_identify_typo_verification 118 | task151_tomqa_find_location_easy_clean 119 | task636_extract_and_sort_unique_alphabets_in_a_list 120 | task955_wiki_auto_style_transfer 121 | task1596_event2mind_text_generation_2 122 | task1704_ljspeech_textmodification 123 | task746_yelp_restaurant_review_classification 124 | task1412_web_questions_question_answering 125 | task898_freebase_qa_answer_generation 126 | task1566_propara_structured_text_generation 127 | task183_rhyme_generation 128 | task1729_personachat_generate_next 129 | task599_cuad_question_generation 130 | task1479_organization_entity_extraction_btc_corpus 
131 | task506_position_of_all_alphabetical_elements_in_list 132 | task568_circa_question_generation 133 | task377_remove_words_of_given_length 134 | task381_boolq_question_generation 135 | task099_reverse_elements_between_index_i_and_j 136 | task084_babi_t1_single_supporting_fact_identify_relevant_fact 137 | -------------------------------------------------------------------------------- /reproduce_splits/TLAL_Exp0_all_10/init/train_tasks.txt: -------------------------------------------------------------------------------- 1 | task283_dream_incorrect_answer_generation 2 | task1366_healthfact_classification 3 | task091_all_elements_from_index_i_to_j 4 | task299_storycloze_sentence_generation 5 | task505_count_all_numerical_elements_in_list 6 | task1714_convai3_sentence_generation 7 | task1188_count_max_freq_char 8 | task514_argument_consequence_classification 9 | task1730_personachat_choose_next 10 | task1419_mathqa_gain 11 | task368_synthetic_even_or_odd_calculation 12 | task756_find_longert_substring_and_return_all_unique_alphabets_in_it 13 | task710_mmmlu_answer_generation_high_school_statistics 14 | task1583_bless_meronym_classification 15 | task1389_hellaswag_completion 16 | task860_prost_mcq_generation 17 | task615_moviesqa_answer_generation 18 | task1347_glue_sts-b_similarity_classification 19 | task1520_qa_srl_answer_generation 20 | task142_odd-man-out_classification_no_category 21 | task874_opus_xhosanavy_sr 22 | task350_winomt_classification_gender_identifiability_pro 23 | task593_sciq_explanation_generation 24 | task715_mmmlu_answer_generation_international_law 25 | task888_reviews_classification 26 | task137_detoxifying-lms_classification_toxicity 27 | task149_afs_argument_quality_death_penalty 28 | task026_drop_question_generation 29 | task1483_chemical_extraction_chemprot_dataset 30 | task454_swag_incorrect_answer_generation 31 | task320_stereoset_classification_race 32 | task207_max_element_lists 33 | task733_mmmlu_answer_generation_security_studies 
34 | task1421_mathqa_other 35 | task1508_wordnet_antonyms 36 | task311_race_question_generation 37 | task683_online_privacy_policy_text_purpose_answer_generation 38 | task875_emotion_classification 39 | task1147_country_currency 40 | task243_count_elements_in_set_intersection 41 | task301_record_question_generation 42 | task672_amazon_and_yelp_summarization_dataset_summarization 43 | task589_amazonfood_summary_text_generation 44 | task145_afs_argument_similarity_death_penalty 45 | task160_replace_letter_in_a_sentence 46 | task870_msmarco_answer_generation 47 | task1149_item_check_edible 48 | task1332_check_leap_year 49 | task126_scan_structured_text_generation_command_action_all 50 | task1501_dstc3_answer_generation 51 | task665_mmmlu_answer_generation_anatomy 52 | task316_crows-pairs_classification_stereotype 53 | task1296_wiki_hop_question_answering 54 | task834_mathdataset_classification 55 | task684_online_privacy_policy_text_information_type_generation 56 | task228_arc_answer_generation_easy 57 | task627_xlwic_word_with_same_meaning_sentence_generation 58 | task821_protoqa_question_generation 59 | task515_senteval_odd_word_out 60 | task286_olid_offense_judgment 61 | task488_extract_all_alphabetical_elements_from_list_in_order 62 | task162_count_words_starting_with_letter 63 | task104_semeval_2019_task10_closed_vocabulary_mathematical_answer_generation 64 | task1190_add_integer_to_list 65 | task130_scan_structured_text_generation_command_action_long 66 | task161_count_words_containing_letter 67 | task022_cosmosqa_passage_inappropriate_binary 68 | task1725_civil_comments_severtoxicity_classification 69 | -------------------------------------------------------------------------------- /reproduce_splits/TLAL_Exp0_all_20/FCGRatioPI-NL-I10-TDTE-High-0.2-Mean-abs-bald_0/train_tasks.txt: -------------------------------------------------------------------------------- 1 | task367_synthetic_remove_floats 2 | task377_remove_words_of_given_length 3 | 
task471_haspart_answer_generation 4 | task888_reviews_classification 5 | task724_mmmlu_answer_generation_moral_scenarios 6 | task862_asdiv_multidiv_question_answering 7 | task1426_country_independence_year 8 | task522_news_editorial_summary 9 | task1404_date_conversion 10 | task512_twitter_emotion_classification 11 | task921_code_x_glue_information_retreival 12 | task154_tomqa_find_location_hard_noise 13 | task340_winomt_classification_gender_pro 14 | task1333_check_validity_date_ddmmyyyy 15 | task347_hybridqa_incorrect_answer_generation 16 | task359_casino_classification_negotiation_vouch_fair 17 | task1401_obqa_sentence_generation 18 | task563_discofuse_answer_generation 19 | task297_storycloze_incorrect_end_classification 20 | task672_nummersense 21 | task1369_healthfact_sentence_generation 22 | task101_reverse_and_concatenate_all_elements_from_index_i_to_j 23 | task044_essential_terms_identifying_essential_words 24 | task1193_food_course_classification 25 | task144_subjqa_question_answering 26 | task094_conala_calculate_mean 27 | task1188_count_max_freq_char 28 | task703_mmmlu_answer_generation_high_school_geography 29 | task616_cola_classification 30 | task1669_md_gender_bias_text_modification 31 | task152_tomqa_find_location_easy_noise 32 | task089_swap_words_verification 33 | task161_count_words_containing_letter 34 | task1567_propara_question_generation 35 | task606_sum_of_all_numbers_in_list_between_positions_i_and_j 36 | task1289_trec_classification 37 | task625_xlwic_true_or_false_answer_generation 38 | task1214_atomic_classification_xwant 39 | task1443_string_to_number 40 | task160_replace_letter_in_a_sentence 41 | task100_concatenate_all_elements_from_index_i_to_j 42 | task319_stereoset_classification_profession 43 | task168_strategyqa_question_decomposition 44 | task833_poem_sentiment_classification 45 | task1418_bless_semantic_relation_classification 46 | task846_pubmedqa_classification 47 | task1419_mathqa_gain 48 | 
task516_senteval_conjoints_inversion 49 | task746_yelp_restaurant_review_classification 50 | task708_mmmlu_answer_generation_high_school_physics 51 | task1553_cnn_dailymail_summarization 52 | task178_quartz_question_answering 53 | task1314_country_abbreviation 54 | task713_mmmlu_answer_generation_human_aging 55 | task1721_civil_comments_obscenity_classification 56 | task906_dialogre_identify_names 57 | task1378_quarel_correct_answer_generation 58 | task592_sciq_incorrect_answer_generation 59 | task1517_limit_classfication 60 | task296_storycloze_correct_end_classification 61 | task709_mmmlu_answer_generation_high_school_psychology 62 | task1288_glue_mrpc_paraphrasing 63 | task228_arc_answer_generation_easy 64 | task819_pec_sentiment_classification 65 | task706_mmmlu_answer_generation_high_school_mathematics 66 | task1729_personachat_generate_next 67 | task1291_multi_news_summarization 68 | task608_sbic_sexual_offense_binary_classification 69 | -------------------------------------------------------------------------------- /reproduce_splits/TLAL_Exp0_all_20/FCGRatioPI-NL-I10-TDTE-High-0.2-Mean-abs-bald_1/train_tasks.txt: -------------------------------------------------------------------------------- 1 | task367_synthetic_remove_floats 2 | task377_remove_words_of_given_length 3 | task471_haspart_answer_generation 4 | task888_reviews_classification 5 | task724_mmmlu_answer_generation_moral_scenarios 6 | task862_asdiv_multidiv_question_answering 7 | task1426_country_independence_year 8 | task522_news_editorial_summary 9 | task1404_date_conversion 10 | task512_twitter_emotion_classification 11 | task921_code_x_glue_information_retreival 12 | task154_tomqa_find_location_hard_noise 13 | task340_winomt_classification_gender_pro 14 | task1333_check_validity_date_ddmmyyyy 15 | task347_hybridqa_incorrect_answer_generation 16 | task359_casino_classification_negotiation_vouch_fair 17 | task1401_obqa_sentence_generation 18 | task563_discofuse_answer_generation 19 | 
task297_storycloze_incorrect_end_classification 20 | task672_nummersense 21 | task1369_healthfact_sentence_generation 22 | task101_reverse_and_concatenate_all_elements_from_index_i_to_j 23 | task044_essential_terms_identifying_essential_words 24 | task1193_food_course_classification 25 | task144_subjqa_question_answering 26 | task094_conala_calculate_mean 27 | task1188_count_max_freq_char 28 | task703_mmmlu_answer_generation_high_school_geography 29 | task616_cola_classification 30 | task1669_md_gender_bias_text_modification 31 | task152_tomqa_find_location_easy_noise 32 | task089_swap_words_verification 33 | task161_count_words_containing_letter 34 | task1567_propara_question_generation 35 | task606_sum_of_all_numbers_in_list_between_positions_i_and_j 36 | task1289_trec_classification 37 | task625_xlwic_true_or_false_answer_generation 38 | task1214_atomic_classification_xwant 39 | task1443_string_to_number 40 | task160_replace_letter_in_a_sentence 41 | task100_concatenate_all_elements_from_index_i_to_j 42 | task319_stereoset_classification_profession 43 | task168_strategyqa_question_decomposition 44 | task833_poem_sentiment_classification 45 | task1418_bless_semantic_relation_classification 46 | task846_pubmedqa_classification 47 | task1419_mathqa_gain 48 | task516_senteval_conjoints_inversion 49 | task746_yelp_restaurant_review_classification 50 | task708_mmmlu_answer_generation_high_school_physics 51 | task1553_cnn_dailymail_summarization 52 | task178_quartz_question_answering 53 | task1314_country_abbreviation 54 | task713_mmmlu_answer_generation_human_aging 55 | task1721_civil_comments_obscenity_classification 56 | task906_dialogre_identify_names 57 | task1378_quarel_correct_answer_generation 58 | task592_sciq_incorrect_answer_generation 59 | task1517_limit_classfication 60 | task296_storycloze_correct_end_classification 61 | task709_mmmlu_answer_generation_high_school_psychology 62 | task1288_glue_mrpc_paraphrasing 63 | task228_arc_answer_generation_easy 64 | 
task819_pec_sentiment_classification 65 | task706_mmmlu_answer_generation_high_school_mathematics 66 | task1729_personachat_generate_next 67 | task1291_multi_news_summarization 68 | task608_sbic_sexual_offense_binary_classification 69 | task564_discofuse_classification 70 | task583_udeps_eng_coarse_pos_tagging 71 | task1606_ethos_text_classification 72 | task682_online_privacy_policy_text_classification 73 | task212_logic2text_classification 74 | task309_race_answer_generation 75 | task584_udeps_eng_fine_pos_tagging 76 | task280_stereoset_classification_stereotype_type 77 | task1354_sent_comp_classification 78 | task1645_medical_question_pair_dataset_text_classification 79 | task1328_qa_zre_relation_generation_from_question 80 | task1727_wiqa_what_is_the_effect 81 | task617_amazonreview_category_text_generation 82 | task889_goemotions_classification 83 | task1361_movierationales_classification 84 | task848_pubmedqa_classification 85 | task337_hateeval_classification_individual_en 86 | task398_semeval_2018_task1_tweet_joy_detection 87 | task1434_head_qa_classification 88 | task1661_super_glue_classification 89 | task903_deceptive_opinion_spam_classification 90 | task1500_dstc3_classification 91 | task900_freebase_qa_category_classification 92 | task1167_penn_treebank_coarse_pos_tagging 93 | task597_cuad_answer_generation 94 | task681_hope_edi_malayalam_text_classification 95 | task1339_peixian_equity_evaluation_corpus_text_completion 96 | task170_hotpotqa_answer_generation 97 | task191_hotpotqa_question_generation 98 | task183_rhyme_generation 99 | task585_preposition_classification 100 | task1406_kth_smallest_element 101 | task310_race_classification 102 | task1510_evalution_relation_extraction 103 | task276_enhanced_wsc_classification 104 | task631_dbpedia_14_incorrect_answer_generation 105 | task275_enhanced_wsc_paraphrase_generation 106 | task1427_country_region_in_world 107 | task129_scan_long_text_generation_action_command_short 108 | 
task1451_drug_dose_extraction 109 | task1194_kth_largest_element 110 | task389_torque_generate_temporal_question 111 | task1607_ethos_text_classification 112 | task1448_disease_entity_extraction_ncbi_dataset 113 | task1568_propara_classification 114 | task576_curiosity_dialogs_answer_generation 115 | task598_cuad_answer_generation 116 | task142_odd-man-out_classification_no_category 117 | task141_odd-man-out_classification_category 118 | task244_count_elements_in_set_union 119 | task399_semeval_2018_task1_tweet_sadness_detection 120 | task573_air_dialogue_classification 121 | task1315_find_range_array 122 | task1310_amazonreview_rating_classification 123 | task268_casehold_legal_answer_generation 124 | task683_online_privacy_policy_text_purpose_answer_generation 125 | task1580_eqasc-perturbed_question_generation 126 | task1730_personachat_choose_next 127 | task1322_country_government_type 128 | task517_emo_classify_emotion_of_dialogue 129 | task342_winomt_classification_profession_pro 130 | task1399_obqa_answer_generation 131 | task379_agnews_topic_classification 132 | task1338_peixian_equity_evaluation_corpus_sentiment_classifier 133 | task594_sciq_question_generation 134 | task131_scan_long_text_generation_action_command_long 135 | task515_senteval_odd_word_out 136 | task1498_24hour_to_12hour_clock 137 | -------------------------------------------------------------------------------- /reproduce_splits/TLAL_Exp0_all_20/HighPerplexity_0/train_tasks.txt: -------------------------------------------------------------------------------- 1 | task367_synthetic_remove_floats 2 | task377_remove_words_of_given_length 3 | task471_haspart_answer_generation 4 | task888_reviews_classification 5 | task724_mmmlu_answer_generation_moral_scenarios 6 | task862_asdiv_multidiv_question_answering 7 | task1426_country_independence_year 8 | task522_news_editorial_summary 9 | task1404_date_conversion 10 | task512_twitter_emotion_classification 11 | 
task921_code_x_glue_information_retreival 12 | task154_tomqa_find_location_hard_noise 13 | task340_winomt_classification_gender_pro 14 | task1333_check_validity_date_ddmmyyyy 15 | task347_hybridqa_incorrect_answer_generation 16 | task359_casino_classification_negotiation_vouch_fair 17 | task1401_obqa_sentence_generation 18 | task563_discofuse_answer_generation 19 | task297_storycloze_incorrect_end_classification 20 | task672_nummersense 21 | task1369_healthfact_sentence_generation 22 | task101_reverse_and_concatenate_all_elements_from_index_i_to_j 23 | task044_essential_terms_identifying_essential_words 24 | task1193_food_course_classification 25 | task144_subjqa_question_answering 26 | task094_conala_calculate_mean 27 | task1188_count_max_freq_char 28 | task703_mmmlu_answer_generation_high_school_geography 29 | task616_cola_classification 30 | task1669_md_gender_bias_text_modification 31 | task152_tomqa_find_location_easy_noise 32 | task089_swap_words_verification 33 | task161_count_words_containing_letter 34 | task1567_propara_question_generation 35 | task606_sum_of_all_numbers_in_list_between_positions_i_and_j 36 | task1289_trec_classification 37 | task625_xlwic_true_or_false_answer_generation 38 | task1214_atomic_classification_xwant 39 | task1443_string_to_number 40 | task160_replace_letter_in_a_sentence 41 | task100_concatenate_all_elements_from_index_i_to_j 42 | task319_stereoset_classification_profession 43 | task168_strategyqa_question_decomposition 44 | task833_poem_sentiment_classification 45 | task1418_bless_semantic_relation_classification 46 | task846_pubmedqa_classification 47 | task1419_mathqa_gain 48 | task516_senteval_conjoints_inversion 49 | task746_yelp_restaurant_review_classification 50 | task708_mmmlu_answer_generation_high_school_physics 51 | task1553_cnn_dailymail_summarization 52 | task178_quartz_question_answering 53 | task1314_country_abbreviation 54 | task713_mmmlu_answer_generation_human_aging 55 | 
task1721_civil_comments_obscenity_classification 56 | task906_dialogre_identify_names 57 | task1378_quarel_correct_answer_generation 58 | task592_sciq_incorrect_answer_generation 59 | task1517_limit_classfication 60 | task296_storycloze_correct_end_classification 61 | task709_mmmlu_answer_generation_high_school_psychology 62 | task1288_glue_mrpc_paraphrasing 63 | task228_arc_answer_generation_easy 64 | task819_pec_sentiment_classification 65 | task706_mmmlu_answer_generation_high_school_mathematics 66 | task1729_personachat_generate_next 67 | task1291_multi_news_summarization 68 | task608_sbic_sexual_offense_binary_classification 69 | -------------------------------------------------------------------------------- /reproduce_splits/TLAL_Exp0_all_20/HighPerplexity_1/train_tasks.txt: -------------------------------------------------------------------------------- 1 | task367_synthetic_remove_floats 2 | task377_remove_words_of_given_length 3 | task471_haspart_answer_generation 4 | task888_reviews_classification 5 | task724_mmmlu_answer_generation_moral_scenarios 6 | task862_asdiv_multidiv_question_answering 7 | task1426_country_independence_year 8 | task522_news_editorial_summary 9 | task1404_date_conversion 10 | task512_twitter_emotion_classification 11 | task921_code_x_glue_information_retreival 12 | task154_tomqa_find_location_hard_noise 13 | task340_winomt_classification_gender_pro 14 | task1333_check_validity_date_ddmmyyyy 15 | task347_hybridqa_incorrect_answer_generation 16 | task359_casino_classification_negotiation_vouch_fair 17 | task1401_obqa_sentence_generation 18 | task563_discofuse_answer_generation 19 | task297_storycloze_incorrect_end_classification 20 | task672_nummersense 21 | task1369_healthfact_sentence_generation 22 | task101_reverse_and_concatenate_all_elements_from_index_i_to_j 23 | task044_essential_terms_identifying_essential_words 24 | task1193_food_course_classification 25 | task144_subjqa_question_answering 26 | task094_conala_calculate_mean 
27 | task1188_count_max_freq_char 28 | task703_mmmlu_answer_generation_high_school_geography 29 | task616_cola_classification 30 | task1669_md_gender_bias_text_modification 31 | task152_tomqa_find_location_easy_noise 32 | task089_swap_words_verification 33 | task161_count_words_containing_letter 34 | task1567_propara_question_generation 35 | task606_sum_of_all_numbers_in_list_between_positions_i_and_j 36 | task1289_trec_classification 37 | task625_xlwic_true_or_false_answer_generation 38 | task1214_atomic_classification_xwant 39 | task1443_string_to_number 40 | task160_replace_letter_in_a_sentence 41 | task100_concatenate_all_elements_from_index_i_to_j 42 | task319_stereoset_classification_profession 43 | task168_strategyqa_question_decomposition 44 | task833_poem_sentiment_classification 45 | task1418_bless_semantic_relation_classification 46 | task846_pubmedqa_classification 47 | task1419_mathqa_gain 48 | task516_senteval_conjoints_inversion 49 | task746_yelp_restaurant_review_classification 50 | task708_mmmlu_answer_generation_high_school_physics 51 | task1553_cnn_dailymail_summarization 52 | task178_quartz_question_answering 53 | task1314_country_abbreviation 54 | task713_mmmlu_answer_generation_human_aging 55 | task1721_civil_comments_obscenity_classification 56 | task906_dialogre_identify_names 57 | task1378_quarel_correct_answer_generation 58 | task592_sciq_incorrect_answer_generation 59 | task1517_limit_classfication 60 | task296_storycloze_correct_end_classification 61 | task709_mmmlu_answer_generation_high_school_psychology 62 | task1288_glue_mrpc_paraphrasing 63 | task228_arc_answer_generation_easy 64 | task819_pec_sentiment_classification 65 | task706_mmmlu_answer_generation_high_school_mathematics 66 | task1729_personachat_generate_next 67 | task1291_multi_news_summarization 68 | task608_sbic_sexual_offense_binary_classification 69 | task1168_brown_coarse_pos_tagging 70 | task1593_yahoo_answers_topics_classification 71 | 
task1503_hatexplain_classification 72 | task715_mmmlu_answer_generation_international_law 73 | task1541_agnews_classification 74 | task721_mmmlu_answer_generation_medical_genetics 75 | task617_amazonreview_category_text_generation 76 | task712_mmmlu_answer_generation_high_school_world_history 77 | task664_mmmlu_answer_generation_abstract_algebra 78 | task665_mmmlu_answer_generation_anatomy 79 | task732_mmmlu_answer_generation_public_relations 80 | task700_mmmlu_answer_generation_high_school_chemistry 81 | task699_mmmlu_answer_generation_high_school_biology 82 | task693_mmmlu_answer_generation_conceptual_physics 83 | task722_mmmlu_answer_generation_random_topic 84 | task667_mmmlu_answer_generation_business_ethics 85 | task073_commonsenseqa_answer_generation 86 | task900_freebase_qa_category_classification 87 | task702_mmmlu_answer_generation_high_school_european_history 88 | task707_mmmlu_answer_generation_high_school_microeconomics 89 | task723_mmmlu_answer_generation_moral_disputes 90 | task698_mmmlu_answer_generation_global_facts 91 | task309_race_answer_generation 92 | task696_mmmlu_answer_generation_elementary_mathematics 93 | task1428_country_surface_area 94 | task1656_gooaq_answer_generation 95 | task300_storycloze_order_generation 96 | task087_new_operator_addsub_arithmetic 97 | task085_unnatural_addsub_arithmetic 98 | task499_extract_and_add_all_numbers_from_list 99 | task1317_country_calling_code 100 | task752_svamp_multiplication_question_answering 101 | task1726_mathqa_correct_answer_generation 102 | task745_ai2_arithmetic_questions_arithmetic 103 | task1293_kilt_tasks_hotpotqa_question_answering 104 | task866_mawps_multidiv_question_answering 105 | task861_asdiv_addsub_question_answering 106 | task344_hybridqa_answer_generation 107 | task864_asdiv_singleop_question_answering 108 | task157_count_vowels_and_consonants 109 | task093_conala_normalize_lists 110 | task863_asdiv_multiop_question_answering 111 | 
task278_stereoset_sentence_generation_antistereotype 112 | task1217_atomic_answer_generation 113 | task277_stereoset_sentence_generation_stereotype 114 | task1318_country_national_dish 115 | task1425_country_iso_numeric 116 | task1319_country_by_barcode_prefix 117 | task1564_triviaqa_answer_generation 118 | task095_conala_max_absolute_value 119 | task1315_find_range_array 120 | task865_mawps_addsub_question_answering 121 | task090_equation_learner_algebra 122 | task1446_farthest_integers 123 | task867_mawps_multiop_question_answering 124 | task753_svamp_addition_question_answering 125 | task582_naturalquestion_answer_generation 126 | task504_count_all_alphabetical_elements_in_list 127 | task898_freebase_qa_answer_generation 128 | task1405_find_median 129 | task345_hybridqa_answer_generation 130 | task081_piqa_wrong_answer_generation 131 | task565_circa_answer_generation 132 | task598_cuad_answer_generation 133 | task899_freebase_qa_topic_generation 134 | task080_piqa_answer_generation 135 | task956_leetcode_420_strong_password_check 136 | task105_story_cloze-rocstories_sentence_generation 137 | -------------------------------------------------------------------------------- /reproduce_splits/TLAL_Exp0_all_20/LowPerplexity_0/train_tasks.txt: -------------------------------------------------------------------------------- 1 | task367_synthetic_remove_floats 2 | task377_remove_words_of_given_length 3 | task471_haspart_answer_generation 4 | task888_reviews_classification 5 | task724_mmmlu_answer_generation_moral_scenarios 6 | task862_asdiv_multidiv_question_answering 7 | task1426_country_independence_year 8 | task522_news_editorial_summary 9 | task1404_date_conversion 10 | task512_twitter_emotion_classification 11 | task921_code_x_glue_information_retreival 12 | task154_tomqa_find_location_hard_noise 13 | task340_winomt_classification_gender_pro 14 | task1333_check_validity_date_ddmmyyyy 15 | task347_hybridqa_incorrect_answer_generation 16 | 
task359_casino_classification_negotiation_vouch_fair 17 | task1401_obqa_sentence_generation 18 | task563_discofuse_answer_generation 19 | task297_storycloze_incorrect_end_classification 20 | task672_nummersense 21 | task1369_healthfact_sentence_generation 22 | task101_reverse_and_concatenate_all_elements_from_index_i_to_j 23 | task044_essential_terms_identifying_essential_words 24 | task1193_food_course_classification 25 | task144_subjqa_question_answering 26 | task094_conala_calculate_mean 27 | task1188_count_max_freq_char 28 | task703_mmmlu_answer_generation_high_school_geography 29 | task616_cola_classification 30 | task1669_md_gender_bias_text_modification 31 | task152_tomqa_find_location_easy_noise 32 | task089_swap_words_verification 33 | task161_count_words_containing_letter 34 | task1567_propara_question_generation 35 | task606_sum_of_all_numbers_in_list_between_positions_i_and_j 36 | task1289_trec_classification 37 | task625_xlwic_true_or_false_answer_generation 38 | task1214_atomic_classification_xwant 39 | task1443_string_to_number 40 | task160_replace_letter_in_a_sentence 41 | task100_concatenate_all_elements_from_index_i_to_j 42 | task319_stereoset_classification_profession 43 | task168_strategyqa_question_decomposition 44 | task833_poem_sentiment_classification 45 | task1418_bless_semantic_relation_classification 46 | task846_pubmedqa_classification 47 | task1419_mathqa_gain 48 | task516_senteval_conjoints_inversion 49 | task746_yelp_restaurant_review_classification 50 | task708_mmmlu_answer_generation_high_school_physics 51 | task1553_cnn_dailymail_summarization 52 | task178_quartz_question_answering 53 | task1314_country_abbreviation 54 | task713_mmmlu_answer_generation_human_aging 55 | task1721_civil_comments_obscenity_classification 56 | task906_dialogre_identify_names 57 | task1378_quarel_correct_answer_generation 58 | task592_sciq_incorrect_answer_generation 59 | task1517_limit_classfication 60 | task296_storycloze_correct_end_classification 61 
| task709_mmmlu_answer_generation_high_school_psychology 62 | task1288_glue_mrpc_paraphrasing 63 | task228_arc_answer_generation_easy 64 | task819_pec_sentiment_classification 65 | task706_mmmlu_answer_generation_high_school_mathematics 66 | task1729_personachat_generate_next 67 | task1291_multi_news_summarization 68 | task608_sbic_sexual_offense_binary_classification 69 | -------------------------------------------------------------------------------- /reproduce_splits/TLAL_Exp0_all_20/LowPerplexity_1/train_tasks.txt: -------------------------------------------------------------------------------- 1 | task367_synthetic_remove_floats 2 | task377_remove_words_of_given_length 3 | task471_haspart_answer_generation 4 | task888_reviews_classification 5 | task724_mmmlu_answer_generation_moral_scenarios 6 | task862_asdiv_multidiv_question_answering 7 | task1426_country_independence_year 8 | task522_news_editorial_summary 9 | task1404_date_conversion 10 | task512_twitter_emotion_classification 11 | task921_code_x_glue_information_retreival 12 | task154_tomqa_find_location_hard_noise 13 | task340_winomt_classification_gender_pro 14 | task1333_check_validity_date_ddmmyyyy 15 | task347_hybridqa_incorrect_answer_generation 16 | task359_casino_classification_negotiation_vouch_fair 17 | task1401_obqa_sentence_generation 18 | task563_discofuse_answer_generation 19 | task297_storycloze_incorrect_end_classification 20 | task672_nummersense 21 | task1369_healthfact_sentence_generation 22 | task101_reverse_and_concatenate_all_elements_from_index_i_to_j 23 | task044_essential_terms_identifying_essential_words 24 | task1193_food_course_classification 25 | task144_subjqa_question_answering 26 | task094_conala_calculate_mean 27 | task1188_count_max_freq_char 28 | task703_mmmlu_answer_generation_high_school_geography 29 | task616_cola_classification 30 | task1669_md_gender_bias_text_modification 31 | task152_tomqa_find_location_easy_noise 32 | task089_swap_words_verification 33 | 
task161_count_words_containing_letter 34 | task1567_propara_question_generation 35 | task606_sum_of_all_numbers_in_list_between_positions_i_and_j 36 | task1289_trec_classification 37 | task625_xlwic_true_or_false_answer_generation 38 | task1214_atomic_classification_xwant 39 | task1443_string_to_number 40 | task160_replace_letter_in_a_sentence 41 | task100_concatenate_all_elements_from_index_i_to_j 42 | task319_stereoset_classification_profession 43 | task168_strategyqa_question_decomposition 44 | task833_poem_sentiment_classification 45 | task1418_bless_semantic_relation_classification 46 | task846_pubmedqa_classification 47 | task1419_mathqa_gain 48 | task516_senteval_conjoints_inversion 49 | task746_yelp_restaurant_review_classification 50 | task708_mmmlu_answer_generation_high_school_physics 51 | task1553_cnn_dailymail_summarization 52 | task178_quartz_question_answering 53 | task1314_country_abbreviation 54 | task713_mmmlu_answer_generation_human_aging 55 | task1721_civil_comments_obscenity_classification 56 | task906_dialogre_identify_names 57 | task1378_quarel_correct_answer_generation 58 | task592_sciq_incorrect_answer_generation 59 | task1517_limit_classfication 60 | task296_storycloze_correct_end_classification 61 | task709_mmmlu_answer_generation_high_school_psychology 62 | task1288_glue_mrpc_paraphrasing 63 | task228_arc_answer_generation_easy 64 | task819_pec_sentiment_classification 65 | task706_mmmlu_answer_generation_high_school_mathematics 66 | task1729_personachat_generate_next 67 | task1291_multi_news_summarization 68 | task608_sbic_sexual_offense_binary_classification 69 | task341_winomt_classification_gender_anti 70 | task902_deceptive_opinion_spam_classification 71 | task1312_amazonreview_polarity_classification 72 | task286_olid_offense_judgment 73 | task428_senteval_inversion 74 | task1495_adverse_drug_event_classification 75 | task364_regard_social_impact_classification 76 | task925_coached_conv_pref_classifier 77 | 
task387_semeval_2018_task3_irony_classification 78 | task384_socialiqa_question_classification 79 | task766_craigslist_bargains_classification 80 | task323_jigsaw_classification_sexually_explicit 81 | task875_emotion_classification 82 | task195_sentiment140_classification 83 | task386_semeval_2018_task3_irony_detection 84 | task1706_ljspeech_classification 85 | task1313_amazonreview_polarity_classification 86 | task346_hybridqa_classification 87 | task929_products_reviews_classification 88 | task274_overruling_legal_classification 89 | task350_winomt_classification_gender_identifiability_pro 90 | task1308_amazonreview_category_classification 91 | task1605_ethos_text_classification 92 | task673_google_wellformed_query_classification 93 | task369_synthetic_remove_odds 94 | task205_remove_even_elements 95 | task370_synthetic_remove_divisible_by_3 96 | task153_tomqa_find_location_hard_clean 97 | task366_synthetic_return_primes 98 | task1151_swap_max_min 99 | task125_conala_pair_differences 100 | task547_alt_translation_entk_en 101 | task1331_reverse_array 102 | task1340_msr_text_compression_compression 103 | task372_synthetic_palindrome_numbers 104 | task151_tomqa_find_location_easy_clean 105 | task515_senteval_odd_word_out 106 | task207_max_element_lists 107 | task560_alt_translation_en_entk 108 | task1316_remove_duplicates_string 109 | task1150_delete_max_min 110 | task1568_propara_classification 111 | task586_amazonfood_polarity_classification 112 | task1705_ljspeech_classification 113 | task577_curiosity_dialogs_classification 114 | task123_conala_sort_dictionary 115 | task926_coached_conv_pref_word_generation 116 | task573_air_dialogue_classification 117 | task1355_sent_comp_summarization 118 | task1338_peixian_equity_evaluation_corpus_sentiment_classifier 119 | task1087_two_number_sum 120 | task076_splash_correcting_sql_mistake 121 | task1506_celebrity_minimal_dob_span 122 | task1444_round_power_of_two 123 | 
task755_find_longest_substring_and_replace_its_sorted_lowercase_version_in_both_lists 124 | task934_turk_simplification 125 | task084_babi_t1_single_supporting_fact_identify_relevant_fact 126 | task1580_eqasc-perturbed_question_generation 127 | task378_reverse_words_of_given_length 128 | task111_asset_sentence_simplification 129 | task1704_ljspeech_textmodification 130 | task096_conala_list_index_subtraction 131 | task955_wiki_auto_style_transfer 132 | task181_outcome_extraction 133 | task1508_wordnet_antonyms 134 | task868_cfq_mcd1_explanation_to_sql 135 | task675_google_wellformed_query_sentence_generation 136 | task1453_person_entity_extraction_btc_corpus 137 | -------------------------------------------------------------------------------- /reproduce_splits/TLAL_Exp0_all_20/Random_0/train_tasks.txt: -------------------------------------------------------------------------------- 1 | task367_synthetic_remove_floats 2 | task377_remove_words_of_given_length 3 | task471_haspart_answer_generation 4 | task888_reviews_classification 5 | task724_mmmlu_answer_generation_moral_scenarios 6 | task862_asdiv_multidiv_question_answering 7 | task1426_country_independence_year 8 | task522_news_editorial_summary 9 | task1404_date_conversion 10 | task512_twitter_emotion_classification 11 | task921_code_x_glue_information_retreival 12 | task154_tomqa_find_location_hard_noise 13 | task340_winomt_classification_gender_pro 14 | task1333_check_validity_date_ddmmyyyy 15 | task347_hybridqa_incorrect_answer_generation 16 | task359_casino_classification_negotiation_vouch_fair 17 | task1401_obqa_sentence_generation 18 | task563_discofuse_answer_generation 19 | task297_storycloze_incorrect_end_classification 20 | task672_nummersense 21 | task1369_healthfact_sentence_generation 22 | task101_reverse_and_concatenate_all_elements_from_index_i_to_j 23 | task044_essential_terms_identifying_essential_words 24 | task1193_food_course_classification 25 | task144_subjqa_question_answering 26 | 
task094_conala_calculate_mean 27 | task1188_count_max_freq_char 28 | task703_mmmlu_answer_generation_high_school_geography 29 | task616_cola_classification 30 | task1669_md_gender_bias_text_modification 31 | task152_tomqa_find_location_easy_noise 32 | task089_swap_words_verification 33 | task161_count_words_containing_letter 34 | task1567_propara_question_generation 35 | task606_sum_of_all_numbers_in_list_between_positions_i_and_j 36 | task1289_trec_classification 37 | task625_xlwic_true_or_false_answer_generation 38 | task1214_atomic_classification_xwant 39 | task1443_string_to_number 40 | task160_replace_letter_in_a_sentence 41 | task100_concatenate_all_elements_from_index_i_to_j 42 | task319_stereoset_classification_profession 43 | task168_strategyqa_question_decomposition 44 | task833_poem_sentiment_classification 45 | task1418_bless_semantic_relation_classification 46 | task846_pubmedqa_classification 47 | task1419_mathqa_gain 48 | task516_senteval_conjoints_inversion 49 | task746_yelp_restaurant_review_classification 50 | task708_mmmlu_answer_generation_high_school_physics 51 | task1553_cnn_dailymail_summarization 52 | task178_quartz_question_answering 53 | task1314_country_abbreviation 54 | task713_mmmlu_answer_generation_human_aging 55 | task1721_civil_comments_obscenity_classification 56 | task906_dialogre_identify_names 57 | task1378_quarel_correct_answer_generation 58 | task592_sciq_incorrect_answer_generation 59 | task1517_limit_classfication 60 | task296_storycloze_correct_end_classification 61 | task709_mmmlu_answer_generation_high_school_psychology 62 | task1288_glue_mrpc_paraphrasing 63 | task228_arc_answer_generation_easy 64 | task819_pec_sentiment_classification 65 | task706_mmmlu_answer_generation_high_school_mathematics 66 | task1729_personachat_generate_next 67 | task1291_multi_news_summarization 68 | task608_sbic_sexual_offense_binary_classification 69 | -------------------------------------------------------------------------------- 
/reproduce_splits/TLAL_Exp0_all_20/Random_1/train_tasks.txt: -------------------------------------------------------------------------------- 1 | task367_synthetic_remove_floats 2 | task377_remove_words_of_given_length 3 | task471_haspart_answer_generation 4 | task888_reviews_classification 5 | task724_mmmlu_answer_generation_moral_scenarios 6 | task862_asdiv_multidiv_question_answering 7 | task1426_country_independence_year 8 | task522_news_editorial_summary 9 | task1404_date_conversion 10 | task512_twitter_emotion_classification 11 | task921_code_x_glue_information_retreival 12 | task154_tomqa_find_location_hard_noise 13 | task340_winomt_classification_gender_pro 14 | task1333_check_validity_date_ddmmyyyy 15 | task347_hybridqa_incorrect_answer_generation 16 | task359_casino_classification_negotiation_vouch_fair 17 | task1401_obqa_sentence_generation 18 | task563_discofuse_answer_generation 19 | task297_storycloze_incorrect_end_classification 20 | task672_nummersense 21 | task1369_healthfact_sentence_generation 22 | task101_reverse_and_concatenate_all_elements_from_index_i_to_j 23 | task044_essential_terms_identifying_essential_words 24 | task1193_food_course_classification 25 | task144_subjqa_question_answering 26 | task094_conala_calculate_mean 27 | task1188_count_max_freq_char 28 | task703_mmmlu_answer_generation_high_school_geography 29 | task616_cola_classification 30 | task1669_md_gender_bias_text_modification 31 | task152_tomqa_find_location_easy_noise 32 | task089_swap_words_verification 33 | task161_count_words_containing_letter 34 | task1567_propara_question_generation 35 | task606_sum_of_all_numbers_in_list_between_positions_i_and_j 36 | task1289_trec_classification 37 | task625_xlwic_true_or_false_answer_generation 38 | task1214_atomic_classification_xwant 39 | task1443_string_to_number 40 | task160_replace_letter_in_a_sentence 41 | task100_concatenate_all_elements_from_index_i_to_j 42 | task319_stereoset_classification_profession 43 | 
task168_strategyqa_question_decomposition 44 | task833_poem_sentiment_classification 45 | task1418_bless_semantic_relation_classification 46 | task846_pubmedqa_classification 47 | task1419_mathqa_gain 48 | task516_senteval_conjoints_inversion 49 | task746_yelp_restaurant_review_classification 50 | task708_mmmlu_answer_generation_high_school_physics 51 | task1553_cnn_dailymail_summarization 52 | task178_quartz_question_answering 53 | task1314_country_abbreviation 54 | task713_mmmlu_answer_generation_human_aging 55 | task1721_civil_comments_obscenity_classification 56 | task906_dialogre_identify_names 57 | task1378_quarel_correct_answer_generation 58 | task592_sciq_incorrect_answer_generation 59 | task1517_limit_classfication 60 | task296_storycloze_correct_end_classification 61 | task709_mmmlu_answer_generation_high_school_psychology 62 | task1288_glue_mrpc_paraphrasing 63 | task228_arc_answer_generation_easy 64 | task819_pec_sentiment_classification 65 | task706_mmmlu_answer_generation_high_school_mathematics 66 | task1729_personachat_generate_next 67 | task1291_multi_news_summarization 68 | task608_sbic_sexual_offense_binary_classification 69 | task138_detoxifying-lms_classification_fluency 70 | task875_emotion_classification 71 | task322_jigsaw_classification_threat 72 | task855_conv_ai_2_classification 73 | task456_matres_intention_classification 74 | task1429_evalution_semantic_relation_classification 75 | task1149_item_check_edible 76 | task665_mmmlu_answer_generation_anatomy 77 | task475_yelp_polarity_classification 78 | task1167_penn_treebank_coarse_pos_tagging 79 | task721_mmmlu_answer_generation_medical_genetics 80 | task1089_check_monotonic_array 81 | task476_cls_english_books_classification 82 | task1560_blimp_binary_classification 83 | task1725_civil_comments_severtoxicity_classification 84 | task1168_brown_coarse_pos_tagging 85 | task156_codah_classification_adversarial 86 | task398_semeval_2018_task1_tweet_joy_detection 87 | 
task337_hateeval_classification_individual_en 88 | task114_is_the_given_word_longest 89 | task1645_medical_question_pair_dataset_text_classification 90 | task1216_atomic_classification_causes 91 | task285_imdb_answer_generation 92 | task403_creak_commonsense_inference 93 | task085_unnatural_addsub_arithmetic 94 | task366_synthetic_return_primes 95 | task303_record_incorrect_answer_generation 96 | task381_boolq_question_generation 97 | task1412_web_questions_question_answering 98 | task1705_ljspeech_classification 99 | task573_air_dialogue_classification 100 | task547_alt_translation_entk_en 101 | task489_mwsc_question_generation 102 | task492_mwsc_incorrect_answer_generation 103 | task1481_gene_extraction_bc2gm_dataset 104 | task594_sciq_question_generation 105 | task170_hotpotqa_answer_generation 106 | task343_winomt_classification_profession_anti 107 | task1320_country_domain_tld 108 | task1321_country_continent 109 | task1601_webquestions_answer_generation 110 | task1596_event2mind_text_generation_2 111 | task067_abductivenli_answer_generation 112 | task867_mawps_multiop_question_answering 113 | task1486_cell_extraction_anem_dataset 114 | task310_race_classification 115 | task1482_gene_extraction_chemprot_dataset 116 | task389_torque_generate_temporal_question 117 | task1444_round_power_of_two 118 | task1088_array_of_products 119 | task375_classify_type_of_sentence_in_debate 120 | task694_mmmlu_answer_generation_econometrics 121 | task735_mmmlu_answer_generation_us_foreign_policy 122 | task1499_dstc3_summarization 123 | task1542_every_ith_element_from_starting 124 | task1501_dstc3_answer_generation 125 | task1382_quarel_write_correct_answer 126 | task491_mwsc_answer_generation 127 | task068_abductivenli_incorrect_answer_generation 128 | task382_hybridqa_answer_generation 129 | task1731_quartz_question_answering 130 | task638_multi_woz_classification 131 | task1425_country_iso_numeric 132 | task1379_quarel_incorrect_answer_generation 133 | 
task193_duorc_question_generation 134 | task379_agnews_topic_classification 135 | task155_count_nouns_verbs 136 | task182_duorc_question_generation 137 | -------------------------------------------------------------------------------- /reproduce_splits/TLAL_Exp0_all_20/init/train_tasks.txt: -------------------------------------------------------------------------------- 1 | task367_synthetic_remove_floats 2 | task377_remove_words_of_given_length 3 | task471_haspart_answer_generation 4 | task888_reviews_classification 5 | task724_mmmlu_answer_generation_moral_scenarios 6 | task862_asdiv_multidiv_question_answering 7 | task1426_country_independence_year 8 | task522_news_editorial_summary 9 | task1404_date_conversion 10 | task512_twitter_emotion_classification 11 | task921_code_x_glue_information_retreival 12 | task154_tomqa_find_location_hard_noise 13 | task340_winomt_classification_gender_pro 14 | task1333_check_validity_date_ddmmyyyy 15 | task347_hybridqa_incorrect_answer_generation 16 | task359_casino_classification_negotiation_vouch_fair 17 | task1401_obqa_sentence_generation 18 | task563_discofuse_answer_generation 19 | task297_storycloze_incorrect_end_classification 20 | task672_nummersense 21 | task1369_healthfact_sentence_generation 22 | task101_reverse_and_concatenate_all_elements_from_index_i_to_j 23 | task044_essential_terms_identifying_essential_words 24 | task1193_food_course_classification 25 | task144_subjqa_question_answering 26 | task094_conala_calculate_mean 27 | task1188_count_max_freq_char 28 | task703_mmmlu_answer_generation_high_school_geography 29 | task616_cola_classification 30 | task1669_md_gender_bias_text_modification 31 | task152_tomqa_find_location_easy_noise 32 | task089_swap_words_verification 33 | task161_count_words_containing_letter 34 | task1567_propara_question_generation 35 | task606_sum_of_all_numbers_in_list_between_positions_i_and_j 36 | task1289_trec_classification 37 | task625_xlwic_true_or_false_answer_generation 38 | 
task1214_atomic_classification_xwant 39 | task1443_string_to_number 40 | task160_replace_letter_in_a_sentence 41 | task100_concatenate_all_elements_from_index_i_to_j 42 | task319_stereoset_classification_profession 43 | task168_strategyqa_question_decomposition 44 | task833_poem_sentiment_classification 45 | task1418_bless_semantic_relation_classification 46 | task846_pubmedqa_classification 47 | task1419_mathqa_gain 48 | task516_senteval_conjoints_inversion 49 | task746_yelp_restaurant_review_classification 50 | task708_mmmlu_answer_generation_high_school_physics 51 | task1553_cnn_dailymail_summarization 52 | task178_quartz_question_answering 53 | task1314_country_abbreviation 54 | task713_mmmlu_answer_generation_human_aging 55 | task1721_civil_comments_obscenity_classification 56 | task906_dialogre_identify_names 57 | task1378_quarel_correct_answer_generation 58 | task592_sciq_incorrect_answer_generation 59 | task1517_limit_classfication 60 | task296_storycloze_correct_end_classification 61 | task709_mmmlu_answer_generation_high_school_psychology 62 | task1288_glue_mrpc_paraphrasing 63 | task228_arc_answer_generation_easy 64 | task819_pec_sentiment_classification 65 | task706_mmmlu_answer_generation_high_school_mathematics 66 | task1729_personachat_generate_next 67 | task1291_multi_news_summarization 68 | task608_sbic_sexual_offense_binary_classification 69 | -------------------------------------------------------------------------------- /reproduce_splits/TLAL_Exp0_all_30/FCGRatioPI-NL-I10-TDTE-High-0.2-Mean-abs-bald_0/train_tasks.txt: -------------------------------------------------------------------------------- 1 | task1601_webquestions_answer_generation 2 | task1328_qa_zre_relation_generation_from_question 3 | task686_mmmlu_answer_generation_college_biology 4 | task861_prost_mcq_answers_generation 5 | task301_record_question_generation 6 | task761_app_review_classification 7 | task672_amazon_and_yelp_summarization_dataset_summarization 8 | 
task821_protoqa_question_generation 9 | task156_codah_classification_adversarial 10 | task687_mmmlu_answer_generation_college_chemistry 11 | task588_amazonfood_rating_classification 12 | task046_miscellaneous_question_typing 13 | task390_torque_text_span_selection 14 | task1445_closest_integers 15 | task116_com2sense_commonsense_reasoning 16 | task1316_remove_duplicates_string 17 | task1566_propara_structured_text_generation 18 | task649_race_blank_question_generation 19 | task070_abductivenli_incorrect_classification 20 | task1712_poki_classification 21 | task296_storycloze_correct_end_classification 22 | task024_cosmosqa_answer_generation 23 | task278_stereoset_sentence_generation_antistereotype 24 | task385_socialiqa_incorrect_answer_generation 25 | task923_event2mind_classifier 26 | task160_replace_letter_in_a_sentence 27 | task1383_quarel_write_incorrect_answer 28 | task568_circa_question_generation 29 | task311_race_question_generation 30 | task1341_msr_text_classification 31 | task1317_country_calling_code 32 | task059_ropes_story_generation 33 | task151_tomqa_find_location_easy_clean 34 | task622_replace_alphabets_in_a_list_by_their_position_in_english_alphabet 35 | task1501_dstc3_answer_generation 36 | task489_mwsc_question_generation 37 | task1553_cnn_dailymail_summarization 38 | task708_mmmlu_answer_generation_high_school_physics 39 | task934_turk_simplification 40 | task1190_add_integer_to_list 41 | task600_find_the_longest_common_substring_in_two_strings 42 | task820_protoqa_answer_generation 43 | task1135_xcsr_en_commonsense_mc_classification 44 | task861_asdiv_addsub_question_answering 45 | task071_abductivenli_answer_generation 46 | task560_alt_translation_en_entk 47 | task924_event2mind_word_generation 48 | task716_mmmlu_answer_generation_jurisprudence 49 | task405_narrativeqa_question_generation 50 | task1291_multi_news_summarization 51 | task1669_md_gender_bias_text_modification 52 | task618_amazonreview_summary_text_generation 53 | 
task1703_ljspeech_textmodification 54 | task365_synthetic_remove_vowels 55 | task328_jigsaw_classification_insult 56 | task478_cls_english_music_classification 57 | task730_mmmlu_answer_generation_professional_medicine 58 | task168_strategyqa_question_decomposition 59 | task1565_triviaqa_classification 60 | task706_mmmlu_answer_generation_high_school_mathematics 61 | task1320_country_domain_tld 62 | task1729_personachat_generate_next 63 | task245_check_presence_in_set_intersection 64 | task1217_atomic_answer_generation 65 | task002_quoref_answer_generation 66 | task1431_head_qa_answer_generation 67 | task1705_ljspeech_classification 68 | task574_air_dialogue_sentence_generation 69 | -------------------------------------------------------------------------------- /reproduce_splits/TLAL_Exp0_all_30/FCGRatioPI-NL-I10-TDTE-High-0.2-Mean-abs-bald_1/train_tasks.txt: -------------------------------------------------------------------------------- 1 | task1601_webquestions_answer_generation 2 | task1328_qa_zre_relation_generation_from_question 3 | task686_mmmlu_answer_generation_college_biology 4 | task861_prost_mcq_answers_generation 5 | task301_record_question_generation 6 | task761_app_review_classification 7 | task672_amazon_and_yelp_summarization_dataset_summarization 8 | task821_protoqa_question_generation 9 | task156_codah_classification_adversarial 10 | task687_mmmlu_answer_generation_college_chemistry 11 | task588_amazonfood_rating_classification 12 | task046_miscellaneous_question_typing 13 | task390_torque_text_span_selection 14 | task1445_closest_integers 15 | task116_com2sense_commonsense_reasoning 16 | task1316_remove_duplicates_string 17 | task1566_propara_structured_text_generation 18 | task649_race_blank_question_generation 19 | task070_abductivenli_incorrect_classification 20 | task1712_poki_classification 21 | task296_storycloze_correct_end_classification 22 | task024_cosmosqa_answer_generation 23 | task278_stereoset_sentence_generation_antistereotype 24 
| task385_socialiqa_incorrect_answer_generation 25 | task923_event2mind_classifier 26 | task160_replace_letter_in_a_sentence 27 | task1383_quarel_write_incorrect_answer 28 | task568_circa_question_generation 29 | task311_race_question_generation 30 | task1341_msr_text_classification 31 | task1317_country_calling_code 32 | task059_ropes_story_generation 33 | task151_tomqa_find_location_easy_clean 34 | task622_replace_alphabets_in_a_list_by_their_position_in_english_alphabet 35 | task1501_dstc3_answer_generation 36 | task489_mwsc_question_generation 37 | task1553_cnn_dailymail_summarization 38 | task708_mmmlu_answer_generation_high_school_physics 39 | task934_turk_simplification 40 | task1190_add_integer_to_list 41 | task600_find_the_longest_common_substring_in_two_strings 42 | task820_protoqa_answer_generation 43 | task1135_xcsr_en_commonsense_mc_classification 44 | task861_asdiv_addsub_question_answering 45 | task071_abductivenli_answer_generation 46 | task560_alt_translation_en_entk 47 | task924_event2mind_word_generation 48 | task716_mmmlu_answer_generation_jurisprudence 49 | task405_narrativeqa_question_generation 50 | task1291_multi_news_summarization 51 | task1669_md_gender_bias_text_modification 52 | task618_amazonreview_summary_text_generation 53 | task1703_ljspeech_textmodification 54 | task365_synthetic_remove_vowels 55 | task328_jigsaw_classification_insult 56 | task478_cls_english_music_classification 57 | task730_mmmlu_answer_generation_professional_medicine 58 | task168_strategyqa_question_decomposition 59 | task1565_triviaqa_classification 60 | task706_mmmlu_answer_generation_high_school_mathematics 61 | task1320_country_domain_tld 62 | task1729_personachat_generate_next 63 | task245_check_presence_in_set_intersection 64 | task1217_atomic_answer_generation 65 | task002_quoref_answer_generation 66 | task1431_head_qa_answer_generation 67 | task1705_ljspeech_classification 68 | task574_air_dialogue_sentence_generation 69 | 
task143_odd-man-out_classification_generate_category 70 | task875_emotion_classification 71 | task1354_sent_comp_classification 72 | task212_logic2text_classification 73 | task584_udeps_eng_fine_pos_tagging 74 | task1500_dstc3_classification 75 | task900_freebase_qa_category_classification 76 | task309_race_answer_generation 77 | task280_stereoset_classification_stereotype_type 78 | task583_udeps_eng_coarse_pos_tagging 79 | task1429_evalution_semantic_relation_classification 80 | task1418_bless_semantic_relation_classification 81 | task521_trivia_question_classification 82 | task682_online_privacy_policy_text_classification 83 | task889_goemotions_classification 84 | task617_amazonreview_category_text_generation 85 | task1167_penn_treebank_coarse_pos_tagging 86 | task1434_head_qa_classification 87 | task679_hope_edi_english_text_classification 88 | task337_hateeval_classification_individual_en 89 | task317_crows-pairs_classification_stereotype_type 90 | task1289_trec_classification 91 | task564_discofuse_classification 92 | task428_senteval_inversion 93 | task681_hope_edi_malayalam_text_classification 94 | task598_cuad_answer_generation 95 | task631_dbpedia_14_incorrect_answer_generation 96 | task382_hybridqa_answer_generation 97 | task028_drop_answer_generation 98 | task276_enhanced_wsc_classification 99 | task599_cuad_question_generation 100 | task1338_peixian_equity_evaluation_corpus_sentiment_classifier 101 | task1427_country_region_in_world 102 | task170_hotpotqa_answer_generation 103 | task061_ropes_answer_generation 104 | task1607_ethos_text_classification 105 | task834_mathdataset_classification 106 | task194_duorc_answer_generation 107 | task1498_24hour_to_12hour_clock 108 | task597_cuad_answer_generation 109 | task180_intervention_extraction 110 | task517_emo_classify_emotion_of_dialogue 111 | task1322_country_government_type 112 | task684_online_privacy_policy_text_information_type_generation 113 | task191_hotpotqa_question_generation 114 | 
task389_torque_generate_temporal_question 115 | task547_alt_translation_entk_en 116 | task342_winomt_classification_profession_pro 117 | task068_abductivenli_incorrect_answer_generation 118 | task141_odd-man-out_classification_category 119 | task742_lhoestq_answer_generation_frequency 120 | task1194_kth_largest_element 121 | task110_logic2text_sentence_generation 122 | task1487_organism_substance_extraction_anem_dataset 123 | task269_csrg_counterfactual_story_generation 124 | task165_mcscript_question_answering_commonsense 125 | task399_semeval_2018_task1_tweet_sadness_detection 126 | task023_cosmosqa_question_generation 127 | task1581_eqasc-perturbed_answer_generation 128 | task183_rhyme_generation 129 | task388_torque_token_classification 130 | task142_odd-man-out_classification_no_category 131 | task375_classify_type_of_sentence_in_debate 132 | task129_scan_long_text_generation_action_command_short 133 | task1399_obqa_answer_generation 134 | task379_agnews_topic_classification 135 | task347_hybridqa_incorrect_answer_generation 136 | task594_sciq_question_generation 137 | -------------------------------------------------------------------------------- /reproduce_splits/TLAL_Exp0_all_30/FCGRatioPI-NL-I10-TDTE-High-0.2-Mean-abs-bald_5/train_tasks.txt: -------------------------------------------------------------------------------- 1 | task1601_webquestions_answer_generation 2 | task1328_qa_zre_relation_generation_from_question 3 | task686_mmmlu_answer_generation_college_biology 4 | task861_prost_mcq_answers_generation 5 | task301_record_question_generation 6 | task761_app_review_classification 7 | task672_amazon_and_yelp_summarization_dataset_summarization 8 | task821_protoqa_question_generation 9 | task156_codah_classification_adversarial 10 | task687_mmmlu_answer_generation_college_chemistry 11 | task588_amazonfood_rating_classification 12 | task046_miscellaneous_question_typing 13 | task390_torque_text_span_selection 14 | task1445_closest_integers 15 | 
task116_com2sense_commonsense_reasoning 16 | task1316_remove_duplicates_string 17 | task1566_propara_structured_text_generation 18 | task649_race_blank_question_generation 19 | task070_abductivenli_incorrect_classification 20 | task1712_poki_classification 21 | task296_storycloze_correct_end_classification 22 | task024_cosmosqa_answer_generation 23 | task278_stereoset_sentence_generation_antistereotype 24 | task385_socialiqa_incorrect_answer_generation 25 | task923_event2mind_classifier 26 | task160_replace_letter_in_a_sentence 27 | task1383_quarel_write_incorrect_answer 28 | task568_circa_question_generation 29 | task311_race_question_generation 30 | task1341_msr_text_classification 31 | task1317_country_calling_code 32 | task059_ropes_story_generation 33 | task151_tomqa_find_location_easy_clean 34 | task622_replace_alphabets_in_a_list_by_their_position_in_english_alphabet 35 | task1501_dstc3_answer_generation 36 | task489_mwsc_question_generation 37 | task1553_cnn_dailymail_summarization 38 | task708_mmmlu_answer_generation_high_school_physics 39 | task934_turk_simplification 40 | task1190_add_integer_to_list 41 | task600_find_the_longest_common_substring_in_two_strings 42 | task820_protoqa_answer_generation 43 | task1135_xcsr_en_commonsense_mc_classification 44 | task861_asdiv_addsub_question_answering 45 | task071_abductivenli_answer_generation 46 | task560_alt_translation_en_entk 47 | task924_event2mind_word_generation 48 | task716_mmmlu_answer_generation_jurisprudence 49 | task405_narrativeqa_question_generation 50 | task1291_multi_news_summarization 51 | task1669_md_gender_bias_text_modification 52 | task618_amazonreview_summary_text_generation 53 | task1703_ljspeech_textmodification 54 | task365_synthetic_remove_vowels 55 | task328_jigsaw_classification_insult 56 | task478_cls_english_music_classification 57 | task730_mmmlu_answer_generation_professional_medicine 58 | task168_strategyqa_question_decomposition 59 | task1565_triviaqa_classification 60 | 
task706_mmmlu_answer_generation_high_school_mathematics 61 | task1320_country_domain_tld 62 | task1729_personachat_generate_next 63 | task245_check_presence_in_set_intersection 64 | task1217_atomic_answer_generation 65 | task002_quoref_answer_generation 66 | task1431_head_qa_answer_generation 67 | task1705_ljspeech_classification 68 | task574_air_dialogue_sentence_generation 69 | -------------------------------------------------------------------------------- /reproduce_splits/TLAL_Exp0_all_30/HighPerplexity_0/train_tasks.txt: -------------------------------------------------------------------------------- 1 | task1601_webquestions_answer_generation 2 | task1328_qa_zre_relation_generation_from_question 3 | task686_mmmlu_answer_generation_college_biology 4 | task861_prost_mcq_answers_generation 5 | task301_record_question_generation 6 | task761_app_review_classification 7 | task672_amazon_and_yelp_summarization_dataset_summarization 8 | task821_protoqa_question_generation 9 | task156_codah_classification_adversarial 10 | task687_mmmlu_answer_generation_college_chemistry 11 | task588_amazonfood_rating_classification 12 | task046_miscellaneous_question_typing 13 | task390_torque_text_span_selection 14 | task1445_closest_integers 15 | task116_com2sense_commonsense_reasoning 16 | task1316_remove_duplicates_string 17 | task1566_propara_structured_text_generation 18 | task649_race_blank_question_generation 19 | task070_abductivenli_incorrect_classification 20 | task1712_poki_classification 21 | task296_storycloze_correct_end_classification 22 | task024_cosmosqa_answer_generation 23 | task278_stereoset_sentence_generation_antistereotype 24 | task385_socialiqa_incorrect_answer_generation 25 | task923_event2mind_classifier 26 | task160_replace_letter_in_a_sentence 27 | task1383_quarel_write_incorrect_answer 28 | task568_circa_question_generation 29 | task311_race_question_generation 30 | task1341_msr_text_classification 31 | task1317_country_calling_code 32 | 
task059_ropes_story_generation 33 | task151_tomqa_find_location_easy_clean 34 | task622_replace_alphabets_in_a_list_by_their_position_in_english_alphabet 35 | task1501_dstc3_answer_generation 36 | task489_mwsc_question_generation 37 | task1553_cnn_dailymail_summarization 38 | task708_mmmlu_answer_generation_high_school_physics 39 | task934_turk_simplification 40 | task1190_add_integer_to_list 41 | task600_find_the_longest_common_substring_in_two_strings 42 | task820_protoqa_answer_generation 43 | task1135_xcsr_en_commonsense_mc_classification 44 | task861_asdiv_addsub_question_answering 45 | task071_abductivenli_answer_generation 46 | task560_alt_translation_en_entk 47 | task924_event2mind_word_generation 48 | task716_mmmlu_answer_generation_jurisprudence 49 | task405_narrativeqa_question_generation 50 | task1291_multi_news_summarization 51 | task1669_md_gender_bias_text_modification 52 | task618_amazonreview_summary_text_generation 53 | task1703_ljspeech_textmodification 54 | task365_synthetic_remove_vowels 55 | task328_jigsaw_classification_insult 56 | task478_cls_english_music_classification 57 | task730_mmmlu_answer_generation_professional_medicine 58 | task168_strategyqa_question_decomposition 59 | task1565_triviaqa_classification 60 | task706_mmmlu_answer_generation_high_school_mathematics 61 | task1320_country_domain_tld 62 | task1729_personachat_generate_next 63 | task245_check_presence_in_set_intersection 64 | task1217_atomic_answer_generation 65 | task002_quoref_answer_generation 66 | task1431_head_qa_answer_generation 67 | task1705_ljspeech_classification 68 | task574_air_dialogue_sentence_generation 69 | -------------------------------------------------------------------------------- /reproduce_splits/TLAL_Exp0_all_30/HighPerplexity_1/train_tasks.txt: -------------------------------------------------------------------------------- 1 | task1601_webquestions_answer_generation 2 | task1328_qa_zre_relation_generation_from_question 3 | 
task686_mmmlu_answer_generation_college_biology 4 | task861_prost_mcq_answers_generation 5 | task301_record_question_generation 6 | task761_app_review_classification 7 | task672_amazon_and_yelp_summarization_dataset_summarization 8 | task821_protoqa_question_generation 9 | task156_codah_classification_adversarial 10 | task687_mmmlu_answer_generation_college_chemistry 11 | task588_amazonfood_rating_classification 12 | task046_miscellaneous_question_typing 13 | task390_torque_text_span_selection 14 | task1445_closest_integers 15 | task116_com2sense_commonsense_reasoning 16 | task1316_remove_duplicates_string 17 | task1566_propara_structured_text_generation 18 | task649_race_blank_question_generation 19 | task070_abductivenli_incorrect_classification 20 | task1712_poki_classification 21 | task296_storycloze_correct_end_classification 22 | task024_cosmosqa_answer_generation 23 | task278_stereoset_sentence_generation_antistereotype 24 | task385_socialiqa_incorrect_answer_generation 25 | task923_event2mind_classifier 26 | task160_replace_letter_in_a_sentence 27 | task1383_quarel_write_incorrect_answer 28 | task568_circa_question_generation 29 | task311_race_question_generation 30 | task1341_msr_text_classification 31 | task1317_country_calling_code 32 | task059_ropes_story_generation 33 | task151_tomqa_find_location_easy_clean 34 | task622_replace_alphabets_in_a_list_by_their_position_in_english_alphabet 35 | task1501_dstc3_answer_generation 36 | task489_mwsc_question_generation 37 | task1553_cnn_dailymail_summarization 38 | task708_mmmlu_answer_generation_high_school_physics 39 | task934_turk_simplification 40 | task1190_add_integer_to_list 41 | task600_find_the_longest_common_substring_in_two_strings 42 | task820_protoqa_answer_generation 43 | task1135_xcsr_en_commonsense_mc_classification 44 | task861_asdiv_addsub_question_answering 45 | task071_abductivenli_answer_generation 46 | task560_alt_translation_en_entk 47 | task924_event2mind_word_generation 48 | 
task716_mmmlu_answer_generation_jurisprudence 49 | task405_narrativeqa_question_generation 50 | task1291_multi_news_summarization 51 | task1669_md_gender_bias_text_modification 52 | task618_amazonreview_summary_text_generation 53 | task1703_ljspeech_textmodification 54 | task365_synthetic_remove_vowels 55 | task328_jigsaw_classification_insult 56 | task478_cls_english_music_classification 57 | task730_mmmlu_answer_generation_professional_medicine 58 | task168_strategyqa_question_decomposition 59 | task1565_triviaqa_classification 60 | task706_mmmlu_answer_generation_high_school_mathematics 61 | task1320_country_domain_tld 62 | task1729_personachat_generate_next 63 | task245_check_presence_in_set_intersection 64 | task1217_atomic_answer_generation 65 | task002_quoref_answer_generation 66 | task1431_head_qa_answer_generation 67 | task1705_ljspeech_classification 68 | task574_air_dialogue_sentence_generation 69 | task900_freebase_qa_category_classification 70 | task1503_hatexplain_classification 71 | task1361_movierationales_classification 72 | task1167_penn_treebank_coarse_pos_tagging 73 | task1418_bless_semantic_relation_classification 74 | task1347_glue_sts-b_similarity_classification 75 | task043_essential_terms_answering_incomplete_questions 76 | task104_semeval_2019_task10_closed_vocabulary_mathematical_answer_generation 77 | task073_commonsenseqa_answer_generation 78 | task1593_yahoo_answers_topics_classification 79 | task285_imdb_answer_generation 80 | task712_mmmlu_answer_generation_high_school_world_history 81 | task119_semeval_2019_task10_geometric_mathematical_answer_generation 82 | task309_race_answer_generation 83 | task664_mmmlu_answer_generation_abstract_algebra 84 | task1366_healthfact_classification 85 | task733_mmmlu_answer_generation_security_studies 86 | task617_amazonreview_category_text_generation 87 | task693_mmmlu_answer_generation_conceptual_physics 88 | task699_mmmlu_answer_generation_high_school_biology 89 | task1389_hellaswag_completion 90 
| task665_mmmlu_answer_generation_anatomy 91 | task732_mmmlu_answer_generation_public_relations 92 | task729_mmmlu_answer_generation_professional_law 93 | task1428_country_surface_area 94 | task1656_gooaq_answer_generation 95 | task499_extract_and_add_all_numbers_from_list 96 | task300_storycloze_order_generation 97 | task862_asdiv_multidiv_question_answering 98 | task085_unnatural_addsub_arithmetic 99 | task752_svamp_multiplication_question_answering 100 | task1726_mathqa_correct_answer_generation 101 | task1425_country_iso_numeric 102 | task864_asdiv_singleop_question_answering 103 | task1426_country_independence_year 104 | task866_mawps_multidiv_question_answering 105 | task868_mawps_singleop_question_answering 106 | task090_equation_learner_algebra 107 | task865_mawps_addsub_question_answering 108 | task344_hybridqa_answer_generation 109 | task1318_country_national_dish 110 | task863_asdiv_multiop_question_answering 111 | task1088_array_of_products 112 | task867_mawps_multiop_question_answering 113 | task745_ai2_arithmetic_questions_arithmetic 114 | task1293_kilt_tasks_hotpotqa_question_answering 115 | task753_svamp_addition_question_answering 116 | task1406_kth_smallest_element 117 | task157_count_vowels_and_consonants 118 | task1319_country_by_barcode_prefix 119 | task637_extract_and_sort_unique_digits_in_a_list 120 | task1315_find_range_array 121 | task1446_farthest_integers 122 | task277_stereoset_sentence_generation_stereotype 123 | task615_moviesqa_answer_generation 124 | task606_sum_of_all_numbers_in_list_between_positions_i_and_j 125 | task1443_string_to_number 126 | task751_svamp_subtraction_question_answering 127 | task1405_find_median 128 | task471_haspart_answer_generation 129 | task835_mathdataset_answer_generation 130 | task244_count_elements_in_set_union 131 | task626_xlwic_sentence_based_on_given_word_sentence_generation 132 | task898_freebase_qa_answer_generation 133 | task504_count_all_alphabetical_elements_in_list 134 | 
task1564_triviaqa_answer_generation 135 | task754_svamp_common-division_question_answering 136 | task183_rhyme_generation 137 | -------------------------------------------------------------------------------- /reproduce_splits/TLAL_Exp0_all_30/LowPerplexity_0/train_tasks.txt: -------------------------------------------------------------------------------- 1 | task1601_webquestions_answer_generation 2 | task1328_qa_zre_relation_generation_from_question 3 | task686_mmmlu_answer_generation_college_biology 4 | task861_prost_mcq_answers_generation 5 | task301_record_question_generation 6 | task761_app_review_classification 7 | task672_amazon_and_yelp_summarization_dataset_summarization 8 | task821_protoqa_question_generation 9 | task156_codah_classification_adversarial 10 | task687_mmmlu_answer_generation_college_chemistry 11 | task588_amazonfood_rating_classification 12 | task046_miscellaneous_question_typing 13 | task390_torque_text_span_selection 14 | task1445_closest_integers 15 | task116_com2sense_commonsense_reasoning 16 | task1316_remove_duplicates_string 17 | task1566_propara_structured_text_generation 18 | task649_race_blank_question_generation 19 | task070_abductivenli_incorrect_classification 20 | task1712_poki_classification 21 | task296_storycloze_correct_end_classification 22 | task024_cosmosqa_answer_generation 23 | task278_stereoset_sentence_generation_antistereotype 24 | task385_socialiqa_incorrect_answer_generation 25 | task923_event2mind_classifier 26 | task160_replace_letter_in_a_sentence 27 | task1383_quarel_write_incorrect_answer 28 | task568_circa_question_generation 29 | task311_race_question_generation 30 | task1341_msr_text_classification 31 | task1317_country_calling_code 32 | task059_ropes_story_generation 33 | task151_tomqa_find_location_easy_clean 34 | task622_replace_alphabets_in_a_list_by_their_position_in_english_alphabet 35 | task1501_dstc3_answer_generation 36 | task489_mwsc_question_generation 37 | task1553_cnn_dailymail_summarization 
38 | task708_mmmlu_answer_generation_high_school_physics 39 | task934_turk_simplification 40 | task1190_add_integer_to_list 41 | task600_find_the_longest_common_substring_in_two_strings 42 | task820_protoqa_answer_generation 43 | task1135_xcsr_en_commonsense_mc_classification 44 | task861_asdiv_addsub_question_answering 45 | task071_abductivenli_answer_generation 46 | task560_alt_translation_en_entk 47 | task924_event2mind_word_generation 48 | task716_mmmlu_answer_generation_jurisprudence 49 | task405_narrativeqa_question_generation 50 | task1291_multi_news_summarization 51 | task1669_md_gender_bias_text_modification 52 | task618_amazonreview_summary_text_generation 53 | task1703_ljspeech_textmodification 54 | task365_synthetic_remove_vowels 55 | task328_jigsaw_classification_insult 56 | task478_cls_english_music_classification 57 | task730_mmmlu_answer_generation_professional_medicine 58 | task168_strategyqa_question_decomposition 59 | task1565_triviaqa_classification 60 | task706_mmmlu_answer_generation_high_school_mathematics 61 | task1320_country_domain_tld 62 | task1729_personachat_generate_next 63 | task245_check_presence_in_set_intersection 64 | task1217_atomic_answer_generation 65 | task002_quoref_answer_generation 66 | task1431_head_qa_answer_generation 67 | task1705_ljspeech_classification 68 | task574_air_dialogue_sentence_generation 69 | -------------------------------------------------------------------------------- /reproduce_splits/TLAL_Exp0_all_30/LowPerplexity_1/train_tasks.txt: -------------------------------------------------------------------------------- 1 | task1601_webquestions_answer_generation 2 | task1328_qa_zre_relation_generation_from_question 3 | task686_mmmlu_answer_generation_college_biology 4 | task861_prost_mcq_answers_generation 5 | task301_record_question_generation 6 | task761_app_review_classification 7 | task672_amazon_and_yelp_summarization_dataset_summarization 8 | task821_protoqa_question_generation 9 | 
task156_codah_classification_adversarial 10 | task687_mmmlu_answer_generation_college_chemistry 11 | task588_amazonfood_rating_classification 12 | task046_miscellaneous_question_typing 13 | task390_torque_text_span_selection 14 | task1445_closest_integers 15 | task116_com2sense_commonsense_reasoning 16 | task1316_remove_duplicates_string 17 | task1566_propara_structured_text_generation 18 | task649_race_blank_question_generation 19 | task070_abductivenli_incorrect_classification 20 | task1712_poki_classification 21 | task296_storycloze_correct_end_classification 22 | task024_cosmosqa_answer_generation 23 | task278_stereoset_sentence_generation_antistereotype 24 | task385_socialiqa_incorrect_answer_generation 25 | task923_event2mind_classifier 26 | task160_replace_letter_in_a_sentence 27 | task1383_quarel_write_incorrect_answer 28 | task568_circa_question_generation 29 | task311_race_question_generation 30 | task1341_msr_text_classification 31 | task1317_country_calling_code 32 | task059_ropes_story_generation 33 | task151_tomqa_find_location_easy_clean 34 | task622_replace_alphabets_in_a_list_by_their_position_in_english_alphabet 35 | task1501_dstc3_answer_generation 36 | task489_mwsc_question_generation 37 | task1553_cnn_dailymail_summarization 38 | task708_mmmlu_answer_generation_high_school_physics 39 | task934_turk_simplification 40 | task1190_add_integer_to_list 41 | task600_find_the_longest_common_substring_in_two_strings 42 | task820_protoqa_answer_generation 43 | task1135_xcsr_en_commonsense_mc_classification 44 | task861_asdiv_addsub_question_answering 45 | task071_abductivenli_answer_generation 46 | task560_alt_translation_en_entk 47 | task924_event2mind_word_generation 48 | task716_mmmlu_answer_generation_jurisprudence 49 | task405_narrativeqa_question_generation 50 | task1291_multi_news_summarization 51 | task1669_md_gender_bias_text_modification 52 | task618_amazonreview_summary_text_generation 53 | task1703_ljspeech_textmodification 54 | 
task365_synthetic_remove_vowels 55 | task328_jigsaw_classification_insult 56 | task478_cls_english_music_classification 57 | task730_mmmlu_answer_generation_professional_medicine 58 | task168_strategyqa_question_decomposition 59 | task1565_triviaqa_classification 60 | task706_mmmlu_answer_generation_high_school_mathematics 61 | task1320_country_domain_tld 62 | task1729_personachat_generate_next 63 | task245_check_presence_in_set_intersection 64 | task1217_atomic_answer_generation 65 | task002_quoref_answer_generation 66 | task1431_head_qa_answer_generation 67 | task1705_ljspeech_classification 68 | task574_air_dialogue_sentence_generation 69 | task1605_ethos_text_classification 70 | task322_jigsaw_classification_threat 71 | task326_jigsaw_classification_obscene 72 | task1706_ljspeech_classification 73 | task387_semeval_2018_task3_irony_classification 74 | task493_review_polarity_classification 75 | task1312_amazonreview_polarity_classification 76 | task477_cls_english_dvd_classification 77 | task286_olid_offense_judgment 78 | task902_deceptive_opinion_spam_classification 79 | task516_senteval_conjoints_inversion 80 | task616_cola_classification 81 | task523_find_if_numbers_or_alphabets_are_more_in_list 82 | task325_jigsaw_classification_identity_attack 83 | task475_yelp_polarity_classification 84 | task284_imdb_classification 85 | task386_semeval_2018_task3_irony_detection 86 | task1495_adverse_drug_event_classification 87 | task1606_ethos_text_classification 88 | task430_senteval_subject_count 89 | task1489_sarcasmdetection_tweet_classification 90 | task625_xlwic_true_or_false_answer_generation 91 | task1488_sarcasmdetection_headline_classification 92 | task350_winomt_classification_gender_identifiability_pro 93 | task547_alt_translation_entk_en 94 | task1355_sent_comp_summarization 95 | task377_remove_words_of_given_length 96 | task1340_msr_text_compression_compression 97 | task152_tomqa_find_location_easy_noise 98 | task1704_ljspeech_textmodification 99 | 
task586_amazonfood_polarity_classification 100 | task1506_celebrity_minimal_dob_span 101 | task770_pawsx_english_text_modification 102 | task933_wiki_auto_style_transfer 103 | task132_dais_text_modification 104 | task123_conala_sort_dictionary 105 | task376_reverse_order_of_words 106 | task366_synthetic_return_primes 107 | task1447_drug_extraction_ade 108 | task181_outcome_extraction 109 | task192_hotpotqa_sentence_generation 110 | task276_enhanced_wsc_classification 111 | task1151_swap_max_min 112 | task1364_hans_answer_generation 113 | task1568_propara_classification 114 | task275_enhanced_wsc_paraphrase_generation 115 | task859_prost_question_generation 116 | task1504_hatexplain_answer_generation 117 | task608_sbic_sexual_offense_binary_classification 118 | task563_discofuse_answer_generation 119 | task675_google_wellformed_query_sentence_generation 120 | task955_wiki_auto_style_transfer 121 | task378_reverse_words_of_given_length 122 | task755_find_longest_substring_and_replace_its_sorted_lowercase_version_in_both_lists 123 | task1604_ethos_text_classification 124 | task1404_date_conversion 125 | task320_stereoset_classification_race 126 | task208_combinations_of_list 127 | task1452_location_entity_extraction_btc_corpus 128 | task1285_kpa_keypoint_matching 129 | task413_mickey_en_sentence_perturbation_generation 130 | task154_tomqa_find_location_hard_noise 131 | task372_synthetic_palindrome_numbers 132 | task874_opus_xhosanavy_sr 133 | task577_curiosity_dialogs_classification 134 | task1150_delete_max_min 135 | task1573_samsum_classification 136 | task076_splash_correcting_sql_mistake 137 | -------------------------------------------------------------------------------- /reproduce_splits/TLAL_Exp0_all_30/Random_0/train_tasks.txt: -------------------------------------------------------------------------------- 1 | task1601_webquestions_answer_generation 2 | task1328_qa_zre_relation_generation_from_question 3 | task686_mmmlu_answer_generation_college_biology 4 | 
task861_prost_mcq_answers_generation 5 | task301_record_question_generation 6 | task761_app_review_classification 7 | task672_amazon_and_yelp_summarization_dataset_summarization 8 | task821_protoqa_question_generation 9 | task156_codah_classification_adversarial 10 | task687_mmmlu_answer_generation_college_chemistry 11 | task588_amazonfood_rating_classification 12 | task046_miscellaneous_question_typing 13 | task390_torque_text_span_selection 14 | task1445_closest_integers 15 | task116_com2sense_commonsense_reasoning 16 | task1316_remove_duplicates_string 17 | task1566_propara_structured_text_generation 18 | task649_race_blank_question_generation 19 | task070_abductivenli_incorrect_classification 20 | task1712_poki_classification 21 | task296_storycloze_correct_end_classification 22 | task024_cosmosqa_answer_generation 23 | task278_stereoset_sentence_generation_antistereotype 24 | task385_socialiqa_incorrect_answer_generation 25 | task923_event2mind_classifier 26 | task160_replace_letter_in_a_sentence 27 | task1383_quarel_write_incorrect_answer 28 | task568_circa_question_generation 29 | task311_race_question_generation 30 | task1341_msr_text_classification 31 | task1317_country_calling_code 32 | task059_ropes_story_generation 33 | task151_tomqa_find_location_easy_clean 34 | task622_replace_alphabets_in_a_list_by_their_position_in_english_alphabet 35 | task1501_dstc3_answer_generation 36 | task489_mwsc_question_generation 37 | task1553_cnn_dailymail_summarization 38 | task708_mmmlu_answer_generation_high_school_physics 39 | task934_turk_simplification 40 | task1190_add_integer_to_list 41 | task600_find_the_longest_common_substring_in_two_strings 42 | task820_protoqa_answer_generation 43 | task1135_xcsr_en_commonsense_mc_classification 44 | task861_asdiv_addsub_question_answering 45 | task071_abductivenli_answer_generation 46 | task560_alt_translation_en_entk 47 | task924_event2mind_word_generation 48 | task716_mmmlu_answer_generation_jurisprudence 49 | 
task405_narrativeqa_question_generation 50 | task1291_multi_news_summarization 51 | task1669_md_gender_bias_text_modification 52 | task618_amazonreview_summary_text_generation 53 | task1703_ljspeech_textmodification 54 | task365_synthetic_remove_vowels 55 | task328_jigsaw_classification_insult 56 | task478_cls_english_music_classification 57 | task730_mmmlu_answer_generation_professional_medicine 58 | task168_strategyqa_question_decomposition 59 | task1565_triviaqa_classification 60 | task706_mmmlu_answer_generation_high_school_mathematics 61 | task1320_country_domain_tld 62 | task1729_personachat_generate_next 63 | task245_check_presence_in_set_intersection 64 | task1217_atomic_answer_generation 65 | task002_quoref_answer_generation 66 | task1431_head_qa_answer_generation 67 | task1705_ljspeech_classification 68 | task574_air_dialogue_sentence_generation 69 | -------------------------------------------------------------------------------- /reproduce_splits/TLAL_Exp0_all_30/Random_1/train_tasks.txt: -------------------------------------------------------------------------------- 1 | task1601_webquestions_answer_generation 2 | task1328_qa_zre_relation_generation_from_question 3 | task686_mmmlu_answer_generation_college_biology 4 | task861_prost_mcq_answers_generation 5 | task301_record_question_generation 6 | task761_app_review_classification 7 | task672_amazon_and_yelp_summarization_dataset_summarization 8 | task821_protoqa_question_generation 9 | task156_codah_classification_adversarial 10 | task687_mmmlu_answer_generation_college_chemistry 11 | task588_amazonfood_rating_classification 12 | task046_miscellaneous_question_typing 13 | task390_torque_text_span_selection 14 | task1445_closest_integers 15 | task116_com2sense_commonsense_reasoning 16 | task1316_remove_duplicates_string 17 | task1566_propara_structured_text_generation 18 | task649_race_blank_question_generation 19 | task070_abductivenli_incorrect_classification 20 | task1712_poki_classification 21 | 
task296_storycloze_correct_end_classification 22 | task024_cosmosqa_answer_generation 23 | task278_stereoset_sentence_generation_antistereotype 24 | task385_socialiqa_incorrect_answer_generation 25 | task923_event2mind_classifier 26 | task160_replace_letter_in_a_sentence 27 | task1383_quarel_write_incorrect_answer 28 | task568_circa_question_generation 29 | task311_race_question_generation 30 | task1341_msr_text_classification 31 | task1317_country_calling_code 32 | task059_ropes_story_generation 33 | task151_tomqa_find_location_easy_clean 34 | task622_replace_alphabets_in_a_list_by_their_position_in_english_alphabet 35 | task1501_dstc3_answer_generation 36 | task489_mwsc_question_generation 37 | task1553_cnn_dailymail_summarization 38 | task708_mmmlu_answer_generation_high_school_physics 39 | task934_turk_simplification 40 | task1190_add_integer_to_list 41 | task600_find_the_longest_common_substring_in_two_strings 42 | task820_protoqa_answer_generation 43 | task1135_xcsr_en_commonsense_mc_classification 44 | task861_asdiv_addsub_question_answering 45 | task071_abductivenli_answer_generation 46 | task560_alt_translation_en_entk 47 | task924_event2mind_word_generation 48 | task716_mmmlu_answer_generation_jurisprudence 49 | task405_narrativeqa_question_generation 50 | task1291_multi_news_summarization 51 | task1669_md_gender_bias_text_modification 52 | task618_amazonreview_summary_text_generation 53 | task1703_ljspeech_textmodification 54 | task365_synthetic_remove_vowels 55 | task328_jigsaw_classification_insult 56 | task478_cls_english_music_classification 57 | task730_mmmlu_answer_generation_professional_medicine 58 | task168_strategyqa_question_decomposition 59 | task1565_triviaqa_classification 60 | task706_mmmlu_answer_generation_high_school_mathematics 61 | task1320_country_domain_tld 62 | task1729_personachat_generate_next 63 | task245_check_presence_in_set_intersection 64 | task1217_atomic_answer_generation 65 | task002_quoref_answer_generation 66 | 
task1431_head_qa_answer_generation 67 | task1705_ljspeech_classification 68 | task574_air_dialogue_sentence_generation 69 | task688_mmmlu_answer_generation_college_computer_science 70 | task1725_civil_comments_severtoxicity_classification 71 | task327_jigsaw_classification_toxic 72 | task1288_glue_mrpc_paraphrasing 73 | task667_mmmlu_answer_generation_business_ethics 74 | task108_contextualabusedetection_classification 75 | task1210_atomic_classification_madeupof 76 | task848_pubmedqa_classification 77 | task428_senteval_inversion 78 | task1312_amazonreview_polarity_classification 79 | task869_cfq_mcd1_sql_to_explanation 80 | task583_udeps_eng_coarse_pos_tagging 81 | task1727_wiqa_what_is_the_effect 82 | task701_mmmlu_answer_generation_high_school_computer_science 83 | task429_senteval_tense 84 | task929_products_reviews_classification 85 | task494_review_polarity_answer_generation 86 | task1429_evalution_semantic_relation_classification 87 | task1366_healthfact_classification 88 | task462_qasper_classification 89 | task875_emotion_classification 90 | task386_semeval_2018_task3_irony_detection 91 | task1216_atomic_classification_causes 92 | task1488_sarcasmdetection_headline_classification 93 | task742_lhoestq_answer_generation_frequency 94 | task1321_country_continent 95 | task1603_smcalflow_sentence_generation 96 | task1406_kth_smallest_element 97 | task736_mmmlu_answer_generation_virology 98 | task628_xlwic_word_with_different_meaning_sentence_generation 99 | task453_swag_answer_generation 100 | task1322_country_government_type 101 | task1504_hatexplain_answer_generation 102 | task750_aqua_multiple_choice_answering 103 | task085_unnatural_addsub_arithmetic 104 | task1325_qa_zre_question_generation_on_subject_relation 105 | task376_reverse_order_of_words 106 | task849_pubmedqa_answer_generation 107 | task095_conala_max_absolute_value 108 | task1572_samsum_summary 109 | task906_dialogre_identify_names 110 | task499_extract_and_add_all_numbers_from_list 111 | 
task142_odd-man-out_classification_no_category 112 | task927_yelp_negative_to_positive_style_transfer 113 | task062_bigbench_repeat_copy_logic 114 | task1336_peixian_equity_evaluation_corpus_gender_classifier 115 | task1479_organization_entity_extraction_btc_corpus 116 | task303_record_incorrect_answer_generation 117 | task719_mmmlu_answer_generation_management 118 | task1567_propara_question_generation 119 | task897_freebase_qa_topic_question_generation 120 | task605_find_the_longest_common_subsequence_in_two_lists 121 | task154_tomqa_find_location_hard_noise 122 | task1517_limit_classfication 123 | task565_circa_answer_generation 124 | task130_scan_structured_text_generation_command_action_long 125 | task615_moviesqa_answer_generation 126 | task563_discofuse_answer_generation 127 | task1520_qa_srl_answer_generation 128 | task835_mathdataset_answer_generation 129 | task388_torque_token_classification 130 | task752_svamp_multiplication_question_answering 131 | task1564_triviaqa_answer_generation 132 | task458_matres_negation_classification 133 | task399_semeval_2018_task1_tweet_sadness_detection 134 | task183_rhyme_generation 135 | task1412_web_questions_question_answering 136 | task276_enhanced_wsc_classification 137 | -------------------------------------------------------------------------------- /reproduce_splits/TLAL_Exp0_all_30/init/train_tasks.txt: -------------------------------------------------------------------------------- 1 | task1601_webquestions_answer_generation 2 | task1328_qa_zre_relation_generation_from_question 3 | task686_mmmlu_answer_generation_college_biology 4 | task861_prost_mcq_answers_generation 5 | task301_record_question_generation 6 | task761_app_review_classification 7 | task672_amazon_and_yelp_summarization_dataset_summarization 8 | task821_protoqa_question_generation 9 | task156_codah_classification_adversarial 10 | task687_mmmlu_answer_generation_college_chemistry 11 | task588_amazonfood_rating_classification 12 | 
task046_miscellaneous_question_typing 13 | task390_torque_text_span_selection 14 | task1445_closest_integers 15 | task116_com2sense_commonsense_reasoning 16 | task1316_remove_duplicates_string 17 | task1566_propara_structured_text_generation 18 | task649_race_blank_question_generation 19 | task070_abductivenli_incorrect_classification 20 | task1712_poki_classification 21 | task296_storycloze_correct_end_classification 22 | task024_cosmosqa_answer_generation 23 | task278_stereoset_sentence_generation_antistereotype 24 | task385_socialiqa_incorrect_answer_generation 25 | task923_event2mind_classifier 26 | task160_replace_letter_in_a_sentence 27 | task1383_quarel_write_incorrect_answer 28 | task568_circa_question_generation 29 | task311_race_question_generation 30 | task1341_msr_text_classification 31 | task1317_country_calling_code 32 | task059_ropes_story_generation 33 | task151_tomqa_find_location_easy_clean 34 | task622_replace_alphabets_in_a_list_by_their_position_in_english_alphabet 35 | task1501_dstc3_answer_generation 36 | task489_mwsc_question_generation 37 | task1553_cnn_dailymail_summarization 38 | task708_mmmlu_answer_generation_high_school_physics 39 | task934_turk_simplification 40 | task1190_add_integer_to_list 41 | task600_find_the_longest_common_substring_in_two_strings 42 | task820_protoqa_answer_generation 43 | task1135_xcsr_en_commonsense_mc_classification 44 | task861_asdiv_addsub_question_answering 45 | task071_abductivenli_answer_generation 46 | task560_alt_translation_en_entk 47 | task924_event2mind_word_generation 48 | task716_mmmlu_answer_generation_jurisprudence 49 | task405_narrativeqa_question_generation 50 | task1291_multi_news_summarization 51 | task1669_md_gender_bias_text_modification 52 | task618_amazonreview_summary_text_generation 53 | task1703_ljspeech_textmodification 54 | task365_synthetic_remove_vowels 55 | task328_jigsaw_classification_insult 56 | task478_cls_english_music_classification 57 | 
task730_mmmlu_answer_generation_professional_medicine 58 | task168_strategyqa_question_decomposition 59 | task1565_triviaqa_classification 60 | task706_mmmlu_answer_generation_high_school_mathematics 61 | task1320_country_domain_tld 62 | task1729_personachat_generate_next 63 | task245_check_presence_in_set_intersection 64 | task1217_atomic_answer_generation 65 | task002_quoref_answer_generation 66 | task1431_head_qa_answer_generation 67 | task1705_ljspeech_classification 68 | task574_air_dialogue_sentence_generation 69 | -------------------------------------------------------------------------------- /reproduce_splits/TLAL_Exp0_all_40/FCGRatioPI-NL-I10-TDTE-High-0.2-Mean-abs-bald_0/train_tasks.txt: -------------------------------------------------------------------------------- 1 | task1572_samsum_summary 2 | task092_check_prime_classification 3 | task1216_atomic_classification_causes 4 | task875_emotion_classification 5 | task346_hybridqa_classification 6 | task354_casino_classification_negotiation_no_need 7 | task1207_atomic_classification_atlocation 8 | task279_stereoset_classification_stereotype 9 | task167_strategyqa_question_generation 10 | task153_tomqa_find_location_hard_clean 11 | task431_senteval_object_count 12 | task247_dream_answer_generation 13 | task506_position_of_all_alphabetical_elements_in_list 14 | task169_strategyqa_sentence_generation 15 | task1714_convai3_sentence_generation 16 | task589_amazonfood_summary_text_generation 17 | task719_mmmlu_answer_generation_management 18 | task090_equation_learner_algebra 19 | task044_essential_terms_identifying_essential_words 20 | task1722_civil_comments_threat_classification 21 | task709_mmmlu_answer_generation_high_school_psychology 22 | task1608_xquad_en_answer_generation 23 | task637_extract_and_sort_unique_digits_in_a_list 24 | task105_story_cloze-rocstories_sentence_generation 25 | task1308_amazonreview_category_classification 26 | task287_casehold_legal_incorrect_answer_generation 27 | 
task192_hotpotqa_sentence_generation 28 | task1551_every_ith_element_from_kth_element 29 | task856_conv_ai_2_classification 30 | task1706_ljspeech_classification 31 | task159_check_frequency_of_words_in_sentence_pair 32 | task1506_celebrity_minimal_dob_span 33 | task064_all_elements_except_first_i 34 | task1318_country_national_dish 35 | task211_logic2text_classification 36 | task846_pubmedqa_classification 37 | task1645_medical_question_pair_dataset_text_classification 38 | task593_sciq_explanation_generation 39 | task1339_peixian_equity_evaluation_corpus_text_completion 40 | task819_pec_sentiment_classification 41 | task723_mmmlu_answer_generation_moral_disputes 42 | task307_jeopardy_answer_generation_final 43 | task735_mmmlu_answer_generation_us_foreign_policy 44 | task1712_poki_classification 45 | task283_dream_incorrect_answer_generation 46 | task129_scan_long_text_generation_action_command_short 47 | task212_logic2text_classification 48 | task103_facts2story_long_text_generation 49 | task471_haspart_answer_generation 50 | task1725_civil_comments_severtoxicity_classification 51 | task1088_array_of_products 52 | task165_mcscript_question_answering_commonsense 53 | task1568_propara_classification 54 | task707_mmmlu_answer_generation_high_school_microeconomics 55 | task1486_cell_extraction_anem_dataset 56 | task1190_add_integer_to_list 57 | task1425_country_iso_numeric 58 | task728_mmmlu_answer_generation_professional_accounting 59 | task917_coqa_question_generation 60 | task964_librispeech_asr_text_auto_completion 61 | task147_afs_argument_similarity_gay_marriage 62 | task1364_hans_answer_generation 63 | task358_casino_classification_negotiation_uv_part 64 | task1479_organization_entity_extraction_btc_corpus 65 | task1316_remove_duplicates_string 66 | task1661_super_glue_classification 67 | task191_hotpotqa_question_generation 68 | task688_mmmlu_answer_generation_college_computer_science 69 | 
-------------------------------------------------------------------------------- /reproduce_splits/TLAL_Exp0_all_40/FCGRatioPI-NL-I10-TDTE-High-0.2-Mean-abs-bald_1/train_tasks.txt: -------------------------------------------------------------------------------- 1 | task1572_samsum_summary 2 | task092_check_prime_classification 3 | task1216_atomic_classification_causes 4 | task875_emotion_classification 5 | task346_hybridqa_classification 6 | task354_casino_classification_negotiation_no_need 7 | task1207_atomic_classification_atlocation 8 | task279_stereoset_classification_stereotype 9 | task167_strategyqa_question_generation 10 | task153_tomqa_find_location_hard_clean 11 | task431_senteval_object_count 12 | task247_dream_answer_generation 13 | task506_position_of_all_alphabetical_elements_in_list 14 | task169_strategyqa_sentence_generation 15 | task1714_convai3_sentence_generation 16 | task589_amazonfood_summary_text_generation 17 | task719_mmmlu_answer_generation_management 18 | task090_equation_learner_algebra 19 | task044_essential_terms_identifying_essential_words 20 | task1722_civil_comments_threat_classification 21 | task709_mmmlu_answer_generation_high_school_psychology 22 | task1608_xquad_en_answer_generation 23 | task637_extract_and_sort_unique_digits_in_a_list 24 | task105_story_cloze-rocstories_sentence_generation 25 | task1308_amazonreview_category_classification 26 | task287_casehold_legal_incorrect_answer_generation 27 | task192_hotpotqa_sentence_generation 28 | task1551_every_ith_element_from_kth_element 29 | task856_conv_ai_2_classification 30 | task1706_ljspeech_classification 31 | task159_check_frequency_of_words_in_sentence_pair 32 | task1506_celebrity_minimal_dob_span 33 | task064_all_elements_except_first_i 34 | task1318_country_national_dish 35 | task211_logic2text_classification 36 | task846_pubmedqa_classification 37 | task1645_medical_question_pair_dataset_text_classification 38 | task593_sciq_explanation_generation 39 | 
task1339_peixian_equity_evaluation_corpus_text_completion 40 | task819_pec_sentiment_classification 41 | task723_mmmlu_answer_generation_moral_disputes 42 | task307_jeopardy_answer_generation_final 43 | task735_mmmlu_answer_generation_us_foreign_policy 44 | task1712_poki_classification 45 | task283_dream_incorrect_answer_generation 46 | task129_scan_long_text_generation_action_command_short 47 | task212_logic2text_classification 48 | task103_facts2story_long_text_generation 49 | task471_haspart_answer_generation 50 | task1725_civil_comments_severtoxicity_classification 51 | task1088_array_of_products 52 | task165_mcscript_question_answering_commonsense 53 | task1568_propara_classification 54 | task707_mmmlu_answer_generation_high_school_microeconomics 55 | task1486_cell_extraction_anem_dataset 56 | task1190_add_integer_to_list 57 | task1425_country_iso_numeric 58 | task728_mmmlu_answer_generation_professional_accounting 59 | task917_coqa_question_generation 60 | task964_librispeech_asr_text_auto_completion 61 | task147_afs_argument_similarity_gay_marriage 62 | task1364_hans_answer_generation 63 | task358_casino_classification_negotiation_uv_part 64 | task1479_organization_entity_extraction_btc_corpus 65 | task1316_remove_duplicates_string 66 | task1661_super_glue_classification 67 | task191_hotpotqa_question_generation 68 | task688_mmmlu_answer_generation_college_computer_science 69 | task1361_movierationales_classification 70 | task1366_healthfact_classification 71 | task583_udeps_eng_coarse_pos_tagging 72 | task143_odd-man-out_classification_generate_category 73 | task1354_sent_comp_classification 74 | task1289_trec_classification 75 | task521_trivia_question_classification 76 | task584_udeps_eng_fine_pos_tagging 77 | task889_goemotions_classification 78 | task1200_atomic_classification_xeffect 79 | task1201_atomic_classification_xintent 80 | task027_drop_answer_type_generation 81 | task245_check_presence_in_set_intersection 82 | 
task069_abductivenli_classification 83 | task429_senteval_tense 84 | task046_miscellaneous_question_typing 85 | task1503_hatexplain_classification 86 | task1434_head_qa_classification 87 | task317_crows-pairs_classification_stereotype_type 88 | task321_stereoset_classification_religion 89 | task900_freebase_qa_category_classification 90 | task767_craigslist_bargains_classification 91 | task1727_wiqa_what_is_the_effect 92 | task337_hateeval_classification_individual_en 93 | task598_cuad_answer_generation 94 | task681_hope_edi_malayalam_text_classification 95 | task599_cuad_question_generation 96 | task1194_kth_largest_element 97 | task182_duorc_question_generation 98 | task1291_multi_news_summarization 99 | task1322_country_government_type 100 | task906_dialogre_identify_names 101 | task028_drop_answer_generation 102 | task310_race_classification 103 | task1451_drug_dose_extraction 104 | task302_record_classification 105 | task515_senteval_odd_word_out 106 | task370_synthetic_remove_divisible_by_3 107 | task1510_evalution_relation_extraction 108 | task585_preposition_classification 109 | task194_duorc_answer_generation 110 | task382_hybridqa_answer_generation 111 | task1498_24hour_to_12hour_clock 112 | task631_dbpedia_14_incorrect_answer_generation 113 | task1369_healthfact_sentence_generation 114 | task457_matres_conditional_classification 115 | task1427_country_region_in_world 116 | task379_agnews_topic_classification 117 | task183_rhyme_generation 118 | task517_emo_classify_emotion_of_dialogue 119 | task834_mathdataset_classification 120 | task458_matres_negation_classification 121 | task573_air_dialogue_classification 122 | task193_duorc_question_generation 123 | task1487_organism_substance_extraction_anem_dataset 124 | task367_synthetic_remove_floats 125 | task547_alt_translation_entk_en 126 | task1205_atomic_classification_isafter 127 | task606_sum_of_all_numbers_in_list_between_positions_i_and_j 128 | task1585_root09_hypernym_generation 129 | 
task127_scan_long_text_generation_action_command_all 130 | task849_pubmedqa_answer_generation 131 | task142_odd-man-out_classification_no_category 132 | task595_mocha_answer_generation 133 | task318_stereoset_classification_gender 134 | task347_hybridqa_incorrect_answer_generation 135 | task1292_yelp_review_full_text_categorization 136 | task1580_eqasc-perturbed_question_generation 137 | -------------------------------------------------------------------------------- /reproduce_splits/TLAL_Exp0_all_40/HighPerplexity_0/train_tasks.txt: -------------------------------------------------------------------------------- 1 | task1572_samsum_summary 2 | task092_check_prime_classification 3 | task1216_atomic_classification_causes 4 | task875_emotion_classification 5 | task346_hybridqa_classification 6 | task354_casino_classification_negotiation_no_need 7 | task1207_atomic_classification_atlocation 8 | task279_stereoset_classification_stereotype 9 | task167_strategyqa_question_generation 10 | task153_tomqa_find_location_hard_clean 11 | task431_senteval_object_count 12 | task247_dream_answer_generation 13 | task506_position_of_all_alphabetical_elements_in_list 14 | task169_strategyqa_sentence_generation 15 | task1714_convai3_sentence_generation 16 | task589_amazonfood_summary_text_generation 17 | task719_mmmlu_answer_generation_management 18 | task090_equation_learner_algebra 19 | task044_essential_terms_identifying_essential_words 20 | task1722_civil_comments_threat_classification 21 | task709_mmmlu_answer_generation_high_school_psychology 22 | task1608_xquad_en_answer_generation 23 | task637_extract_and_sort_unique_digits_in_a_list 24 | task105_story_cloze-rocstories_sentence_generation 25 | task1308_amazonreview_category_classification 26 | task287_casehold_legal_incorrect_answer_generation 27 | task192_hotpotqa_sentence_generation 28 | task1551_every_ith_element_from_kth_element 29 | task856_conv_ai_2_classification 30 | task1706_ljspeech_classification 31 | 
task159_check_frequency_of_words_in_sentence_pair 32 | task1506_celebrity_minimal_dob_span 33 | task064_all_elements_except_first_i 34 | task1318_country_national_dish 35 | task211_logic2text_classification 36 | task846_pubmedqa_classification 37 | task1645_medical_question_pair_dataset_text_classification 38 | task593_sciq_explanation_generation 39 | task1339_peixian_equity_evaluation_corpus_text_completion 40 | task819_pec_sentiment_classification 41 | task723_mmmlu_answer_generation_moral_disputes 42 | task307_jeopardy_answer_generation_final 43 | task735_mmmlu_answer_generation_us_foreign_policy 44 | task1712_poki_classification 45 | task283_dream_incorrect_answer_generation 46 | task129_scan_long_text_generation_action_command_short 47 | task212_logic2text_classification 48 | task103_facts2story_long_text_generation 49 | task471_haspart_answer_generation 50 | task1725_civil_comments_severtoxicity_classification 51 | task1088_array_of_products 52 | task165_mcscript_question_answering_commonsense 53 | task1568_propara_classification 54 | task707_mmmlu_answer_generation_high_school_microeconomics 55 | task1486_cell_extraction_anem_dataset 56 | task1190_add_integer_to_list 57 | task1425_country_iso_numeric 58 | task728_mmmlu_answer_generation_professional_accounting 59 | task917_coqa_question_generation 60 | task964_librispeech_asr_text_auto_completion 61 | task147_afs_argument_similarity_gay_marriage 62 | task1364_hans_answer_generation 63 | task358_casino_classification_negotiation_uv_part 64 | task1479_organization_entity_extraction_btc_corpus 65 | task1316_remove_duplicates_string 66 | task1661_super_glue_classification 67 | task191_hotpotqa_question_generation 68 | task688_mmmlu_answer_generation_college_computer_science 69 | -------------------------------------------------------------------------------- /reproduce_splits/TLAL_Exp0_all_40/LowPerplexity_0/train_tasks.txt: -------------------------------------------------------------------------------- 1 | 
task1572_samsum_summary 2 | task092_check_prime_classification 3 | task1216_atomic_classification_causes 4 | task875_emotion_classification 5 | task346_hybridqa_classification 6 | task354_casino_classification_negotiation_no_need 7 | task1207_atomic_classification_atlocation 8 | task279_stereoset_classification_stereotype 9 | task167_strategyqa_question_generation 10 | task153_tomqa_find_location_hard_clean 11 | task431_senteval_object_count 12 | task247_dream_answer_generation 13 | task506_position_of_all_alphabetical_elements_in_list 14 | task169_strategyqa_sentence_generation 15 | task1714_convai3_sentence_generation 16 | task589_amazonfood_summary_text_generation 17 | task719_mmmlu_answer_generation_management 18 | task090_equation_learner_algebra 19 | task044_essential_terms_identifying_essential_words 20 | task1722_civil_comments_threat_classification 21 | task709_mmmlu_answer_generation_high_school_psychology 22 | task1608_xquad_en_answer_generation 23 | task637_extract_and_sort_unique_digits_in_a_list 24 | task105_story_cloze-rocstories_sentence_generation 25 | task1308_amazonreview_category_classification 26 | task287_casehold_legal_incorrect_answer_generation 27 | task192_hotpotqa_sentence_generation 28 | task1551_every_ith_element_from_kth_element 29 | task856_conv_ai_2_classification 30 | task1706_ljspeech_classification 31 | task159_check_frequency_of_words_in_sentence_pair 32 | task1506_celebrity_minimal_dob_span 33 | task064_all_elements_except_first_i 34 | task1318_country_national_dish 35 | task211_logic2text_classification 36 | task846_pubmedqa_classification 37 | task1645_medical_question_pair_dataset_text_classification 38 | task593_sciq_explanation_generation 39 | task1339_peixian_equity_evaluation_corpus_text_completion 40 | task819_pec_sentiment_classification 41 | task723_mmmlu_answer_generation_moral_disputes 42 | task307_jeopardy_answer_generation_final 43 | task735_mmmlu_answer_generation_us_foreign_policy 44 | 
task1712_poki_classification 45 | task283_dream_incorrect_answer_generation 46 | task129_scan_long_text_generation_action_command_short 47 | task212_logic2text_classification 48 | task103_facts2story_long_text_generation 49 | task471_haspart_answer_generation 50 | task1725_civil_comments_severtoxicity_classification 51 | task1088_array_of_products 52 | task165_mcscript_question_answering_commonsense 53 | task1568_propara_classification 54 | task707_mmmlu_answer_generation_high_school_microeconomics 55 | task1486_cell_extraction_anem_dataset 56 | task1190_add_integer_to_list 57 | task1425_country_iso_numeric 58 | task728_mmmlu_answer_generation_professional_accounting 59 | task917_coqa_question_generation 60 | task964_librispeech_asr_text_auto_completion 61 | task147_afs_argument_similarity_gay_marriage 62 | task1364_hans_answer_generation 63 | task358_casino_classification_negotiation_uv_part 64 | task1479_organization_entity_extraction_btc_corpus 65 | task1316_remove_duplicates_string 66 | task1661_super_glue_classification 67 | task191_hotpotqa_question_generation 68 | task688_mmmlu_answer_generation_college_computer_science 69 | -------------------------------------------------------------------------------- /reproduce_splits/TLAL_Exp0_all_40/LowPerplexity_1/train_tasks.txt: -------------------------------------------------------------------------------- 1 | task1572_samsum_summary 2 | task092_check_prime_classification 3 | task1216_atomic_classification_causes 4 | task875_emotion_classification 5 | task346_hybridqa_classification 6 | task354_casino_classification_negotiation_no_need 7 | task1207_atomic_classification_atlocation 8 | task279_stereoset_classification_stereotype 9 | task167_strategyqa_question_generation 10 | task153_tomqa_find_location_hard_clean 11 | task431_senteval_object_count 12 | task247_dream_answer_generation 13 | task506_position_of_all_alphabetical_elements_in_list 14 | task169_strategyqa_sentence_generation 15 | 
task1714_convai3_sentence_generation 16 | task589_amazonfood_summary_text_generation 17 | task719_mmmlu_answer_generation_management 18 | task090_equation_learner_algebra 19 | task044_essential_terms_identifying_essential_words 20 | task1722_civil_comments_threat_classification 21 | task709_mmmlu_answer_generation_high_school_psychology 22 | task1608_xquad_en_answer_generation 23 | task637_extract_and_sort_unique_digits_in_a_list 24 | task105_story_cloze-rocstories_sentence_generation 25 | task1308_amazonreview_category_classification 26 | task287_casehold_legal_incorrect_answer_generation 27 | task192_hotpotqa_sentence_generation 28 | task1551_every_ith_element_from_kth_element 29 | task856_conv_ai_2_classification 30 | task1706_ljspeech_classification 31 | task159_check_frequency_of_words_in_sentence_pair 32 | task1506_celebrity_minimal_dob_span 33 | task064_all_elements_except_first_i 34 | task1318_country_national_dish 35 | task211_logic2text_classification 36 | task846_pubmedqa_classification 37 | task1645_medical_question_pair_dataset_text_classification 38 | task593_sciq_explanation_generation 39 | task1339_peixian_equity_evaluation_corpus_text_completion 40 | task819_pec_sentiment_classification 41 | task723_mmmlu_answer_generation_moral_disputes 42 | task307_jeopardy_answer_generation_final 43 | task735_mmmlu_answer_generation_us_foreign_policy 44 | task1712_poki_classification 45 | task283_dream_incorrect_answer_generation 46 | task129_scan_long_text_generation_action_command_short 47 | task212_logic2text_classification 48 | task103_facts2story_long_text_generation 49 | task471_haspart_answer_generation 50 | task1725_civil_comments_severtoxicity_classification 51 | task1088_array_of_products 52 | task165_mcscript_question_answering_commonsense 53 | task1568_propara_classification 54 | task707_mmmlu_answer_generation_high_school_microeconomics 55 | task1486_cell_extraction_anem_dataset 56 | task1190_add_integer_to_list 57 | task1425_country_iso_numeric 58 
| task728_mmmlu_answer_generation_professional_accounting 59 | task917_coqa_question_generation 60 | task964_librispeech_asr_text_auto_completion 61 | task147_afs_argument_similarity_gay_marriage 62 | task1364_hans_answer_generation 63 | task358_casino_classification_negotiation_uv_part 64 | task1479_organization_entity_extraction_btc_corpus 65 | task1316_remove_duplicates_string 66 | task1661_super_glue_classification 67 | task191_hotpotqa_question_generation 68 | task688_mmmlu_answer_generation_college_computer_science 69 | task1354_sent_comp_classification 70 | task516_senteval_conjoints_inversion 71 | task1605_ethos_text_classification 72 | task112_asset_simple_sentence_identification 73 | task387_semeval_2018_task3_irony_classification 74 | task286_olid_offense_judgment 75 | task1210_atomic_classification_madeupof 76 | task1727_wiqa_what_is_the_effect 77 | task400_paws_paraphrase_classification 78 | task495_semeval_headline_classification 79 | task1606_ethos_text_classification 80 | task316_crows-pairs_classification_stereotype 81 | task616_cola_classification 82 | task022_cosmosqa_passage_inappropriate_binary 83 | task630_dbpedia_14_classification 84 | task326_jigsaw_classification_obscene 85 | task682_online_privacy_policy_text_classification 86 | task843_financial_phrasebank_classification 87 | task146_afs_argument_similarity_gun_control 88 | task1209_atomic_classification_objectuse 89 | task1502_hatexplain_classification 90 | task145_afs_argument_similarity_death_penalty 91 | task324_jigsaw_classification_disagree 92 | task761_app_review_classification 93 | task132_dais_text_modification 94 | task770_pawsx_english_text_modification 95 | task1340_msr_text_compression_compression 96 | task111_asset_sentence_simplification 97 | task366_synthetic_return_primes 98 | task1404_date_conversion 99 | task377_remove_words_of_given_length 100 | task063_first_i_elements 101 | task1604_ethos_text_classification 102 | task1355_sent_comp_summarization 103 | 
task547_alt_translation_entk_en 104 | task378_reverse_words_of_given_length 105 | task933_wiki_auto_style_transfer 106 | task152_tomqa_find_location_easy_noise 107 | task1508_wordnet_antonyms 108 | task068_abductivenli_incorrect_answer_generation 109 | task560_alt_translation_en_entk 110 | task488_extract_all_alphabetical_elements_from_list_in_order 111 | task610_conllpp_ner 112 | task868_cfq_mcd1_explanation_to_sql 113 | task934_turk_simplification 114 | task151_tomqa_find_location_easy_clean 115 | task955_wiki_auto_style_transfer 116 | task497_extract_all_numbers_from_list_in_order 117 | task154_tomqa_find_location_hard_noise 118 | task675_google_wellformed_query_sentence_generation 119 | task509_collate_of_all_alphabetical_and_numerical_elements_in_list_separately 120 | task622_replace_alphabets_in_a_list_by_their_position_in_english_alphabet 121 | task605_find_the_longest_common_subsequence_in_two_lists 122 | task1447_drug_extraction_ade 123 | task123_conala_sort_dictionary 124 | task181_outcome_extraction 125 | task594_sciq_question_generation 126 | task413_mickey_en_sentence_perturbation_generation 127 | task1151_swap_max_min 128 | task1542_every_ith_element_from_starting 129 | task636_extract_and_sort_unique_alphabets_in_a_list 130 | task101_reverse_and_concatenate_all_elements_from_index_i_to_j 131 | task563_discofuse_answer_generation 132 | task1212_atomic_classification_hasproperty 133 | task1704_ljspeech_textmodification 134 | task1215_atomic_classification_capableof 135 | task576_curiosity_dialogs_answer_generation 136 | task674_google_wellformed_query_sentence_generation 137 | -------------------------------------------------------------------------------- /reproduce_splits/TLAL_Exp0_all_40/Random_0/train_tasks.txt: -------------------------------------------------------------------------------- 1 | task1572_samsum_summary 2 | task092_check_prime_classification 3 | task1216_atomic_classification_causes 4 | task875_emotion_classification 5 | 
task346_hybridqa_classification 6 | task354_casino_classification_negotiation_no_need 7 | task1207_atomic_classification_atlocation 8 | task279_stereoset_classification_stereotype 9 | task167_strategyqa_question_generation 10 | task153_tomqa_find_location_hard_clean 11 | task431_senteval_object_count 12 | task247_dream_answer_generation 13 | task506_position_of_all_alphabetical_elements_in_list 14 | task169_strategyqa_sentence_generation 15 | task1714_convai3_sentence_generation 16 | task589_amazonfood_summary_text_generation 17 | task719_mmmlu_answer_generation_management 18 | task090_equation_learner_algebra 19 | task044_essential_terms_identifying_essential_words 20 | task1722_civil_comments_threat_classification 21 | task709_mmmlu_answer_generation_high_school_psychology 22 | task1608_xquad_en_answer_generation 23 | task637_extract_and_sort_unique_digits_in_a_list 24 | task105_story_cloze-rocstories_sentence_generation 25 | task1308_amazonreview_category_classification 26 | task287_casehold_legal_incorrect_answer_generation 27 | task192_hotpotqa_sentence_generation 28 | task1551_every_ith_element_from_kth_element 29 | task856_conv_ai_2_classification 30 | task1706_ljspeech_classification 31 | task159_check_frequency_of_words_in_sentence_pair 32 | task1506_celebrity_minimal_dob_span 33 | task064_all_elements_except_first_i 34 | task1318_country_national_dish 35 | task211_logic2text_classification 36 | task846_pubmedqa_classification 37 | task1645_medical_question_pair_dataset_text_classification 38 | task593_sciq_explanation_generation 39 | task1339_peixian_equity_evaluation_corpus_text_completion 40 | task819_pec_sentiment_classification 41 | task723_mmmlu_answer_generation_moral_disputes 42 | task307_jeopardy_answer_generation_final 43 | task735_mmmlu_answer_generation_us_foreign_policy 44 | task1712_poki_classification 45 | task283_dream_incorrect_answer_generation 46 | task129_scan_long_text_generation_action_command_short 47 | 
task212_logic2text_classification 48 | task103_facts2story_long_text_generation 49 | task471_haspart_answer_generation 50 | task1725_civil_comments_severtoxicity_classification 51 | task1088_array_of_products 52 | task165_mcscript_question_answering_commonsense 53 | task1568_propara_classification 54 | task707_mmmlu_answer_generation_high_school_microeconomics 55 | task1486_cell_extraction_anem_dataset 56 | task1190_add_integer_to_list 57 | task1425_country_iso_numeric 58 | task728_mmmlu_answer_generation_professional_accounting 59 | task917_coqa_question_generation 60 | task964_librispeech_asr_text_auto_completion 61 | task147_afs_argument_similarity_gay_marriage 62 | task1364_hans_answer_generation 63 | task358_casino_classification_negotiation_uv_part 64 | task1479_organization_entity_extraction_btc_corpus 65 | task1316_remove_duplicates_string 66 | task1661_super_glue_classification 67 | task191_hotpotqa_question_generation 68 | task688_mmmlu_answer_generation_college_computer_science 69 | -------------------------------------------------------------------------------- /reproduce_splits/TLAL_Exp0_all_40/Random_1/train_tasks.txt: -------------------------------------------------------------------------------- 1 | task1572_samsum_summary 2 | task092_check_prime_classification 3 | task1216_atomic_classification_causes 4 | task875_emotion_classification 5 | task346_hybridqa_classification 6 | task354_casino_classification_negotiation_no_need 7 | task1207_atomic_classification_atlocation 8 | task279_stereoset_classification_stereotype 9 | task167_strategyqa_question_generation 10 | task153_tomqa_find_location_hard_clean 11 | task431_senteval_object_count 12 | task247_dream_answer_generation 13 | task506_position_of_all_alphabetical_elements_in_list 14 | task169_strategyqa_sentence_generation 15 | task1714_convai3_sentence_generation 16 | task589_amazonfood_summary_text_generation 17 | task719_mmmlu_answer_generation_management 18 | task090_equation_learner_algebra 
19 | task044_essential_terms_identifying_essential_words 20 | task1722_civil_comments_threat_classification 21 | task709_mmmlu_answer_generation_high_school_psychology 22 | task1608_xquad_en_answer_generation 23 | task637_extract_and_sort_unique_digits_in_a_list 24 | task105_story_cloze-rocstories_sentence_generation 25 | task1308_amazonreview_category_classification 26 | task287_casehold_legal_incorrect_answer_generation 27 | task192_hotpotqa_sentence_generation 28 | task1551_every_ith_element_from_kth_element 29 | task856_conv_ai_2_classification 30 | task1706_ljspeech_classification 31 | task159_check_frequency_of_words_in_sentence_pair 32 | task1506_celebrity_minimal_dob_span 33 | task064_all_elements_except_first_i 34 | task1318_country_national_dish 35 | task211_logic2text_classification 36 | task846_pubmedqa_classification 37 | task1645_medical_question_pair_dataset_text_classification 38 | task593_sciq_explanation_generation 39 | task1339_peixian_equity_evaluation_corpus_text_completion 40 | task819_pec_sentiment_classification 41 | task723_mmmlu_answer_generation_moral_disputes 42 | task307_jeopardy_answer_generation_final 43 | task735_mmmlu_answer_generation_us_foreign_policy 44 | task1712_poki_classification 45 | task283_dream_incorrect_answer_generation 46 | task129_scan_long_text_generation_action_command_short 47 | task212_logic2text_classification 48 | task103_facts2story_long_text_generation 49 | task471_haspart_answer_generation 50 | task1725_civil_comments_severtoxicity_classification 51 | task1088_array_of_products 52 | task165_mcscript_question_answering_commonsense 53 | task1568_propara_classification 54 | task707_mmmlu_answer_generation_high_school_microeconomics 55 | task1486_cell_extraction_anem_dataset 56 | task1190_add_integer_to_list 57 | task1425_country_iso_numeric 58 | task728_mmmlu_answer_generation_professional_accounting 59 | task917_coqa_question_generation 60 | task964_librispeech_asr_text_auto_completion 61 | 
task147_afs_argument_similarity_gay_marriage 62 | task1364_hans_answer_generation 63 | task358_casino_classification_negotiation_uv_part 64 | task1479_organization_entity_extraction_btc_corpus 65 | task1316_remove_duplicates_string 66 | task1661_super_glue_classification 67 | task191_hotpotqa_question_generation 68 | task688_mmmlu_answer_generation_college_computer_science 69 | task664_mmmlu_answer_generation_abstract_algebra 70 | task1283_hrngo_quality_classification 71 | task710_mmmlu_answer_generation_high_school_statistics 72 | task1502_hatexplain_classification 73 | task1203_atomic_classification_xreact 74 | task844_financial_phrasebank_classification 75 | task1289_trec_classification 76 | task854_hippocorpus_classification 77 | task904_hate_speech_offensive_classification 78 | task732_mmmlu_answer_generation_public_relations 79 | task043_essential_terms_answering_incomplete_questions 80 | task322_jigsaw_classification_threat 81 | task478_cls_english_music_classification 82 | task514_argument_consequence_classification 83 | task496_semeval_answer_generation 84 | task708_mmmlu_answer_generation_high_school_physics 85 | task1186_nne_hrngo_classification 86 | task1361_movierationales_classification 87 | task022_cosmosqa_passage_inappropriate_binary 88 | task476_cls_english_books_classification 89 | task118_semeval_2019_task10_open_vocabulary_mathematical_answer_generation 90 | task902_deceptive_opinion_spam_classification 91 | task625_xlwic_true_or_false_answer_generation 92 | task1559_blimp_binary_classification 93 | task094_conala_calculate_mean 94 | task1445_closest_integers 95 | task867_mawps_multiop_question_answering 96 | task608_sbic_sexual_offense_binary_classification 97 | task405_narrativeqa_question_generation 98 | task853_hippocorpus_long_text_generation 99 | task1310_amazonreview_rating_classification 100 | task1703_ljspeech_textmodification 101 | task385_socialiqa_incorrect_answer_generation 102 | task1499_dstc3_summarization 103 | 
task072_abductivenli_answer_generation 104 | task1573_samsum_classification 105 | task1520_qa_srl_answer_generation 106 | task181_outcome_extraction 107 | task1383_quarel_write_incorrect_answer 108 | task933_wiki_auto_style_transfer 109 | task596_mocha_question_generation 110 | task311_race_question_generation 111 | task1320_country_domain_tld 112 | task567_circa_text_generation 113 | task866_mawps_multidiv_question_answering 114 | task870_msmarco_answer_generation 115 | task276_enhanced_wsc_classification 116 | task208_combinations_of_list 117 | task024_cosmosqa_answer_generation 118 | task1285_kpa_keypoint_matching 119 | task755_find_longest_substring_and_replace_its_sorted_lowercase_version_in_both_lists 120 | task1368_healthfact_sentence_generation 121 | task754_svamp_common-division_question_answering 122 | task488_extract_all_alphabetical_elements_from_list_in_order 123 | task461_qasper_question_generation 124 | task592_sciq_incorrect_answer_generation 125 | task300_storycloze_order_generation 126 | task381_boolq_question_generation 127 | task1428_country_surface_area 128 | task1360_numer_sense_multiple_choice_qa_generation 129 | task499_extract_and_add_all_numbers_from_list 130 | task1194_kth_largest_element 131 | task835_mathdataset_answer_generation 132 | task085_unnatural_addsub_arithmetic 133 | task868_mawps_singleop_question_answering 134 | task1426_country_independence_year 135 | task1656_gooaq_answer_generation 136 | task160_replace_letter_in_a_sentence 137 | -------------------------------------------------------------------------------- /reproduce_splits/TLAL_Exp0_all_40/init/train_tasks.txt: -------------------------------------------------------------------------------- 1 | task1572_samsum_summary 2 | task092_check_prime_classification 3 | task1216_atomic_classification_causes 4 | task875_emotion_classification 5 | task346_hybridqa_classification 6 | task354_casino_classification_negotiation_no_need 7 | task1207_atomic_classification_atlocation 8 
| task279_stereoset_classification_stereotype 9 | task167_strategyqa_question_generation 10 | task153_tomqa_find_location_hard_clean 11 | task431_senteval_object_count 12 | task247_dream_answer_generation 13 | task506_position_of_all_alphabetical_elements_in_list 14 | task169_strategyqa_sentence_generation 15 | task1714_convai3_sentence_generation 16 | task589_amazonfood_summary_text_generation 17 | task719_mmmlu_answer_generation_management 18 | task090_equation_learner_algebra 19 | task044_essential_terms_identifying_essential_words 20 | task1722_civil_comments_threat_classification 21 | task709_mmmlu_answer_generation_high_school_psychology 22 | task1608_xquad_en_answer_generation 23 | task637_extract_and_sort_unique_digits_in_a_list 24 | task105_story_cloze-rocstories_sentence_generation 25 | task1308_amazonreview_category_classification 26 | task287_casehold_legal_incorrect_answer_generation 27 | task192_hotpotqa_sentence_generation 28 | task1551_every_ith_element_from_kth_element 29 | task856_conv_ai_2_classification 30 | task1706_ljspeech_classification 31 | task159_check_frequency_of_words_in_sentence_pair 32 | task1506_celebrity_minimal_dob_span 33 | task064_all_elements_except_first_i 34 | task1318_country_national_dish 35 | task211_logic2text_classification 36 | task846_pubmedqa_classification 37 | task1645_medical_question_pair_dataset_text_classification 38 | task593_sciq_explanation_generation 39 | task1339_peixian_equity_evaluation_corpus_text_completion 40 | task819_pec_sentiment_classification 41 | task723_mmmlu_answer_generation_moral_disputes 42 | task307_jeopardy_answer_generation_final 43 | task735_mmmlu_answer_generation_us_foreign_policy 44 | task1712_poki_classification 45 | task283_dream_incorrect_answer_generation 46 | task129_scan_long_text_generation_action_command_short 47 | task212_logic2text_classification 48 | task103_facts2story_long_text_generation 49 | task471_haspart_answer_generation 50 | 
task1725_civil_comments_severtoxicity_classification 51 | task1088_array_of_products 52 | task165_mcscript_question_answering_commonsense 53 | task1568_propara_classification 54 | task707_mmmlu_answer_generation_high_school_microeconomics 55 | task1486_cell_extraction_anem_dataset 56 | task1190_add_integer_to_list 57 | task1425_country_iso_numeric 58 | task728_mmmlu_answer_generation_professional_accounting 59 | task917_coqa_question_generation 60 | task964_librispeech_asr_text_auto_completion 61 | task147_afs_argument_similarity_gay_marriage 62 | task1364_hans_answer_generation 63 | task358_casino_classification_negotiation_uv_part 64 | task1479_organization_entity_extraction_btc_corpus 65 | task1316_remove_duplicates_string 66 | task1661_super_glue_classification 67 | task191_hotpotqa_question_generation 68 | task688_mmmlu_answer_generation_college_computer_science 69 | -------------------------------------------------------------------------------- /reproduce_splits/TLAL_Exp0_all_50/FCGRatioPI-NL-I10-TDTE-High-0.2-Mean-abs-bald_0/train_tasks.txt: -------------------------------------------------------------------------------- 1 | task122_conala_list_index_addition 2 | task342_winomt_classification_profession_pro 3 | task104_semeval_2019_task10_closed_vocabulary_mathematical_answer_generation 4 | task326_jigsaw_classification_obscene 5 | task1194_kth_largest_element 6 | task887_quail_answer_generation 7 | task1405_find_median 8 | task870_msmarco_answer_generation 9 | task073_commonsenseqa_answer_generation 10 | task1724_civil_comments_insult_classification 11 | task328_jigsaw_classification_insult 12 | task698_mmmlu_answer_generation_global_facts 13 | task366_synthetic_return_primes 14 | task697_mmmlu_answer_generation_formal_logic 15 | task459_matres_static_classification 16 | task667_mmmlu_answer_generation_business_ethics 17 | task888_reviews_classification 18 | task1168_brown_coarse_pos_tagging 19 | task1317_country_calling_code 20 | 
task140_detoxifying-lms_classification_style 21 | task1146_country_capital 22 | task274_overruling_legal_classification 23 | task269_csrg_counterfactual_story_generation 24 | task428_senteval_inversion 25 | task848_pubmedqa_classification 26 | task592_sciq_incorrect_answer_generation 27 | task064_all_elements_except_first_i 28 | task1201_atomic_classification_xintent 29 | task823_peixian-rtgender_sentiment_analysis 30 | task1656_gooaq_answer_generation 31 | task123_conala_sort_dictionary 32 | task1714_convai3_sentence_generation 33 | task205_remove_even_elements 34 | task488_extract_all_alphabetical_elements_from_list_in_order 35 | task722_mmmlu_answer_generation_random_topic 36 | task845_pubmedqa_question_generation 37 | task573_air_dialogue_classification 38 | task718_mmmlu_answer_generation_machine_learning 39 | task1678_mathqa_answer_selection 40 | task1704_ljspeech_textmodification 41 | task1381_quarel_incorrect_option_generation 42 | task293_storycommonsense_emotion_text_generation 43 | task725_mmmlu_answer_generation_nutrition 44 | task716_mmmlu_answer_generation_jurisprudence 45 | task737_mmmlu_answer_generation_world_religions 46 | task291_semeval_2020_task4_commonsense_validation 47 | task516_senteval_conjoints_inversion 48 | task550_discofuse_sentence_generation 49 | task1206_atomic_classification_isbefore 50 | task730_mmmlu_answer_generation_professional_medicine 51 | task903_deceptive_opinion_spam_classification 52 | task286_olid_offense_judgment 53 | task874_opus_xhosanavy_sr 54 | task521_trivia_question_classification 55 | task709_mmmlu_answer_generation_high_school_psychology 56 | task1481_gene_extraction_bc2gm_dataset 57 | task1212_atomic_classification_hasproperty 58 | task1645_medical_question_pair_dataset_text_classification 59 | task631_dbpedia_14_incorrect_answer_generation 60 | task732_mmmlu_answer_generation_public_relations 61 | task1150_delete_max_min 62 | task1566_propara_structured_text_generation 63 | 
task723_mmmlu_answer_generation_moral_disputes 64 | task157_count_vowels_and_consonants 65 | task1443_string_to_number 66 | task112_asset_simple_sentence_identification 67 | task337_hateeval_classification_individual_en 68 | task770_pawsx_english_text_modification 69 | -------------------------------------------------------------------------------- /reproduce_splits/TLAL_Exp0_all_50/FCGRatioPI-NL-I10-TDTE-High-0.2-Mean-abs-bald_1/train_tasks.txt: -------------------------------------------------------------------------------- 1 | task122_conala_list_index_addition 2 | task342_winomt_classification_profession_pro 3 | task104_semeval_2019_task10_closed_vocabulary_mathematical_answer_generation 4 | task326_jigsaw_classification_obscene 5 | task1194_kth_largest_element 6 | task887_quail_answer_generation 7 | task1405_find_median 8 | task870_msmarco_answer_generation 9 | task073_commonsenseqa_answer_generation 10 | task1724_civil_comments_insult_classification 11 | task328_jigsaw_classification_insult 12 | task698_mmmlu_answer_generation_global_facts 13 | task366_synthetic_return_primes 14 | task697_mmmlu_answer_generation_formal_logic 15 | task459_matres_static_classification 16 | task667_mmmlu_answer_generation_business_ethics 17 | task888_reviews_classification 18 | task1168_brown_coarse_pos_tagging 19 | task1317_country_calling_code 20 | task140_detoxifying-lms_classification_style 21 | task1146_country_capital 22 | task274_overruling_legal_classification 23 | task269_csrg_counterfactual_story_generation 24 | task428_senteval_inversion 25 | task848_pubmedqa_classification 26 | task592_sciq_incorrect_answer_generation 27 | task064_all_elements_except_first_i 28 | task1201_atomic_classification_xintent 29 | task823_peixian-rtgender_sentiment_analysis 30 | task1656_gooaq_answer_generation 31 | task123_conala_sort_dictionary 32 | task1714_convai3_sentence_generation 33 | task205_remove_even_elements 34 | task488_extract_all_alphabetical_elements_from_list_in_order 35 | 
task722_mmmlu_answer_generation_random_topic 36 | task845_pubmedqa_question_generation 37 | task573_air_dialogue_classification 38 | task718_mmmlu_answer_generation_machine_learning 39 | task1678_mathqa_answer_selection 40 | task1704_ljspeech_textmodification 41 | task1381_quarel_incorrect_option_generation 42 | task293_storycommonsense_emotion_text_generation 43 | task725_mmmlu_answer_generation_nutrition 44 | task716_mmmlu_answer_generation_jurisprudence 45 | task737_mmmlu_answer_generation_world_religions 46 | task291_semeval_2020_task4_commonsense_validation 47 | task516_senteval_conjoints_inversion 48 | task550_discofuse_sentence_generation 49 | task1206_atomic_classification_isbefore 50 | task730_mmmlu_answer_generation_professional_medicine 51 | task903_deceptive_opinion_spam_classification 52 | task286_olid_offense_judgment 53 | task874_opus_xhosanavy_sr 54 | task521_trivia_question_classification 55 | task709_mmmlu_answer_generation_high_school_psychology 56 | task1481_gene_extraction_bc2gm_dataset 57 | task1212_atomic_classification_hasproperty 58 | task1645_medical_question_pair_dataset_text_classification 59 | task631_dbpedia_14_incorrect_answer_generation 60 | task732_mmmlu_answer_generation_public_relations 61 | task1150_delete_max_min 62 | task1566_propara_structured_text_generation 63 | task723_mmmlu_answer_generation_moral_disputes 64 | task157_count_vowels_and_consonants 65 | task1443_string_to_number 66 | task112_asset_simple_sentence_identification 67 | task337_hateeval_classification_individual_en 68 | task770_pawsx_english_text_modification 69 | task564_discofuse_classification 70 | task248_dream_classification 71 | task1200_atomic_classification_xeffect 72 | task1354_sent_comp_classification 73 | task1198_atomic_classification_owant 74 | task679_hope_edi_english_text_classification 75 | task1167_penn_treebank_coarse_pos_tagging 76 | task143_odd-man-out_classification_generate_category 77 | task1197_atomic_classification_oreact 78 | 
task583_udeps_eng_coarse_pos_tagging 79 | task114_is_the_given_word_longest 80 | task1366_healthfact_classification 81 | task147_afs_argument_similarity_gay_marriage 82 | task1500_dstc3_classification 83 | task159_check_frequency_of_words_in_sentence_pair 84 | task1196_atomic_classification_oeffect 85 | task869_cfq_mcd1_sql_to_explanation 86 | task069_abductivenli_classification 87 | task1361_movierationales_classification 88 | task1583_bless_meronym_classification 89 | task209_stancedetection_classification 90 | task1202_atomic_classification_xneed 91 | task115_help_advice_classification 92 | task456_matres_intention_classification 93 | task1553_cnn_dailymail_summarization 94 | task681_hope_edi_malayalam_text_classification 95 | task399_semeval_2018_task1_tweet_sadness_detection 96 | task1322_country_government_type 97 | task1501_dstc3_answer_generation 98 | task1291_multi_news_summarization 99 | task170_hotpotqa_answer_generation 100 | task457_matres_conditional_classification 101 | task597_cuad_answer_generation 102 | task182_duorc_question_generation 103 | task276_enhanced_wsc_classification 104 | task1427_country_region_in_world 105 | task287_casehold_legal_incorrect_answer_generation 106 | task1204_atomic_classification_hinderedby 107 | task599_cuad_question_generation 108 | task1368_healthfact_sentence_generation 109 | task1205_atomic_classification_isafter 110 | task191_hotpotqa_question_generation 111 | task522_news_editorial_summary 112 | task194_duorc_answer_generation 113 | task131_scan_long_text_generation_action_command_long 114 | task183_rhyme_generation 115 | task1730_personachat_choose_next 116 | task1296_wiki_hop_question_answering 117 | task906_dialogre_identify_names 118 | task129_scan_long_text_generation_action_command_short 119 | task193_duorc_question_generation 120 | task1509_evalution_antonyms 121 | task907_dialogre_identify_relationships 122 | task1290_xsum_summarization 123 | task1338_peixian_equity_evaluation_corpus_sentiment_classifier 
124 | task585_preposition_classification 125 | task1510_evalution_relation_extraction 126 | task1211_atomic_classification_hassubevent 127 | task594_sciq_question_generation 128 | task028_drop_answer_generation 129 | task347_hybridqa_incorrect_answer_generation 130 | task127_scan_long_text_generation_action_command_all 131 | task1310_amazonreview_rating_classification 132 | task1369_healthfact_sentence_generation 133 | task1292_yelp_review_full_text_categorization 134 | task517_emo_classify_emotion_of_dialogue 135 | task1285_kpa_keypoint_matching 136 | task458_matres_negation_classification 137 | -------------------------------------------------------------------------------- /reproduce_splits/TLAL_Exp0_all_50/HighPerplexity_0/train_tasks.txt: -------------------------------------------------------------------------------- 1 | task122_conala_list_index_addition 2 | task342_winomt_classification_profession_pro 3 | task104_semeval_2019_task10_closed_vocabulary_mathematical_answer_generation 4 | task326_jigsaw_classification_obscene 5 | task1194_kth_largest_element 6 | task887_quail_answer_generation 7 | task1405_find_median 8 | task870_msmarco_answer_generation 9 | task073_commonsenseqa_answer_generation 10 | task1724_civil_comments_insult_classification 11 | task328_jigsaw_classification_insult 12 | task698_mmmlu_answer_generation_global_facts 13 | task366_synthetic_return_primes 14 | task697_mmmlu_answer_generation_formal_logic 15 | task459_matres_static_classification 16 | task667_mmmlu_answer_generation_business_ethics 17 | task888_reviews_classification 18 | task1168_brown_coarse_pos_tagging 19 | task1317_country_calling_code 20 | task140_detoxifying-lms_classification_style 21 | task1146_country_capital 22 | task274_overruling_legal_classification 23 | task269_csrg_counterfactual_story_generation 24 | task428_senteval_inversion 25 | task848_pubmedqa_classification 26 | task592_sciq_incorrect_answer_generation 27 | task064_all_elements_except_first_i 28 | 
task1201_atomic_classification_xintent 29 | task823_peixian-rtgender_sentiment_analysis 30 | task1656_gooaq_answer_generation 31 | task123_conala_sort_dictionary 32 | task1714_convai3_sentence_generation 33 | task205_remove_even_elements 34 | task488_extract_all_alphabetical_elements_from_list_in_order 35 | task722_mmmlu_answer_generation_random_topic 36 | task845_pubmedqa_question_generation 37 | task573_air_dialogue_classification 38 | task718_mmmlu_answer_generation_machine_learning 39 | task1678_mathqa_answer_selection 40 | task1704_ljspeech_textmodification 41 | task1381_quarel_incorrect_option_generation 42 | task293_storycommonsense_emotion_text_generation 43 | task725_mmmlu_answer_generation_nutrition 44 | task716_mmmlu_answer_generation_jurisprudence 45 | task737_mmmlu_answer_generation_world_religions 46 | task291_semeval_2020_task4_commonsense_validation 47 | task516_senteval_conjoints_inversion 48 | task550_discofuse_sentence_generation 49 | task1206_atomic_classification_isbefore 50 | task730_mmmlu_answer_generation_professional_medicine 51 | task903_deceptive_opinion_spam_classification 52 | task286_olid_offense_judgment 53 | task874_opus_xhosanavy_sr 54 | task521_trivia_question_classification 55 | task709_mmmlu_answer_generation_high_school_psychology 56 | task1481_gene_extraction_bc2gm_dataset 57 | task1212_atomic_classification_hasproperty 58 | task1645_medical_question_pair_dataset_text_classification 59 | task631_dbpedia_14_incorrect_answer_generation 60 | task732_mmmlu_answer_generation_public_relations 61 | task1150_delete_max_min 62 | task1566_propara_structured_text_generation 63 | task723_mmmlu_answer_generation_moral_disputes 64 | task157_count_vowels_and_consonants 65 | task1443_string_to_number 66 | task112_asset_simple_sentence_identification 67 | task337_hateeval_classification_individual_en 68 | task770_pawsx_english_text_modification 69 | -------------------------------------------------------------------------------- 
/reproduce_splits/TLAL_Exp0_all_50/LowPerplexity_0/train_tasks.txt: -------------------------------------------------------------------------------- 1 | task122_conala_list_index_addition 2 | task342_winomt_classification_profession_pro 3 | task104_semeval_2019_task10_closed_vocabulary_mathematical_answer_generation 4 | task326_jigsaw_classification_obscene 5 | task1194_kth_largest_element 6 | task887_quail_answer_generation 7 | task1405_find_median 8 | task870_msmarco_answer_generation 9 | task073_commonsenseqa_answer_generation 10 | task1724_civil_comments_insult_classification 11 | task328_jigsaw_classification_insult 12 | task698_mmmlu_answer_generation_global_facts 13 | task366_synthetic_return_primes 14 | task697_mmmlu_answer_generation_formal_logic 15 | task459_matres_static_classification 16 | task667_mmmlu_answer_generation_business_ethics 17 | task888_reviews_classification 18 | task1168_brown_coarse_pos_tagging 19 | task1317_country_calling_code 20 | task140_detoxifying-lms_classification_style 21 | task1146_country_capital 22 | task274_overruling_legal_classification 23 | task269_csrg_counterfactual_story_generation 24 | task428_senteval_inversion 25 | task848_pubmedqa_classification 26 | task592_sciq_incorrect_answer_generation 27 | task064_all_elements_except_first_i 28 | task1201_atomic_classification_xintent 29 | task823_peixian-rtgender_sentiment_analysis 30 | task1656_gooaq_answer_generation 31 | task123_conala_sort_dictionary 32 | task1714_convai3_sentence_generation 33 | task205_remove_even_elements 34 | task488_extract_all_alphabetical_elements_from_list_in_order 35 | task722_mmmlu_answer_generation_random_topic 36 | task845_pubmedqa_question_generation 37 | task573_air_dialogue_classification 38 | task718_mmmlu_answer_generation_machine_learning 39 | task1678_mathqa_answer_selection 40 | task1704_ljspeech_textmodification 41 | task1381_quarel_incorrect_option_generation 42 | task293_storycommonsense_emotion_text_generation 43 | 
task725_mmmlu_answer_generation_nutrition 44 | task716_mmmlu_answer_generation_jurisprudence 45 | task737_mmmlu_answer_generation_world_religions 46 | task291_semeval_2020_task4_commonsense_validation 47 | task516_senteval_conjoints_inversion 48 | task550_discofuse_sentence_generation 49 | task1206_atomic_classification_isbefore 50 | task730_mmmlu_answer_generation_professional_medicine 51 | task903_deceptive_opinion_spam_classification 52 | task286_olid_offense_judgment 53 | task874_opus_xhosanavy_sr 54 | task521_trivia_question_classification 55 | task709_mmmlu_answer_generation_high_school_psychology 56 | task1481_gene_extraction_bc2gm_dataset 57 | task1212_atomic_classification_hasproperty 58 | task1645_medical_question_pair_dataset_text_classification 59 | task631_dbpedia_14_incorrect_answer_generation 60 | task732_mmmlu_answer_generation_public_relations 61 | task1150_delete_max_min 62 | task1566_propara_structured_text_generation 63 | task723_mmmlu_answer_generation_moral_disputes 64 | task157_count_vowels_and_consonants 65 | task1443_string_to_number 66 | task112_asset_simple_sentence_identification 67 | task337_hateeval_classification_individual_en 68 | task770_pawsx_english_text_modification 69 | -------------------------------------------------------------------------------- /reproduce_splits/TLAL_Exp0_all_50/LowPerplexity_1/train_tasks.txt: -------------------------------------------------------------------------------- 1 | task122_conala_list_index_addition 2 | task342_winomt_classification_profession_pro 3 | task104_semeval_2019_task10_closed_vocabulary_mathematical_answer_generation 4 | task326_jigsaw_classification_obscene 5 | task1194_kth_largest_element 6 | task887_quail_answer_generation 7 | task1405_find_median 8 | task870_msmarco_answer_generation 9 | task073_commonsenseqa_answer_generation 10 | task1724_civil_comments_insult_classification 11 | task328_jigsaw_classification_insult 12 | task698_mmmlu_answer_generation_global_facts 13 | 
task366_synthetic_return_primes 14 | task697_mmmlu_answer_generation_formal_logic 15 | task459_matres_static_classification 16 | task667_mmmlu_answer_generation_business_ethics 17 | task888_reviews_classification 18 | task1168_brown_coarse_pos_tagging 19 | task1317_country_calling_code 20 | task140_detoxifying-lms_classification_style 21 | task1146_country_capital 22 | task274_overruling_legal_classification 23 | task269_csrg_counterfactual_story_generation 24 | task428_senteval_inversion 25 | task848_pubmedqa_classification 26 | task592_sciq_incorrect_answer_generation 27 | task064_all_elements_except_first_i 28 | task1201_atomic_classification_xintent 29 | task823_peixian-rtgender_sentiment_analysis 30 | task1656_gooaq_answer_generation 31 | task123_conala_sort_dictionary 32 | task1714_convai3_sentence_generation 33 | task205_remove_even_elements 34 | task488_extract_all_alphabetical_elements_from_list_in_order 35 | task722_mmmlu_answer_generation_random_topic 36 | task845_pubmedqa_question_generation 37 | task573_air_dialogue_classification 38 | task718_mmmlu_answer_generation_machine_learning 39 | task1678_mathqa_answer_selection 40 | task1704_ljspeech_textmodification 41 | task1381_quarel_incorrect_option_generation 42 | task293_storycommonsense_emotion_text_generation 43 | task725_mmmlu_answer_generation_nutrition 44 | task716_mmmlu_answer_generation_jurisprudence 45 | task737_mmmlu_answer_generation_world_religions 46 | task291_semeval_2020_task4_commonsense_validation 47 | task516_senteval_conjoints_inversion 48 | task550_discofuse_sentence_generation 49 | task1206_atomic_classification_isbefore 50 | task730_mmmlu_answer_generation_professional_medicine 51 | task903_deceptive_opinion_spam_classification 52 | task286_olid_offense_judgment 53 | task874_opus_xhosanavy_sr 54 | task521_trivia_question_classification 55 | task709_mmmlu_answer_generation_high_school_psychology 56 | task1481_gene_extraction_bc2gm_dataset 57 | 
task1212_atomic_classification_hasproperty 58 | task1645_medical_question_pair_dataset_text_classification 59 | task631_dbpedia_14_incorrect_answer_generation 60 | task732_mmmlu_answer_generation_public_relations 61 | task1150_delete_max_min 62 | task1566_propara_structured_text_generation 63 | task723_mmmlu_answer_generation_moral_disputes 64 | task157_count_vowels_and_consonants 65 | task1443_string_to_number 66 | task112_asset_simple_sentence_identification 67 | task337_hateeval_classification_individual_en 68 | task770_pawsx_english_text_modification 69 | task761_app_review_classification 70 | task1207_atomic_classification_atlocation 71 | task323_jigsaw_classification_sexually_explicit 72 | task1312_amazonreview_polarity_classification 73 | task1210_atomic_classification_madeupof 74 | task1209_atomic_classification_objectuse 75 | task333_hateeval_classification_hate_en 76 | task387_semeval_2018_task3_irony_classification 77 | task1203_atomic_classification_xreact 78 | task518_emo_different_dialogue_emotions 79 | task1706_ljspeech_classification 80 | task630_dbpedia_14_classification 81 | task386_semeval_2018_task3_irony_detection 82 | task1213_atomic_classification_desires 83 | task1605_ethos_text_classification 84 | task1488_sarcasmdetection_headline_classification 85 | task475_yelp_polarity_classification 86 | task327_jigsaw_classification_toxic 87 | task1495_adverse_drug_event_classification 88 | task322_jigsaw_classification_threat 89 | task1193_food_course_classification 90 | task195_sentiment140_classification 91 | task496_semeval_answer_generation 92 | task324_jigsaw_classification_disagree 93 | task1316_remove_duplicates_string 94 | task1355_sent_comp_summarization 95 | task1340_msr_text_compression_compression 96 | task755_find_longest_substring_and_replace_its_sorted_lowercase_version_in_both_lists 97 | task1151_swap_max_min 98 | task1208_atomic_classification_xreason 99 | task586_amazonfood_polarity_classification 100 | 
task509_collate_of_all_alphabetical_and_numerical_elements_in_list_separately 101 | task547_alt_translation_entk_en 102 | task1215_atomic_classification_capableof 103 | task746_yelp_restaurant_review_classification 104 | task111_asset_sentence_simplification 105 | task578_curiosity_dialogs_answer_generation 106 | task076_splash_correcting_sql_mistake 107 | task610_conllpp_ner 108 | task933_wiki_auto_style_transfer 109 | task515_senteval_odd_word_out 110 | task068_abductivenli_incorrect_answer_generation 111 | task560_alt_translation_en_entk 112 | task636_extract_and_sort_unique_alphabets_in_a_list 113 | task320_stereoset_classification_race 114 | task377_remove_words_of_given_length 115 | task608_sbic_sexual_offense_binary_classification 116 | task1453_person_entity_extraction_btc_corpus 117 | task177_para-nmt_paraphrasing 118 | task078_all_elements_except_last_i 119 | task934_turk_simplification 120 | task1669_md_gender_bias_text_modification 121 | task063_first_i_elements 122 | task1542_every_ith_element_from_starting 123 | task955_wiki_auto_style_transfer 124 | task208_combinations_of_list 125 | task1551_every_ith_element_from_kth_element 126 | task622_replace_alphabets_in_a_list_by_their_position_in_english_alphabet 127 | task600_find_the_longest_common_substring_in_two_strings 128 | task1404_date_conversion 129 | task099_reverse_elements_between_index_i_and_j 130 | task868_cfq_mcd1_explanation_to_sql 131 | task1670_md_gender_bias_text_modification 132 | task103_facts2story_long_text_generation 133 | task1604_ethos_text_classification 134 | task497_extract_all_numbers_from_list_in_order 135 | task605_find_the_longest_common_subsequence_in_two_lists 136 | task1447_drug_extraction_ade 137 | -------------------------------------------------------------------------------- /reproduce_splits/TLAL_Exp0_all_50/Random_0/train_tasks.txt: -------------------------------------------------------------------------------- 1 | task122_conala_list_index_addition 2 | 
task342_winomt_classification_profession_pro 3 | task104_semeval_2019_task10_closed_vocabulary_mathematical_answer_generation 4 | task326_jigsaw_classification_obscene 5 | task1194_kth_largest_element 6 | task887_quail_answer_generation 7 | task1405_find_median 8 | task870_msmarco_answer_generation 9 | task073_commonsenseqa_answer_generation 10 | task1724_civil_comments_insult_classification 11 | task328_jigsaw_classification_insult 12 | task698_mmmlu_answer_generation_global_facts 13 | task366_synthetic_return_primes 14 | task697_mmmlu_answer_generation_formal_logic 15 | task459_matres_static_classification 16 | task667_mmmlu_answer_generation_business_ethics 17 | task888_reviews_classification 18 | task1168_brown_coarse_pos_tagging 19 | task1317_country_calling_code 20 | task140_detoxifying-lms_classification_style 21 | task1146_country_capital 22 | task274_overruling_legal_classification 23 | task269_csrg_counterfactual_story_generation 24 | task428_senteval_inversion 25 | task848_pubmedqa_classification 26 | task592_sciq_incorrect_answer_generation 27 | task064_all_elements_except_first_i 28 | task1201_atomic_classification_xintent 29 | task823_peixian-rtgender_sentiment_analysis 30 | task1656_gooaq_answer_generation 31 | task123_conala_sort_dictionary 32 | task1714_convai3_sentence_generation 33 | task205_remove_even_elements 34 | task488_extract_all_alphabetical_elements_from_list_in_order 35 | task722_mmmlu_answer_generation_random_topic 36 | task845_pubmedqa_question_generation 37 | task573_air_dialogue_classification 38 | task718_mmmlu_answer_generation_machine_learning 39 | task1678_mathqa_answer_selection 40 | task1704_ljspeech_textmodification 41 | task1381_quarel_incorrect_option_generation 42 | task293_storycommonsense_emotion_text_generation 43 | task725_mmmlu_answer_generation_nutrition 44 | task716_mmmlu_answer_generation_jurisprudence 45 | task737_mmmlu_answer_generation_world_religions 46 | task291_semeval_2020_task4_commonsense_validation 47 | 
task516_senteval_conjoints_inversion 48 | task550_discofuse_sentence_generation 49 | task1206_atomic_classification_isbefore 50 | task730_mmmlu_answer_generation_professional_medicine 51 | task903_deceptive_opinion_spam_classification 52 | task286_olid_offense_judgment 53 | task874_opus_xhosanavy_sr 54 | task521_trivia_question_classification 55 | task709_mmmlu_answer_generation_high_school_psychology 56 | task1481_gene_extraction_bc2gm_dataset 57 | task1212_atomic_classification_hasproperty 58 | task1645_medical_question_pair_dataset_text_classification 59 | task631_dbpedia_14_incorrect_answer_generation 60 | task732_mmmlu_answer_generation_public_relations 61 | task1150_delete_max_min 62 | task1566_propara_structured_text_generation 63 | task723_mmmlu_answer_generation_moral_disputes 64 | task157_count_vowels_and_consonants 65 | task1443_string_to_number 66 | task112_asset_simple_sentence_identification 67 | task337_hateeval_classification_individual_en 68 | task770_pawsx_english_text_modification 69 | -------------------------------------------------------------------------------- /reproduce_splits/TLAL_Exp0_all_50/init/train_tasks.txt: -------------------------------------------------------------------------------- 1 | task122_conala_list_index_addition 2 | task342_winomt_classification_profession_pro 3 | task104_semeval_2019_task10_closed_vocabulary_mathematical_answer_generation 4 | task326_jigsaw_classification_obscene 5 | task1194_kth_largest_element 6 | task887_quail_answer_generation 7 | task1405_find_median 8 | task870_msmarco_answer_generation 9 | task073_commonsenseqa_answer_generation 10 | task1724_civil_comments_insult_classification 11 | task328_jigsaw_classification_insult 12 | task698_mmmlu_answer_generation_global_facts 13 | task366_synthetic_return_primes 14 | task697_mmmlu_answer_generation_formal_logic 15 | task459_matres_static_classification 16 | task667_mmmlu_answer_generation_business_ethics 17 | task888_reviews_classification 18 | 
task1168_brown_coarse_pos_tagging 19 | task1317_country_calling_code 20 | task140_detoxifying-lms_classification_style 21 | task1146_country_capital 22 | task274_overruling_legal_classification 23 | task269_csrg_counterfactual_story_generation 24 | task428_senteval_inversion 25 | task848_pubmedqa_classification 26 | task592_sciq_incorrect_answer_generation 27 | task064_all_elements_except_first_i 28 | task1201_atomic_classification_xintent 29 | task823_peixian-rtgender_sentiment_analysis 30 | task1656_gooaq_answer_generation 31 | task123_conala_sort_dictionary 32 | task1714_convai3_sentence_generation 33 | task205_remove_even_elements 34 | task488_extract_all_alphabetical_elements_from_list_in_order 35 | task722_mmmlu_answer_generation_random_topic 36 | task845_pubmedqa_question_generation 37 | task573_air_dialogue_classification 38 | task718_mmmlu_answer_generation_machine_learning 39 | task1678_mathqa_answer_selection 40 | task1704_ljspeech_textmodification 41 | task1381_quarel_incorrect_option_generation 42 | task293_storycommonsense_emotion_text_generation 43 | task725_mmmlu_answer_generation_nutrition 44 | task716_mmmlu_answer_generation_jurisprudence 45 | task737_mmmlu_answer_generation_world_religions 46 | task291_semeval_2020_task4_commonsense_validation 47 | task516_senteval_conjoints_inversion 48 | task550_discofuse_sentence_generation 49 | task1206_atomic_classification_isbefore 50 | task730_mmmlu_answer_generation_professional_medicine 51 | task903_deceptive_opinion_spam_classification 52 | task286_olid_offense_judgment 53 | task874_opus_xhosanavy_sr 54 | task521_trivia_question_classification 55 | task709_mmmlu_answer_generation_high_school_psychology 56 | task1481_gene_extraction_bc2gm_dataset 57 | task1212_atomic_classification_hasproperty 58 | task1645_medical_question_pair_dataset_text_classification 59 | task631_dbpedia_14_incorrect_answer_generation 60 | task732_mmmlu_answer_generation_public_relations 61 | task1150_delete_max_min 62 | 
task1566_propara_structured_text_generation 63 | task723_mmmlu_answer_generation_moral_disputes 64 | task157_count_vowels_and_consonants 65 | task1443_string_to_number 66 | task112_asset_simple_sentence_identification 67 | task337_hateeval_classification_individual_en 68 | task770_pawsx_english_text_modification 69 | -------------------------------------------------------------------------------- /reproduce_splits/default_dev/dev_tasks.txt: -------------------------------------------------------------------------------- 1 | task1356_xlsum_title_generation 2 | task893_gap_fill_the_blank_coreference_resolution 3 | task641_esnli_classification 4 | task1529_scitail1.1_classification 5 | task202_mnli_contradiction_classification 6 | task670_ambigqa_question_generation 7 | task1393_superglue_copa_text_completion 8 | task1344_glue_entailment_classification 9 | task288_gigaword_summarization 10 | task1387_anli_r3_entailment 11 | task1664_winobias_text_generation 12 | task1161_coda19_title_generation 13 | task880_schema_guided_dstc8_classification 14 | task738_perspectrum_classification 15 | task1439_doqa_cooking_isanswerable 16 | task645_summarization 17 | task619_ohsumed_abstract_title_generation 18 | task1728_web_nlg_data_to_text 19 | task1640_aqa1.0_answerable_unanswerable_question_classification 20 | task648_answer_generation 21 | task242_tweetqa_classification 22 | task620_ohsumed_medical_subject_headings_answer_generation 23 | task1159_bard_analogical_reasoning_containers 24 | task500_scruples_anecdotes_title_generation 25 | task890_gcwd_classification 26 | task039_qasc_find_overlapping_words 27 | task1154_bard_analogical_reasoning_travel 28 | task1612_sick_label_classification 29 | task1442_doqa_movies_isanswerable 30 | task233_iirc_link_exists_classification 31 | task936_defeasible_nli_snli_classification 32 | task1386_anli_r2_entailment 33 | task1152_bard_analogical_reasoning_causation 34 | task290_tellmewhy_question_answerability 35 | 
task304_numeric_fused_head_resolution 36 | task760_msr_sqa_long_text_generation 37 | task035_winogrande_question_modification_person 38 | task569_recipe_nlg_text_generation 39 | task391_causal_relationship 40 | task891_gap_coreference_resolution 41 | task1586_scifact_title_generation 42 | task602_wikitext-103_answer_generation 43 | task1195_disflqa_disfluent_to_fluent_conversion 44 | task1409_dart_text_generation 45 | task033_winogrande_answer_generation 46 | task1407_dart_question_generation 47 | task402_grailqa_paraphrase_generation 48 | task201_mnli_neutral_classification 49 | task520_aquamuse_answer_given_in_passage 50 | task892_gap_reverse_coreference_resolution 51 | task828_copa_commonsense_cause_effect 52 | task769_qed_summarization 53 | task1155_bard_analogical_reasoning_trash_or_treasure 54 | task1385_anli_r1_entailment 55 | task1531_daily_dialog_type_classification 56 | task1516_imppres_naturallanguageinference 57 | task1394_meta_woz_task_classification 58 | task401_numeric_fused_head_reference 59 | task1598_nyc_long_text_generation 60 | task1615_sick_tclassify_b_relation_a 61 | task970_sherliic_causal_relationship 62 | task1390_wscfixed_coreference 63 | task199_mnli_classification 64 | task034_winogrande_question_modification_object 65 | task133_winowhy_reason_plausibility_detection 66 | task226_english_language_answer_relevance_classification 67 | task510_reddit_tifu_title_summarization 68 | task935_defeasible_nli_atomic_classification 69 | task349_squad2.0_answerable_unanswerable_question_classification 70 | task1157_bard_analogical_reasoning_rooms_for_containers 71 | task937_defeasible_nli_social_classification 72 | task743_eurlex_summarization 73 | task1388_cb_entailment 74 | task671_ambigqa_text_generation 75 | task121_zest_text_modification 76 | task1345_glue_qqp_question_paraprashing 77 | task330_gap_answer_generation 78 | task1342_amazon_us_reviews_title 79 | task329_gap_classification 80 | task281_points_of_correspondence 81 | 
task036_qasc_topic_word_to_generate_related_fact 82 | task1554_scitail_classification 83 | task050_multirc_answerability 84 | task362_spolin_yesand_prompt_response_sub_classification 85 | task1557_jfleg_answer_generation 86 | task249_enhanced_wsc_pronoun_disambiguation 87 | task957_e2e_nlg_text_generation_generate 88 | task418_persent_title_generation 89 | task614_glucose_cause_event_detection 90 | task677_ollie_sentence_answer_generation 91 | task220_rocstories_title_classification 92 | task1631_openpi_answer_generation 93 | task232_iirc_link_number_classification 94 | task1391_winogrande_easy_answer_generation 95 | task1358_xlsum_title_generation 96 | task1533_daily_dialog_formal_classification 97 | task1156_bard_analogical_reasoning_tools 98 | task1659_title_generation 99 | task1624_disfl_qa_question_yesno_classification 100 | task1158_bard_analogical_reasoning_manipulating_items 101 | task827_copa_commonsense_reasoning 102 | task1153_bard_analogical_reasoning_affordance 103 | task393_plausible_result_generation 104 | task879_schema_guided_dstc8_classification 105 | task613_politifact_text_generation 106 | task219_rocstories_title_answer_generation 107 | task190_snli_classification 108 | task200_mnli_entailment_classification 109 | task1534_daily_dialog_question_classification 110 | task1540_parsed_pdfs_summarization 111 | task442_com_qa_paraphrase_question_generation 112 | task392_inverse_causal_relationship 113 | task1562_zest_text_modification 114 | task640_esnli_classification 115 | task1622_disfl_qa_text_modication 116 | task623_ohsumed_yes_no_answer_generation 117 | task020_mctaco_span_based_question 118 | task642_esnli_classification 119 | task102_commongen_sentence_generation 120 | -------------------------------------------------------------------------------- /reproduce_splits/default_dev/test_tasks.txt: -------------------------------------------------------------------------------- 1 | task1356_xlsum_title_generation 2 | 
task893_gap_fill_the_blank_coreference_resolution 3 | task641_esnli_classification 4 | task1529_scitail1.1_classification 5 | task202_mnli_contradiction_classification 6 | task670_ambigqa_question_generation 7 | task1393_superglue_copa_text_completion 8 | task1344_glue_entailment_classification 9 | task288_gigaword_summarization 10 | task1387_anli_r3_entailment 11 | task1664_winobias_text_generation 12 | task1161_coda19_title_generation 13 | task880_schema_guided_dstc8_classification 14 | task738_perspectrum_classification 15 | task1439_doqa_cooking_isanswerable 16 | task645_summarization 17 | task619_ohsumed_abstract_title_generation 18 | task1728_web_nlg_data_to_text 19 | task1640_aqa1.0_answerable_unanswerable_question_classification 20 | task648_answer_generation 21 | task242_tweetqa_classification 22 | task620_ohsumed_medical_subject_headings_answer_generation 23 | task1159_bard_analogical_reasoning_containers 24 | task500_scruples_anecdotes_title_generation 25 | task890_gcwd_classification 26 | task039_qasc_find_overlapping_words 27 | task1154_bard_analogical_reasoning_travel 28 | task1612_sick_label_classification 29 | task1442_doqa_movies_isanswerable 30 | task233_iirc_link_exists_classification 31 | task936_defeasible_nli_snli_classification 32 | task1386_anli_r2_entailment 33 | task1152_bard_analogical_reasoning_causation 34 | task290_tellmewhy_question_answerability 35 | task304_numeric_fused_head_resolution 36 | task760_msr_sqa_long_text_generation 37 | task035_winogrande_question_modification_person 38 | task569_recipe_nlg_text_generation 39 | task391_causal_relationship 40 | task891_gap_coreference_resolution 41 | task1586_scifact_title_generation 42 | task602_wikitext-103_answer_generation 43 | task1195_disflqa_disfluent_to_fluent_conversion 44 | task1409_dart_text_generation 45 | task033_winogrande_answer_generation 46 | task1407_dart_question_generation 47 | task402_grailqa_paraphrase_generation 48 | task201_mnli_neutral_classification 49 | 
task520_aquamuse_answer_given_in_passage 50 | task892_gap_reverse_coreference_resolution 51 | task828_copa_commonsense_cause_effect 52 | task769_qed_summarization 53 | task1155_bard_analogical_reasoning_trash_or_treasure 54 | task1385_anli_r1_entailment 55 | task1531_daily_dialog_type_classification 56 | task1516_imppres_naturallanguageinference 57 | task1394_meta_woz_task_classification 58 | task401_numeric_fused_head_reference 59 | task1598_nyc_long_text_generation 60 | task1615_sick_tclassify_b_relation_a 61 | task970_sherliic_causal_relationship 62 | task1390_wscfixed_coreference 63 | task199_mnli_classification 64 | task034_winogrande_question_modification_object 65 | task133_winowhy_reason_plausibility_detection 66 | task226_english_language_answer_relevance_classification 67 | task510_reddit_tifu_title_summarization 68 | task935_defeasible_nli_atomic_classification 69 | task349_squad2.0_answerable_unanswerable_question_classification 70 | task1157_bard_analogical_reasoning_rooms_for_containers 71 | task937_defeasible_nli_social_classification 72 | task743_eurlex_summarization 73 | task1388_cb_entailment 74 | task671_ambigqa_text_generation 75 | task121_zest_text_modification 76 | task1345_glue_qqp_question_paraprashing 77 | task330_gap_answer_generation 78 | task1342_amazon_us_reviews_title 79 | task329_gap_classification 80 | task281_points_of_correspondence 81 | task036_qasc_topic_word_to_generate_related_fact 82 | task1554_scitail_classification 83 | task050_multirc_answerability 84 | task362_spolin_yesand_prompt_response_sub_classification 85 | task1557_jfleg_answer_generation 86 | task249_enhanced_wsc_pronoun_disambiguation 87 | task957_e2e_nlg_text_generation_generate 88 | task418_persent_title_generation 89 | task614_glucose_cause_event_detection 90 | task677_ollie_sentence_answer_generation 91 | task220_rocstories_title_classification 92 | task1631_openpi_answer_generation 93 | task232_iirc_link_number_classification 94 | 
task1391_winogrande_easy_answer_generation 95 | task1358_xlsum_title_generation 96 | task1533_daily_dialog_formal_classification 97 | task1156_bard_analogical_reasoning_tools 98 | task1659_title_generation 99 | task1624_disfl_qa_question_yesno_classification 100 | task1158_bard_analogical_reasoning_manipulating_items 101 | task827_copa_commonsense_reasoning 102 | task1153_bard_analogical_reasoning_affordance 103 | task393_plausible_result_generation 104 | task879_schema_guided_dstc8_classification 105 | task613_politifact_text_generation 106 | task219_rocstories_title_answer_generation 107 | task190_snli_classification 108 | task200_mnli_entailment_classification 109 | task1534_daily_dialog_question_classification 110 | task1540_parsed_pdfs_summarization 111 | task442_com_qa_paraphrase_question_generation 112 | task392_inverse_causal_relationship 113 | task1562_zest_text_modification 114 | task640_esnli_classification 115 | task1622_disfl_qa_text_modication 116 | task623_ohsumed_yes_no_answer_generation 117 | task020_mctaco_span_based_question 118 | task642_esnli_classification 119 | task102_commongen_sentence_generation 120 | -------------------------------------------------------------------------------- /reproduce_splits/xlingual/test_tasks.txt: -------------------------------------------------------------------------------- 1 | task1561_clickbait_new_bg_summarization 2 | task969_xcopa_commonsense_cause_effect_et 3 | task463_parsinlu_entailment_classification 4 | task1627_copa_hr_classification 5 | task396_persianqa_classification 6 | task1177_xcopa_commonsense_cause_effect_ta 7 | task1182_xcopa_commonsense_reasoning_vi 8 | task942_copa_mr_commonsense_reasoning 9 | task1628_copa_hr_question_answering 10 | task941_copa_gu_commonsense_cause_effect 11 | task1184_xcopa_commonsense_reasoning_zh 12 | task1170_xcopa_commonsense_reasoning_id 13 | task1171_xcopa_commonsense_cause_effect_id 14 | task939_copa_hi_commonsense_cause_effect 15 | 
task1185_xcopa_commonsense_cause_effect_zh 16 | task1179_xcopa_commonsense_cause_effect_th 17 | task1174_xcopa_commonsense_reasoning_sw 18 | task1175_xcopa_commonsense_cause_effect_sw 19 | task943_copa_mr_commonsense_cause_effect 20 | task1168_xcopa_commonsense_reasoning_ht 21 | task1181_xcopa_commonsense_cause_effect_tr 22 | task1180_xcopa_commonsense_reasoning_tr 23 | task968_xcopa_commonsense_reasoning_et 24 | task1172_xcopa_commonsense_reasoning_it 25 | task938_copa_hi_commonsense_reasoning 26 | task1173_xcopa_commonsense_cause_effect_it 27 | task1629_copa_hr_classification 28 | task1626_copa_hr_question_answering 29 | task1176_xcopa_commonsense_reasoning_ta 30 | task1183_xcopa_commonsense_cause_effect_vi 31 | task464_parsinlu_entailment_sentence_generation 32 | task940_copa_gu_commonsense_reasoning 33 | task1169_xcopa_commonsense_cause_effect_ht 34 | task534_farstail_entailment 35 | task1178_xcopa_commonsense_reasoning_th 36 | --------------------------------------------------------------------------------