├── .gitignore ├── Datasets.py ├── README.md ├── configs ├── GPT2 │ ├── evaluation │ │ ├── invariantLAMA │ │ │ ├── t5_baseline.json │ │ │ ├── t5_initial.json │ │ │ ├── t5_kadapters.json │ │ │ ├── t5_lora.json │ │ │ ├── t5_mixreview.json │ │ │ ├── t5_modular_small.json │ │ │ └── t5_recadam.json │ │ └── newQA_easy │ │ │ ├── t5_baseline.json │ │ │ ├── t5_initial.json │ │ │ ├── t5_kadapters.json │ │ │ ├── t5_lora.json │ │ │ ├── t5_mixreview.json │ │ │ ├── t5_modular_small.json │ │ │ └── t5_recadam.json │ └── training │ │ ├── gpt2_baseline.json │ │ ├── gpt2_kadapters.json │ │ ├── gpt2_lora.json │ │ ├── gpt2_mixreview.json │ │ └── gpt2_recadam.json ├── full_setting │ ├── evaluation │ │ ├── invariantLAMA │ │ │ ├── t5_baseline.json │ │ │ ├── t5_initial.json │ │ │ ├── t5_kadapters.json │ │ │ ├── t5_lora.json │ │ │ ├── t5_mixreview.json │ │ │ ├── t5_modular_small.json │ │ │ └── t5_recadam.json │ │ ├── newLAMA │ │ │ ├── t5_baseline.json │ │ │ ├── t5_initial.json │ │ │ ├── t5_kadapters.json │ │ │ ├── t5_lora.json │ │ │ ├── t5_mixreview.json │ │ │ ├── t5_modular_small.json │ │ │ └── t5_recadam.json │ │ ├── newLAMA_easy │ │ │ ├── t5_baseline.json │ │ │ ├── t5_initial.json │ │ │ ├── t5_kadapters.json │ │ │ ├── t5_lora.json │ │ │ ├── t5_mixreview.json │ │ │ ├── t5_modular_small.json │ │ │ └── t5_recadam.json │ │ └── updatedLAMA │ │ │ ├── t5_baseline.json │ │ │ ├── t5_initial.json │ │ │ ├── t5_kadapters.json │ │ │ ├── t5_lora.json │ │ │ ├── t5_mixreview.json │ │ │ ├── t5_modular_small.json │ │ │ └── t5_recadam.json │ └── training │ │ ├── t5_baseline.json │ │ ├── t5_kadapters.json │ │ ├── t5_lora.json │ │ ├── t5_mixreview.json │ │ ├── t5_modular_small.json │ │ └── t5_recadam.json ├── kilt │ ├── t5_baseline │ │ ├── t5_ay2.json │ │ ├── t5_cweb.json │ │ ├── t5_eli5.json │ │ ├── t5_fever.json │ │ ├── t5_hotpotqa.json │ │ ├── t5_nq.json │ │ ├── t5_tqa.json │ │ ├── t5_trex.json │ │ ├── t5_wned.json │ │ ├── t5_wow.json │ │ └── t5_zsre.json │ ├── t5_initial │ │ ├── t5_ay2.json │ │ ├── 
t5_cweb.json │ │ ├── t5_eli5.json │ │ ├── t5_fever.json │ │ ├── t5_hotpotqa.json │ │ ├── t5_nq.json │ │ ├── t5_tqa.json │ │ ├── t5_trex.json │ │ ├── t5_wned.json │ │ ├── t5_wow.json │ │ └── t5_zsre.json │ ├── t5_kadapters │ │ ├── t5_ay2.json │ │ ├── t5_cweb.json │ │ ├── t5_eli5.json │ │ ├── t5_fever.json │ │ ├── t5_hotpotqa.json │ │ ├── t5_nq.json │ │ ├── t5_tqa.json │ │ ├── t5_trex.json │ │ ├── t5_wned.json │ │ ├── t5_wow.json │ │ └── t5_zsre.json │ ├── t5_lora │ │ ├── t5_ay2.json │ │ ├── t5_cweb.json │ │ ├── t5_eli5.json │ │ ├── t5_fever.json │ │ ├── t5_hotpotqa.json │ │ ├── t5_nq.json │ │ ├── t5_tqa.json │ │ ├── t5_trex.json │ │ ├── t5_wned.json │ │ ├── t5_wow.json │ │ └── t5_zsre.json │ ├── t5_mixreview │ │ ├── t5_ay2.json │ │ ├── t5_cweb.json │ │ ├── t5_eli5.json │ │ ├── t5_fever.json │ │ ├── t5_hotpotqa.json │ │ ├── t5_nq.json │ │ ├── t5_tqa.json │ │ ├── t5_trex.json │ │ ├── t5_wned.json │ │ ├── t5_wow.json │ │ └── t5_zsre.json │ ├── t5_modular_small │ │ ├── t5_ay2.json │ │ ├── t5_cweb.json │ │ ├── t5_eli5.json │ │ ├── t5_fever.json │ │ ├── t5_hotpotqa.json │ │ ├── t5_nq.json │ │ ├── t5_tqa.json │ │ ├── t5_trex.json │ │ ├── t5_wned.json │ │ ├── t5_wow.json │ │ └── t5_zsre.json │ └── t5_recadam │ │ ├── t5_ay2.json │ │ ├── t5_cweb.json │ │ ├── t5_eli5.json │ │ ├── t5_fever.json │ │ ├── t5_hotpotqa.json │ │ ├── t5_nq.json │ │ ├── t5_tqa.json │ │ ├── t5_trex.json │ │ ├── t5_wned.json │ │ ├── t5_wow.json │ │ └── t5_zsre.json ├── small_setting │ ├── evaluation │ │ ├── invariantLAMA │ │ │ ├── t5_baseline.json │ │ │ ├── t5_initial.json │ │ │ ├── t5_kadapters.json │ │ │ ├── t5_lora.json │ │ │ ├── t5_mixreview.json │ │ │ ├── t5_modular_small.json │ │ │ └── t5_recadam.json │ │ ├── newLAMA_easy1 │ │ │ ├── t5_baseline.json │ │ │ ├── t5_initial.json │ │ │ ├── t5_kadapters.json │ │ │ ├── t5_lora.json │ │ │ ├── t5_mixreview.json │ │ │ ├── t5_modular_small.json │ │ │ └── t5_recadam.json │ │ └── newLAMA_easy2 │ │ │ ├── t5_baseline.json │ │ │ ├── t5_initial.json │ │ │ ├── 
t5_kadapters.json │ │ │ ├── t5_lora.json │ │ │ ├── t5_mixreview.json │ │ │ ├── t5_modular_small.json │ │ │ └── t5_recadam.json │ └── training │ │ ├── t5_baseline.json │ │ ├── t5_kadapters.json │ │ ├── t5_lora.json │ │ ├── t5_mixreview.json │ │ ├── t5_modular_small.json │ │ └── t5_recadam.json └── split │ ├── evaluation │ ├── invariantLAMA │ │ ├── t5_baseline1.json │ │ ├── t5_baseline2.json │ │ ├── t5_initial.json │ │ ├── t5_kadapters1.json │ │ ├── t5_kadapters2.json │ │ ├── t5_lora1.json │ │ ├── t5_lora2.json │ │ ├── t5_mixreview1.json │ │ ├── t5_mixreview2.json │ │ ├── t5_modular_small1.json │ │ ├── t5_modular_small2.json │ │ ├── t5_recadam1.json │ │ └── t5_recadam2.json │ ├── newLAMA_easy1 │ │ ├── t5_baseline1.json │ │ ├── t5_baseline2.json │ │ ├── t5_initial.json │ │ ├── t5_kadapters1.json │ │ ├── t5_kadapters2.json │ │ ├── t5_lora1.json │ │ ├── t5_lora2.json │ │ ├── t5_mixreview1.json │ │ ├── t5_mixreview2.json │ │ ├── t5_modular_small1.json │ │ ├── t5_modular_small2.json │ │ ├── t5_recadam1.json │ │ └── t5_recadam2.json │ └── newLAMA_easy2 │ │ ├── t5_baseline1.json │ │ ├── t5_baseline2.json │ │ ├── t5_initial.json │ │ ├── t5_kadapters1.json │ │ ├── t5_kadapters2.json │ │ ├── t5_lora1.json │ │ ├── t5_lora2.json │ │ ├── t5_mixreview1.json │ │ ├── t5_mixreview2.json │ │ ├── t5_modular_small1.json │ │ ├── t5_modular_small2.json │ │ ├── t5_recadam1.json │ │ └── t5_recadam2.json │ └── training │ ├── t5_baseline.json │ ├── t5_baseline2.json │ ├── t5_kadapter.json │ ├── t5_kadapter2.json │ ├── t5_lora.json │ ├── t5_lora2.json │ ├── t5_mixreview.json │ ├── t5_mixreview2.json │ ├── t5_modular.json │ ├── t5_modular2.json │ ├── t5_recadam.json │ └── t5_recadam2.json ├── download_all_data.py ├── download_ckl_data.py ├── download_model_checkpoints.py ├── evaluation.py ├── models ├── GPT2_Model.py ├── Kadapter_GPT2.py ├── Kadapter_T5.py ├── Kadapter_T52.py ├── Lora_GPT2.py ├── Lora_T5.py ├── Lora_T52.py ├── Modular_GPT2.py ├── Modular_Small_T5.py ├── Modular_Small_T52.py ├── 
Modular_T5.py ├── RecAdam.py ├── T5_Model.py └── __init__.py ├── requirements.txt └── run.py /.gitignore: -------------------------------------------------------------------------------- 1 | wandb 2 | data 3 | outputs 4 | __pycache__ 5 | model_checkpoints 6 | log 7 | ckl_data -------------------------------------------------------------------------------- /configs/GPT2/evaluation/invariantLAMA/t5_baseline.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 50, 3 | "output_length" : 50, 4 | "num_train_epochs" : 1, 5 | "output_dir" : "", 6 | "dataset" : "invariantlama", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 32, 9 | "learning_rate" : 1e-3, 10 | "model" : "gpt2-large", 11 | "method": "baseline", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 1, 14 | "ngpu" : 4, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "0,1,2,3", 20 | "wandb_log": true, 21 | "wandb_project": "continual_learning_gpt2", 22 | "wandb_run_name" : "GPT2_large_baseline_lama_finetuning", 23 | "mode" : "finetune", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : false, 26 | "checkpoint_path" : "outputs/GPT2/baseline_last.ckpt" 27 | } -------------------------------------------------------------------------------- /configs/GPT2/evaluation/invariantLAMA/t5_initial.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 50, 3 | "output_length" : 50, 4 | "num_train_epochs" : 1, 5 | "output_dir" : "", 6 | "dataset" : "invariantlama", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 32, 9 | "learning_rate" : 1e-3, 10 | "model" : "gpt2-large", 11 | "method": "baseline", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 1, 14 | "ngpu" : 4, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 
| "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "0,1,2,3", 20 | "wandb_log": true, 21 | "wandb_project": "continual_learning_gpt2", 22 | "wandb_run_name" : "GPT2_large_initial_lama_finetuning", 23 | "mode" : "finetune", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : false, 26 | "checkpoint_path" : "outputs/GPT2/initial.ckpt" 27 | } -------------------------------------------------------------------------------- /configs/GPT2/evaluation/invariantLAMA/t5_kadapters.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 50, 3 | "output_length" : 50, 4 | "num_train_epochs" : 1, 5 | "output_dir" : "", 6 | "dataset" : "invariantlama", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 32, 9 | "learning_rate" : 1e-3, 10 | "model" : "gpt2-large", 11 | "method": "kadapter", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 1, 14 | "ngpu" : 4, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "0,1,2,3", 20 | "wandb_log": true, 21 | "wandb_project": "continual_learning_gpt2", 22 | "wandb_run_name" : "GPT2_large_kadapter_lama_finetuning", 23 | "mode" : "finetune", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : false, 26 | "checkpoint_path" : "outputs/GPT2/kadapter_last.ckpt" 27 | } -------------------------------------------------------------------------------- /configs/GPT2/evaluation/invariantLAMA/t5_lora.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 50, 3 | "output_length" : 50, 4 | "num_train_epochs" : 1, 5 | "output_dir" : "", 6 | "dataset" : "invariantlama", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 32, 9 | "learning_rate" : 1e-3, 10 | "model" : "gpt2-large", 11 | "method": "lora", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 1, 14 | "ngpu" : 4, 15 | "num_workers" : 
40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "0,1,2,3", 20 | "wandb_log": true, 21 | "wandb_project": "continual_learning_gpt2", 22 | "wandb_run_name" : "GPT2_large_lora_lama_finetuning", 23 | "mode" : "finetune", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : false, 26 | "checkpoint_path" : "outputs/GPT2/lora_last.ckpt" 27 | } -------------------------------------------------------------------------------- /configs/GPT2/evaluation/invariantLAMA/t5_mixreview.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 50, 3 | "output_length" : 50, 4 | "num_train_epochs" : 1, 5 | "output_dir" : "", 6 | "dataset" : "invariantlama", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 32, 9 | "learning_rate" : 1e-3, 10 | "model" : "gpt2-large", 11 | "method": "baseline", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 1, 14 | "ngpu" : 4, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "0,1,2,3", 20 | "wandb_log": true, 21 | "wandb_project": "continual_learning_gpt2", 22 | "wandb_run_name" : "GPT2_large_mixreview_lama_finetuning", 23 | "mode" : "finetune", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : false, 26 | "checkpoint_path" : "outputs/GPT2/mixreview_last.ckpt" 27 | } -------------------------------------------------------------------------------- /configs/GPT2/evaluation/invariantLAMA/t5_modular_small.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 50, 3 | "output_length" : 50, 4 | "num_train_epochs" : 1, 5 | "output_dir" : "", 6 | "dataset" : "invariantlama", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 32, 9 | "learning_rate" : 1e-3, 10 | "model" : "gpt2-large", 11 | "method": "modular_small", 12 | 
"freeze_level": 0, 13 | "gradient_accumulation_steps" : 1, 14 | "ngpu" : 4, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "0,1,2,3", 20 | "wandb_log": true, 21 | "wandb_project": "continual_learning_gpt2", 22 | "wandb_run_name" : "GPT2_large_modular_lama_finetuning", 23 | "mode" : "finetune", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : false, 26 | "checkpoint_path" : "outputs/GPT2/modular_last.ckpt" 27 | } -------------------------------------------------------------------------------- /configs/GPT2/evaluation/invariantLAMA/t5_recadam.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 50, 3 | "output_length" : 50, 4 | "num_train_epochs" : 1, 5 | "output_dir" : "", 6 | "dataset" : "invariantlama", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 32, 9 | "learning_rate" : 1e-3, 10 | "model" : "gpt2-large", 11 | "method": "baseline", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 1, 14 | "ngpu" : 4, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "0,1,2,3", 20 | "wandb_log": true, 21 | "wandb_project": "continual_learning_gpt2", 22 | "wandb_run_name" : "GPT2_large_recadam_lama_finetuning", 23 | "mode" : "finetune", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : false, 26 | "checkpoint_path" : "outputs/GPT2/recadam_last.ckpt" 27 | } -------------------------------------------------------------------------------- /configs/GPT2/evaluation/newQA_easy/t5_baseline.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 100, 3 | "output_length" : 100, 4 | "num_train_epochs" : 1, 5 | "output_dir" : "", 6 | "dataset" : "newqa_easy", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 8, 9 | 
"learning_rate" : 1e-3, 10 | "model" : "gpt2-large", 11 | "method": "baseline", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 1, 14 | "ngpu" : 4, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "0,1,2,3", 20 | "wandb_log": true, 21 | "wandb_project": "continual_learning_gpt2", 22 | "wandb_run_name" : "GPT2_large_baseline_newqa_finetuning", 23 | "mode" : "finetune", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : false, 26 | "checkpoint_path" : "outputs/GPT2/baseline_last.ckpt" 27 | } -------------------------------------------------------------------------------- /configs/GPT2/evaluation/newQA_easy/t5_initial.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 100, 3 | "output_length" : 100, 4 | "num_train_epochs" : 1, 5 | "output_dir" : "", 6 | "dataset" : "newqa_easy", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 8, 9 | "learning_rate" : 1e-3, 10 | "model" : "gpt2-large", 11 | "method": "baseline", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 1, 14 | "ngpu" : 4, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "0,1,2,3", 20 | "wandb_log": true, 21 | "wandb_project": "continual_learning_gpt2", 22 | "wandb_run_name" : "GPT2_large_initial_newqa_finetuning", 23 | "mode" : "finetune", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : false, 26 | "checkpoint_path" : "outputs/GPT2/initial.ckpt" 27 | } -------------------------------------------------------------------------------- /configs/GPT2/evaluation/newQA_easy/t5_kadapters.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 100, 3 | "output_length" : 100, 4 | "num_train_epochs" : 1, 5 | "output_dir" : "", 6 | "dataset" : 
"newqa_easy", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 8, 9 | "learning_rate" : 1e-3, 10 | "model" : "gpt2-large", 11 | "method": "kadapter", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 1, 14 | "ngpu" : 4, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "0,1,2,3", 20 | "wandb_log": true, 21 | "wandb_project": "continual_learning_gpt2", 22 | "wandb_run_name" : "GPT2_large_kadapter_newqa_finetuning", 23 | "mode" : "finetune", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : false, 26 | "checkpoint_path" : "outputs/GPT2/kadapter_last.ckpt" 27 | } -------------------------------------------------------------------------------- /configs/GPT2/evaluation/newQA_easy/t5_lora.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 100, 3 | "output_length" : 100, 4 | "num_train_epochs" : 1, 5 | "output_dir" : "", 6 | "dataset" : "newqa_easy", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 8, 9 | "learning_rate" : 1e-3, 10 | "model" : "gpt2-large", 11 | "method": "lora", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 1, 14 | "ngpu" : 4, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "0,1,2,3", 20 | "wandb_log": true, 21 | "wandb_project": "continual_learning_gpt2", 22 | "wandb_run_name" : "GPT2_large_lora_newqa_finetuning", 23 | "mode" : "finetune", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : false, 26 | "checkpoint_path" : "outputs/GPT2/lora_last.ckpt" 27 | } -------------------------------------------------------------------------------- /configs/GPT2/evaluation/newQA_easy/t5_mixreview.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 100, 3 | "output_length" : 100, 4 | 
"num_train_epochs" : 1, 5 | "output_dir" : "", 6 | "dataset" : "newqa_easy", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 8, 9 | "learning_rate" : 1e-3, 10 | "model" : "gpt2-large", 11 | "method": "baseline", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 1, 14 | "ngpu" : 4, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "0,1,2,3", 20 | "wandb_log": true, 21 | "wandb_project": "continual_learning_gpt2", 22 | "wandb_run_name" : "GPT2_large_mixreview_newqa_finetuning", 23 | "mode" : "finetune", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : false, 26 | "checkpoint_path" : "outputs/GPT2/mixreview_last.ckpt" 27 | } -------------------------------------------------------------------------------- /configs/GPT2/evaluation/newQA_easy/t5_modular_small.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 100, 3 | "output_length" : 100, 4 | "num_train_epochs" : 1, 5 | "output_dir" : "", 6 | "dataset" : "newqa_easy", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 8, 9 | "learning_rate" : 1e-3, 10 | "model" : "gpt2-large", 11 | "method": "modular_small", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 1, 14 | "ngpu" : 4, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "0,1,2,3", 20 | "wandb_log": true, 21 | "wandb_project": "continual_learning_gpt2", 22 | "wandb_run_name" : "GPT2_large_modular_newqa_finetuning", 23 | "mode" : "finetune", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : false, 26 | "checkpoint_path" : "outputs/GPT2/modular_last.ckpt" 27 | } -------------------------------------------------------------------------------- /configs/GPT2/evaluation/newQA_easy/t5_recadam.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 100, 3 | "output_length" : 100, 4 | "num_train_epochs" : 1, 5 | "output_dir" : "", 6 | "dataset" : "newqa_easy", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 8, 9 | "learning_rate" : 1e-3, 10 | "model" : "gpt2-large", 11 | "method": "baseline", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 1, 14 | "ngpu" : 4, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "0,1,2,3", 20 | "wandb_log": true, 21 | "wandb_project": "continual_learning_gpt2", 22 | "wandb_run_name" : "GPT2_large_recadam_newqa_finetuning", 23 | "mode" : "finetune", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : false, 26 | "checkpoint_path" : "outputs/GPT2/recadam_last.ckpt" 27 | } -------------------------------------------------------------------------------- /configs/GPT2/training/gpt2_baseline.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 350, 3 | "output_length" : 350, 4 | "num_train_epochs" : 8, 5 | "output_dir" : "outputs/GPT2_large_recentnews(small)_lr.0001_baseline", 6 | "dataset" : "recentnews", 7 | "dataset_version" : "small", 8 | "train_batch_size" : 1, 9 | "learning_rate" : 1e-4, 10 | "model" : "gpt2-large", 11 | "method": "baseline", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 3, 14 | "ngpu" : 1, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "2", 20 | "wandb_log": true, 21 | "wandb_project": "continual_learning_gpt2", 22 | "wandb_run_name" : "GPT2_large_recentnews(small)_lr.0001_baseline", 23 | "mode" : "pretrain", 24 | "use_lr_scheduling" : true, 25 | "check_validation" : false, 26 | "checkpoint_path" : "" 27 | } 
-------------------------------------------------------------------------------- /configs/GPT2/training/gpt2_kadapters.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 350, 3 | "output_length" : 350, 4 | "num_train_epochs" : 8, 5 | "output_dir" : "outputs/GPT2_large_recentnews(small)_lr.0001_kadapter", 6 | "dataset" : "recentnews", 7 | "dataset_version" : "small", 8 | "train_batch_size" : 5, 9 | "learning_rate" : 1e-4, 10 | "model" : "gpt2-large", 11 | "method": "kadapter", 12 | "freeze_level": 1, 13 | "gradient_accumulation_steps" : 3, 14 | "ngpu" : 4, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "0,1,2,3", 20 | "wandb_log": true, 21 | "wandb_project": "continual_learning_gpt2", 22 | "wandb_run_name" : "GPT2_large_recentnews(small)_lr.0001_kadapter", 23 | "mode" : "pretrain", 24 | "use_lr_scheduling" : true, 25 | "check_validation" : false, 26 | "checkpoint_path" : "" 27 | } -------------------------------------------------------------------------------- /configs/GPT2/training/gpt2_lora.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 350, 3 | "output_length" : 350, 4 | "num_train_epochs" : 8, 5 | "output_dir" : "outputs/GPT2_large_recentnews(small)_lr.0001_lora", 6 | "dataset" : "recentnews", 7 | "dataset_version" : "small", 8 | "train_batch_size" : 5, 9 | "learning_rate" : 1e-4, 10 | "model" : "gpt2-large", 11 | "method": "lora", 12 | "freeze_level": 1, 13 | "gradient_accumulation_steps" : 3, 14 | "ngpu" : 4, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "0,1,2,3", 20 | "wandb_log": true, 21 | "wandb_project": "continual_learning_gpt2", 22 | "wandb_run_name" : "GPT2_large_recentnews(small)_lr.0001_lora", 23 | "mode" : 
"pretrain", 24 | "use_lr_scheduling" : true, 25 | "check_validation" : false, 26 | "checkpoint_path" : "" 27 | } -------------------------------------------------------------------------------- /configs/GPT2/training/gpt2_mixreview.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 350, 3 | "output_length" : 350, 4 | "num_train_epochs" : 8, 5 | "output_dir" : "outputs/GPT2_large_recentnews(small)_lr.0001_mixreview", 6 | "dataset" : "recentnews", 7 | "dataset_version" : "small", 8 | "train_batch_size" : 5, 9 | "learning_rate" : 1e-4, 10 | "model" : "gpt2-large", 11 | "method": "mixreview", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 3, 14 | "ngpu" : 4, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "0,1,2,3", 20 | "wandb_log": true, 21 | "wandb_project": "continual_learning_gpt2", 22 | "wandb_run_name" : "GPT2_large_recentnews(small)_lr.0001_mixreview", 23 | "mode" : "pretrain", 24 | "use_lr_scheduling" : true, 25 | "check_validation" : false, 26 | "checkpoint_path" : "" 27 | } -------------------------------------------------------------------------------- /configs/GPT2/training/gpt2_recadam.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 350, 3 | "output_length" : 350, 4 | "num_train_epochs" : 8, 5 | "output_dir" : "outputs/GPT2_large_recentnews(small)_lr.0001_recadam", 6 | "dataset" : "recentnews", 7 | "dataset_version" : "small", 8 | "train_batch_size" : 3, 9 | "learning_rate" : 1e-4, 10 | "model" : "gpt2-large", 11 | "method": "recadam", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 5, 14 | "ngpu" : 4, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "4,5,6,7", 20 | "wandb_log": true, 21 | 
"wandb_project": "continual_learning_gpt2", 22 | "wandb_run_name" : "GPT2_large_recentnews(small)_lr.0001_recadam", 23 | "mode" : "pretrain", 24 | "use_lr_scheduling" : true, 25 | "check_validation" : false, 26 | "checkpoint_path" : "" 27 | } -------------------------------------------------------------------------------- /configs/full_setting/evaluation/invariantLAMA/t5_baseline.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 25, 3 | "output_length" : 4, 4 | "num_train_epochs" : 1, 5 | "output_dir" : "", 6 | "dataset" : "invariantlama", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 32, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "baseline", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 3, 14 | "ngpu" : 1, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "2", 20 | "wandb_log": false, 21 | "wandb_project": "continual_learning_evaluation", 22 | "wandb_run_name" : "T5_large_recentnews(full)_lr.001_baseline", 23 | "mode" : "pretrain", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : true, 26 | "checkpoint_path" : "outputs/full/baseline_last.ckpt", 27 | "output_log" : "log/invariantLAMA/baseline.csv" 28 | } -------------------------------------------------------------------------------- /configs/full_setting/evaluation/invariantLAMA/t5_initial.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 25, 3 | "output_length" : 4, 4 | "num_train_epochs" : 1, 5 | "output_dir" : "", 6 | "dataset" : "invariantlama", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 32, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "baseline", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 3, 14 | "ngpu" : 1, 15 | "num_workers" : 40, 16 | 
"resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "2", 20 | "wandb_log": false, 21 | "wandb_project": "continual_learning_evaluation", 22 | "wandb_run_name" : "T5_large_recentnews(full)_lr.001_initial", 23 | "mode" : "pretrain", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : true, 26 | "checkpoint_path" : "", 27 | "output_log" : "log/invariantLAMA/initial.csv" 28 | } -------------------------------------------------------------------------------- /configs/full_setting/evaluation/invariantLAMA/t5_kadapters.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 25, 3 | "output_length" : 4, 4 | "num_train_epochs" : 1, 5 | "output_dir" : "", 6 | "dataset" : "invariantlama", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 32, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "kadapter", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 3, 14 | "ngpu" : 1, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "2", 20 | "wandb_log": false, 21 | "wandb_project": "continual_learning_evaluation", 22 | "wandb_run_name" : "T5_large_recentnews(full)_lr.001_kadapter", 23 | "mode" : "pretrain", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : true, 26 | "checkpoint_path" : "outputs/full/kadapter_last.ckpt", 27 | "output_log" : "log/invariantLAMA/kadapter.csv" 28 | } -------------------------------------------------------------------------------- /configs/full_setting/evaluation/invariantLAMA/t5_lora.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 25, 3 | "output_length" : 4, 4 | "num_train_epochs" : 1, 5 | "output_dir" : "", 6 | "dataset" : "invariantlama", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 32, 9 
| "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "lora", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 3, 14 | "ngpu" : 1, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "2", 20 | "wandb_log": false, 21 | "wandb_project": "continual_learning_evaluation", 22 | "wandb_run_name" : "T5_large_recentnews(full)_lr.001_lora", 23 | "mode" : "pretrain", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : true, 26 | "checkpoint_path" : "outputs/full/lora_last.ckpt", 27 | "output_log" : "log/invariantLAMA/lora.csv" 28 | } -------------------------------------------------------------------------------- /configs/full_setting/evaluation/invariantLAMA/t5_mixreview.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 25, 3 | "output_length" : 4, 4 | "num_train_epochs" : 1, 5 | "output_dir" : "", 6 | "dataset" : "invariantlama", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 32, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "baseline", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 3, 14 | "ngpu" : 1, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "2", 20 | "wandb_log": false, 21 | "wandb_project": "continual_learning_evaluation", 22 | "wandb_run_name" : "T5_large_recentnews(full)_lr.001_mixreview", 23 | "mode" : "pretrain", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : true, 26 | "checkpoint_path" : "outputs/full/mixreview_last.ckpt", 27 | "output_log" : "log/invariantLAMA/mixreview.csv" 28 | } -------------------------------------------------------------------------------- /configs/full_setting/evaluation/invariantLAMA/t5_modular_small.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 25, 3 | "output_length" : 4, 4 | "num_train_epochs" : 1, 5 | "output_dir" : "", 6 | "dataset" : "invariantlama", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 32, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "modular_small", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 3, 14 | "ngpu" : 1, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "2", 20 | "wandb_log": false, 21 | "wandb_project": "continual_learning_evaluation", 22 | "wandb_run_name" : "T5_large_recentnews(full)_lr.001_modular", 23 | "mode" : "pretrain", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : true, 26 | "checkpoint_path" : "outputs/full/modular_last.ckpt", 27 | "output_log" : "log/invariantLAMA/modular.csv" 28 | } -------------------------------------------------------------------------------- /configs/full_setting/evaluation/invariantLAMA/t5_recadam.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 25, 3 | "output_length" : 4, 4 | "num_train_epochs" : 1, 5 | "output_dir" : "", 6 | "dataset" : "invariantlama", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 32, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "baseline", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 3, 14 | "ngpu" : 1, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "2", 20 | "wandb_log": false, 21 | "wandb_project": "continual_learning_evaluation", 22 | "wandb_run_name" : "T5_large_recentnews(full)_lr.001_recadam", 23 | "mode" : "pretrain", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : true, 26 | "checkpoint_path" : 
"outputs/full/recadam_last.ckpt", 27 | "output_log" : "log/invariantLAMA/recadam.csv" 28 | } -------------------------------------------------------------------------------- /configs/full_setting/evaluation/newLAMA/t5_baseline.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 50, 3 | "output_length" : 10, 4 | "num_train_epochs" : 1, 5 | "output_dir" : "", 6 | "dataset" : "newlama", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 32, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "baseline", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 3, 14 | "ngpu" : 1, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "2", 20 | "wandb_log": false, 21 | "wandb_project": "continual_learning_evaluation", 22 | "wandb_run_name" : "T5_large_recentnews(full)_lr.001_baseline", 23 | "mode" : "pretrain", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : true, 26 | "checkpoint_path" : "outputs/full/baseline_last.ckpt", 27 | "output_log" : "log/newLAMA/baseline.csv" 28 | } -------------------------------------------------------------------------------- /configs/full_setting/evaluation/newLAMA/t5_initial.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 50, 3 | "output_length" : 10, 4 | "num_train_epochs" : 1, 5 | "output_dir" : "", 6 | "dataset" : "newlama", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 32, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "baseline", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 3, 14 | "ngpu" : 1, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "2", 20 | "wandb_log": false, 21 | "wandb_project": 
"continual_learning_evaluation", 22 | "wandb_run_name" : "T5_large_recentnews(full)_lr.001_initial", 23 | "mode" : "pretrain", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : true, 26 | "checkpoint_path" : "", 27 | "output_log" : "log/newLAMA/initial.csv" 28 | } -------------------------------------------------------------------------------- /configs/full_setting/evaluation/newLAMA/t5_kadapters.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 50, 3 | "output_length" : 10, 4 | "num_train_epochs" : 1, 5 | "output_dir" : "", 6 | "dataset" : "newlama", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 32, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "kadapter", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 3, 14 | "ngpu" : 1, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "2", 20 | "wandb_log": false, 21 | "wandb_project": "continual_learning_evaluation", 22 | "wandb_run_name" : "T5_large_recentnews(full)_lr.001_kadapter", 23 | "mode" : "pretrain", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : true, 26 | "checkpoint_path" : "outputs/full/kadapter_last.ckpt", 27 | "output_log" : "log/newLAMA/kadapter.csv" 28 | } -------------------------------------------------------------------------------- /configs/full_setting/evaluation/newLAMA/t5_lora.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 50, 3 | "output_length" : 10, 4 | "num_train_epochs" : 1, 5 | "output_dir" : "", 6 | "dataset" : "newlama", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 32, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "lora", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 3, 14 | "ngpu" : 1, 15 | "num_workers" : 40, 16 | 
"resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "2", 20 | "wandb_log": false, 21 | "wandb_project": "continual_learning_evaluation", 22 | "wandb_run_name" : "T5_large_recentnews(full)_lr.001_lora", 23 | "mode" : "pretrain", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : true, 26 | "checkpoint_path" : "outputs/full/lora_last.ckpt", 27 | "output_log" : "log/newLAMA/lora.csv" 28 | } -------------------------------------------------------------------------------- /configs/full_setting/evaluation/newLAMA/t5_mixreview.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 50, 3 | "output_length" : 10, 4 | "num_train_epochs" : 1, 5 | "output_dir" : "", 6 | "dataset" : "newlama", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 32, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "baseline", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 3, 14 | "ngpu" : 1, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "2", 20 | "wandb_log": false, 21 | "wandb_project": "continual_learning_evaluation", 22 | "wandb_run_name" : "T5_large_recentnews(full)_lr.001_mixreview", 23 | "mode" : "pretrain", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : true, 26 | "checkpoint_path" : "outputs/full/mixreview_last.ckpt", 27 | "output_log" : "log/newLAMA/mixreview.csv" 28 | } -------------------------------------------------------------------------------- /configs/full_setting/evaluation/newLAMA/t5_modular_small.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 50, 3 | "output_length" : 10, 4 | "num_train_epochs" : 1, 5 | "output_dir" : "", 6 | "dataset" : "newlama", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 32, 9 
| "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "modular_small", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 3, 14 | "ngpu" : 1, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "2", 20 | "wandb_log": false, 21 | "wandb_project": "continual_learning_evaluation", 22 | "wandb_run_name" : "T5_large_recentnews(full)_lr.001_modular", 23 | "mode" : "pretrain", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : true, 26 | "checkpoint_path" : "outputs/full/modular_last.ckpt", 27 | "output_log" : "log/newLAMA/modular.csv" 28 | } -------------------------------------------------------------------------------- /configs/full_setting/evaluation/newLAMA/t5_recadam.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 50, 3 | "output_length" : 10, 4 | "num_train_epochs" : 1, 5 | "output_dir" : "", 6 | "dataset" : "newlama", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 32, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "baseline", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 3, 14 | "ngpu" : 1, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "2", 20 | "wandb_log": false, 21 | "wandb_project": "continual_learning_evaluation", 22 | "wandb_run_name" : "T5_large_recentnews(full)_lr.001_recadam", 23 | "mode" : "pretrain", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : true, 26 | "checkpoint_path" : "outputs/full/recadam_last.ckpt", 27 | "output_log" : "log/newLAMA/recadam.csv" 28 | } -------------------------------------------------------------------------------- /configs/full_setting/evaluation/newLAMA_easy/t5_baseline.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 150, 3 | "output_length" : 10, 4 | "num_train_epochs" : 1, 5 | "output_dir" : "", 6 | "dataset" : "newlama_easy", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 32, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "baseline", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 3, 14 | "ngpu" : 1, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "2", 20 | "wandb_log": false, 21 | "wandb_project": "continual_learning_evaluation", 22 | "wandb_run_name" : "T5_large_recentnews(full)_lr.001_baseline", 23 | "mode" : "pretrain", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : true, 26 | "checkpoint_path" : "outputs/full/baseline_last.ckpt", 27 | "output_log" : "log/newLAMA_Easy/baseline.csv" 28 | } -------------------------------------------------------------------------------- /configs/full_setting/evaluation/newLAMA_easy/t5_initial.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 150, 3 | "output_length" : 10, 4 | "num_train_epochs" : 1, 5 | "output_dir" : "", 6 | "dataset" : "newlama_easy", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 32, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "baseline", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 3, 14 | "ngpu" : 1, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "2", 20 | "wandb_log": false, 21 | "wandb_project": "continual_learning_evaluation", 22 | "wandb_run_name" : "T5_large_recentnews(full)_lr.001_initial", 23 | "mode" : "pretrain", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : true, 26 | "checkpoint_path" : "",
27 | "output_log" : "log/newLAMA_Easy/initial.csv" 28 | } -------------------------------------------------------------------------------- /configs/full_setting/evaluation/newLAMA_easy/t5_kadapters.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 150, 3 | "output_length" : 10, 4 | "num_train_epochs" : 1, 5 | "output_dir" : "", 6 | "dataset" : "newlama_easy", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 32, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "kadapter", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 3, 14 | "ngpu" : 1, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "2", 20 | "wandb_log": false, 21 | "wandb_project": "continual_learning_evaluation", 22 | "wandb_run_name" : "T5_large_recentnews(full)_lr.001_kadapter", 23 | "mode" : "pretrain", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : true, 26 | "checkpoint_path" : "outputs/full/kadapter_last.ckpt", 27 | "output_log" : "log/newLAMA_Easy/kadapter.csv" 28 | } -------------------------------------------------------------------------------- /configs/full_setting/evaluation/newLAMA_easy/t5_lora.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 150, 3 | "output_length" : 10, 4 | "num_train_epochs" : 1, 5 | "output_dir" : "", 6 | "dataset" : "newlama_easy", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 32, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "lora", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 3, 14 | "ngpu" : 1, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "2", 20 | "wandb_log": false, 21 | "wandb_project":
"continual_learning_evaluation", 22 | "wandb_run_name" : "T5_large_recentnews(full)_lr.001_lora", 23 | "mode" : "pretrain", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : true, 26 | "checkpoint_path" : "outputs/full/lora_last.ckpt", 27 | "output_log" : "log/newLAMA_Easy/lora.csv" 28 | } -------------------------------------------------------------------------------- /configs/full_setting/evaluation/newLAMA_easy/t5_mixreview.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 150, 3 | "output_length" : 10, 4 | "num_train_epochs" : 1, 5 | "output_dir" : "", 6 | "dataset" : "newlama_easy", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 32, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "baseline", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 3, 14 | "ngpu" : 1, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "2", 20 | "wandb_log": false, 21 | "wandb_project": "continual_learning_evaluation", 22 | "wandb_run_name" : "T5_large_recentnews(full)_lr.001_mixreview", 23 | "mode" : "pretrain", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : true, 26 | "checkpoint_path" : "outputs/full/mixreview_last.ckpt", 27 | "output_log" : "log/newLAMA_Easy/mixreview.csv" 28 | } -------------------------------------------------------------------------------- /configs/full_setting/evaluation/newLAMA_easy/t5_modular_small.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 150, 3 | "output_length" : 10, 4 | "num_train_epochs" : 1, 5 | "output_dir" : "", 6 | "dataset" : "newlama_easy", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 32, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "modular_small", 12 | "freeze_level": 0, 13 | 
"gradient_accumulation_steps" : 3, 14 | "ngpu" : 1, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "2", 20 | "wandb_log": false, 21 | "wandb_project": "continual_learning_evaluation", 22 | "wandb_run_name" : "T5_large_recentnews(full)_lr.001_modular", 23 | "mode" : "pretrain", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : true, 26 | "checkpoint_path" : "outputs/full/modular_last.ckpt", 27 | "output_log" : "log/newLAMA_Easy/modular.csv" 28 | } -------------------------------------------------------------------------------- /configs/full_setting/evaluation/newLAMA_easy/t5_recadam.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 150, 3 | "output_length" : 10, 4 | "num_train_epochs" : 1, 5 | "output_dir" : "", 6 | "dataset" : "newlama_easy", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 32, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "baseline", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 3, 14 | "ngpu" : 1, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "2", 20 | "wandb_log": false, 21 | "wandb_project": "continual_learning_evaluation", 22 | "wandb_run_name" : "T5_large_recentnews(full)_lr.001_recadam", 23 | "mode" : "pretrain", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : true, 26 | "checkpoint_path" : "outputs/full/recadam_last.ckpt", 27 | "output_log" : "log/newLAMA_Easy/recadam.csv" 28 | } -------------------------------------------------------------------------------- /configs/full_setting/evaluation/updatedLAMA/t5_baseline.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 50, 3 | "output_length" : 10, 4 | "num_train_epochs" : 1, 5 | 
"output_dir" : "", 6 | "dataset" : "updatedlama", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 32, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "baseline", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 3, 14 | "ngpu" : 1, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "2", 20 | "wandb_log": false, 21 | "wandb_project": "continual_learning_evaluation", 22 | "wandb_run_name" : "T5_large_recentnews(full)_lr.001_baseline", 23 | "mode" : "pretrain", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : true, 26 | "checkpoint_path" : "outputs/full/baseline_last.ckpt", 27 | "output_log" : "log/updatedLAMA/baseline.csv" 28 | } -------------------------------------------------------------------------------- /configs/full_setting/evaluation/updatedLAMA/t5_initial.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 50, 3 | "output_length" : 10, 4 | "num_train_epochs" : 1, 5 | "output_dir" : "", 6 | "dataset" : "updatedlama", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 32, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "baseline", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 3, 14 | "ngpu" : 1, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "2", 20 | "wandb_log": false, 21 | "wandb_project": "continual_learning_evaluation", 22 | "wandb_run_name" : "T5_large_recentnews(full)_lr.001_initial", 23 | "mode" : "pretrain", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : true, 26 | "checkpoint_path" : "", 27 | "output_log" : "log/updatedLAMA/initial.csv" 28 | } -------------------------------------------------------------------------------- 
/configs/full_setting/evaluation/updatedLAMA/t5_kadapters.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 50, 3 | "output_length" : 10, 4 | "num_train_epochs" : 1, 5 | "output_dir" : "", 6 | "dataset" : "updatedlama", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 32, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "kadapter", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 3, 14 | "ngpu" : 1, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "2", 20 | "wandb_log": false, 21 | "wandb_project": "continual_learning_evaluation", 22 | "wandb_run_name" : "T5_large_recentnews(full)_lr.001_kadapter", 23 | "mode" : "pretrain", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : true, 26 | "checkpoint_path" : "outputs/full/kadapter_last.ckpt", 27 | "output_log" : "log/updatedLAMA/kadapter.csv" 28 | } -------------------------------------------------------------------------------- /configs/full_setting/evaluation/updatedLAMA/t5_lora.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 50, 3 | "output_length" : 10, 4 | "num_train_epochs" : 1, 5 | "output_dir" : "", 6 | "dataset" : "updatedlama", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 32, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "lora", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 3, 14 | "ngpu" : 1, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "2", 20 | "wandb_log": false, 21 | "wandb_project": "continual_learning_evaluation", 22 | "wandb_run_name" : "T5_large_recentnews(full)_lr.001_lora", 23 | "mode" : "pretrain", 24 | "use_lr_scheduling" : false, 25 | 
"check_validation" : true, 26 | "checkpoint_path" : "outputs/full/lora_last.ckpt", 27 | "output_log" : "log/updatedLAMA/lora.csv" 28 | } -------------------------------------------------------------------------------- /configs/full_setting/evaluation/updatedLAMA/t5_mixreview.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 50, 3 | "output_length" : 10, 4 | "num_train_epochs" : 1, 5 | "output_dir" : "", 6 | "dataset" : "updatedlama", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 32, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "baseline", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 3, 14 | "ngpu" : 1, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "2", 20 | "wandb_log": false, 21 | "wandb_project": "continual_learning_evaluation", 22 | "wandb_run_name" : "T5_large_recentnews(full)_lr.001_mixreview", 23 | "mode" : "pretrain", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : true, 26 | "checkpoint_path" : "outputs/full/mixreview_last.ckpt", 27 | "output_log" : "log/updatedLAMA/mixreview.csv" 28 | } -------------------------------------------------------------------------------- /configs/full_setting/evaluation/updatedLAMA/t5_modular_small.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 50, 3 | "output_length" : 10, 4 | "num_train_epochs" : 1, 5 | "output_dir" : "", 6 | "dataset" : "updatedlama", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 32, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "modular_small", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 3, 14 | "ngpu" : 1, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | 
"CUDA_VISIBLE_DEVICES" : "2", 20 | "wandb_log": false, 21 | "wandb_project": "continual_learning_evaluation", 22 | "wandb_run_name" : "T5_large_recentnews(full)_lr.001_modular", 23 | "mode" : "pretrain", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : true, 26 | "checkpoint_path" : "outputs/full/modular_last.ckpt", 27 | "output_log" : "log/updatedLAMA/modular.csv" 28 | } -------------------------------------------------------------------------------- /configs/full_setting/evaluation/updatedLAMA/t5_recadam.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 50, 3 | "output_length" : 10, 4 | "num_train_epochs" : 1, 5 | "output_dir" : "", 6 | "dataset" : "updatedlama", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 32, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "baseline", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 3, 14 | "ngpu" : 1, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "2", 20 | "wandb_log": false, 21 | "wandb_project": "continual_learning_evaluation", 22 | "wandb_run_name" : "T5_large_recentnews(full)_lr.001_recadam", 23 | "mode" : "pretrain", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : true, 26 | "checkpoint_path" : "outputs/full/recadam_last.ckpt", 27 | "output_log" : "log/updatedLAMA/recadam.csv" 28 | } -------------------------------------------------------------------------------- /configs/full_setting/training/t5_baseline.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 350, 3 | "output_length" : 350, 4 | "num_train_epochs" : 8, 5 | "output_dir" : "outputs/T5_large_recentnews(full)_lr.001_baseline", 6 | "dataset" : "recentnews", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 5, 9 | "learning_rate" : 1e-3, 10 | "model" : 
"google/t5-large-ssm", 11 | "method": "baseline", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 3, 14 | "ngpu" : 4, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "0,1,2,3", 20 | "wandb_log": true, 21 | "wandb_project": "continual_learning_3", 22 | "wandb_run_name" : "T5_large_recentnews(full)_lr.001_baseline", 23 | "mode" : "pretrain", 24 | "use_lr_scheduling" : true, 25 | "check_validation" : false, 26 | "checkpoint_path" : "" 27 | } -------------------------------------------------------------------------------- /configs/full_setting/training/t5_kadapters.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 350, 3 | "output_length" : 350, 4 | "num_train_epochs" : 8, 5 | "output_dir" : "outputs/T5_large_recentnews(full)_lr.001_adapters", 6 | "dataset" : "recentnews", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 5, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "kadapter", 12 | "freeze_level": 1, 13 | "gradient_accumulation_steps" : 3, 14 | "ngpu" : 4, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "0,1,2,3", 20 | "wandb_log": true, 21 | "wandb_project": "continual_learning_3", 22 | "wandb_run_name" : "T5_large_recentnews(full)_lr.001_adapters", 23 | "mode" : "pretrain", 24 | "use_lr_scheduling" : true, 25 | "check_validation" : false, 26 | "checkpoint_path" : "" 27 | } -------------------------------------------------------------------------------- /configs/full_setting/training/t5_lora.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 350, 3 | "output_length" : 350, 4 | "num_train_epochs" : 8, 5 | "output_dir" : "outputs/T5_large_recentnews(full)_lr.001_lora", 6 | "dataset" : 
"recentnews", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 5, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "lora", 12 | "freeze_level": 1, 13 | "gradient_accumulation_steps" : 3, 14 | "ngpu" : 4, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "0,1,2,3", 20 | "wandb_log": true, 21 | "wandb_project": "continual_learning_3", 22 | "wandb_run_name" : "T5_large_recentnews(full)_lr.001_lora", 23 | "mode" : "pretrain", 24 | "use_lr_scheduling" : true, 25 | "check_validation" : false, 26 | "checkpoint_path" : "" 27 | } -------------------------------------------------------------------------------- /configs/full_setting/training/t5_mixreview.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 350, 3 | "output_length" : 350, 4 | "num_train_epochs" : 8, 5 | "output_dir" : "outputs/T5_large_recentnews(full)_lr.001_mixreview", 6 | "dataset" : "recentnews", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 5, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "mixreview", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 3, 14 | "ngpu" : 4, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "4,5,6,7", 20 | "wandb_log": true, 21 | "wandb_project": "continual_learning_3", 22 | "wandb_run_name" : "T5_large_recentnews(full)_lr.001_mixreview", 23 | "mode" : "pretrain", 24 | "use_lr_scheduling" : true, 25 | "check_validation" : false, 26 | "checkpoint_path" : "" 27 | } -------------------------------------------------------------------------------- /configs/full_setting/training/t5_modular_small.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 350, 3 | 
"output_length" : 350, 4 | "num_train_epochs" : 8, 5 | "output_dir" : "outputs/T5_large_recentnews(full)_modular_small", 6 | "dataset" : "recentnews", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 5, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "modular_small", 12 | "freeze_level": 1, 13 | "gradient_accumulation_steps" : 3, 14 | "ngpu" : 4, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "0,1,2,3", 20 | "wandb_log": true, 21 | "wandb_project": "continual_learning_3", 22 | "wandb_run_name" : "T5_large_recentnews(full)_modular_small", 23 | "mode" : "pretrain", 24 | "use_lr_scheduling" : true, 25 | "check_validation" : false, 26 | "checkpoint_path" : "" 27 | } -------------------------------------------------------------------------------- /configs/full_setting/training/t5_recadam.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 350, 3 | "output_length" : 350, 4 | "num_train_epochs" : 8, 5 | "output_dir" : "outputs/T5_large_recentnews(full)_lr.001_recadam_accum5", 6 | "dataset" : "recentnews", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 3, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "recadam", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 5, 14 | "ngpu" : 4, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "4,5,6,7", 20 | "wandb_log": true, 21 | "wandb_project": "continual_learning_3", 22 | "wandb_run_name" : "T5_large_recentnews(full)_lr.001_recadam_accum5", 23 | "mode" : "pretrain", 24 | "use_lr_scheduling" : true, 25 | "check_validation" : false, 26 | "checkpoint_path" : "" 27 | } -------------------------------------------------------------------------------- 
/configs/kilt/t5_baseline/t5_ay2.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 768, 3 | "output_length" : 6, 4 | "num_train_epochs" : 20, 5 | "output_dir" : "T5_large_baseline_AY2_finetuning_baseline", 6 | "dataset" : "AY2", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 4, 9 | "learning_rate" : 1e-4, 10 | "model" : "google/t5-large-ssm", 11 | "method": "models_baseline", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 1, 14 | "ngpu" : 4, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "0,1,2,3", 20 | "wandb_log": true, 21 | "wandb_project": "continual_learning_finetuning", 22 | "wandb_run_name" : "T5_large_baseline_AY2_finetuning_baseline", 23 | "mode" : "finetune", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : false, 26 | "checkpoint_path" : "outputs/T5_large_recentnews(small)_lr.001_baseline/last.ckpt" 27 | } -------------------------------------------------------------------------------- /configs/kilt/t5_baseline/t5_cweb.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 2048, 3 | "output_length" : 6, 4 | "num_train_epochs" : 20, 5 | "output_dir" : "", 6 | "dataset" : "CWEB", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 16, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "models_baseline", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 1, 14 | "ngpu" : 4, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "0,1,2,3", 20 | "wandb_log": false, 21 | "wandb_project": "continual_learning_finetuning", 22 | "wandb_run_name" : "T5_large_baseline_cweb_finetuning_baseline", 23 | "mode" : "finetune", 24 | "use_lr_scheduling" : false, 25 | 
"check_validation" : true, 26 | "checkpoint_path" : "T5_large_baseline_AY2_finetuning_baseline/last.ckpt", 27 | "output_log" : "log/kilt/CWEB/baseline.csv" 28 | } -------------------------------------------------------------------------------- /configs/kilt/t5_baseline/t5_eli5.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 35, 3 | "output_length" : 350, 4 | "num_train_epochs" : 6, 5 | "output_dir" : "", 6 | "dataset" : "ELI5", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 12, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "models_baseline", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 1, 14 | "ngpu" : 4, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "0,1,2,3", 20 | "wandb_log": true, 21 | "wandb_project": "continual_learning_finetuning", 22 | "wandb_run_name" : "T5_large_baseline_eli5_finetuning_baseline", 23 | "mode" : "finetune", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : false, 26 | "checkpoint_path" : "outputs/T5_large_recentnews(small)_lr.001_baseline/last.ckpt" 27 | } -------------------------------------------------------------------------------- /configs/kilt/t5_baseline/t5_fever.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 25, 3 | "output_length" : 10, 4 | "num_train_epochs" : 5, 5 | "output_dir" : "", 6 | "dataset" : "fever", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 128, 9 | "learning_rate" : 1e-4, 10 | "model" : "google/t5-large-ssm", 11 | "method": "models_baseline", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 1, 14 | "ngpu" : 4, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "0,1,2,3", 20 | "wandb_log": 
true, 21 | "wandb_project": "continual_learning_finetuning", 22 | "wandb_run_name" : "T5_large_baseline_fever_finetuning_baseline", 23 | "mode" : "finetune", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : false, 26 | "checkpoint_path" : "outputs/T5_large_recentnews(small)_lr.001_baseline/last.ckpt" 27 | } -------------------------------------------------------------------------------- /configs/kilt/t5_baseline/t5_hotpotqa.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 50, 3 | "output_length" : 8, 4 | "num_train_epochs" : 12, 5 | "output_dir" : "", 6 | "dataset" : "HotpotQA", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 128, 9 | "learning_rate" : 1e-4, 10 | "model" : "google/t5-large-ssm", 11 | "method": "models_baseline", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 1, 14 | "ngpu" : 4, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "0,1,2,3", 20 | "wandb_log": true, 21 | "wandb_project": "continual_learning_finetuning", 22 | "wandb_run_name" : "T5_large_baseline_hotpotqa_finetuning_baseline", 23 | "mode" : "finetune", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : false, 26 | "checkpoint_path" : "outputs/T5_large_recentnews(small)_lr.001_baseline/last.ckpt" 27 | } -------------------------------------------------------------------------------- /configs/kilt/t5_baseline/t5_nq.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 35, 3 | "output_length" : 6, 4 | "num_train_epochs" : 45, 5 | "output_dir" : "", 6 | "dataset" : "NQ", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 192, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "models_baseline", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 1, 14 | "ngpu" : 4, 15 | "num_workers" 
: 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "0,1,2,3", 20 | "wandb_log": true, 21 | "wandb_project": "continual_learning_finetuning", 22 | "wandb_run_name" : "T5_large_baseline_nq_finetuning_baseline", 23 | "mode" : "finetune", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : false, 26 | "checkpoint_path" : "outputs/T5_large_recentnews(small)_lr.001_baseline/last.ckpt" 27 | } -------------------------------------------------------------------------------- /configs/kilt/t5_baseline/t5_tqa.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 25, 3 | "output_length" : 10, 4 | "num_train_epochs" : 50, 5 | "output_dir" : "", 6 | "dataset" : "TriviaQA", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 128, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "models_baseline", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 1, 14 | "ngpu" : 4, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "0,1,2,3", 20 | "wandb_log": true, 21 | "wandb_project": "continual_learning_finetuning", 22 | "wandb_run_name" : "T5_large_baseline_triviaQA_finetuning_baseline", 23 | "mode" : "finetune", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : false, 26 | "checkpoint_path" : "outputs/T5_large_recentnews(small)_lr.001_baseline/last.ckpt" 27 | } -------------------------------------------------------------------------------- /configs/kilt/t5_baseline/t5_trex.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 35, 3 | "output_length" : 6, 4 | "num_train_epochs" : 9, 5 | "output_dir" : "", 6 | "dataset" : "TREX", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 192, 9 | "learning_rate" : 1e-3, 10 | "model" : 
"google/t5-large-ssm", 11 | "method": "models_baseline", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 1, 14 | "ngpu" : 4, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "0,1,2,3", 20 | "wandb_log": true, 21 | "wandb_project": "continual_learning_finetuning", 22 | "wandb_run_name" : "T5_large_baseline_trex_finetuning_baseline", 23 | "mode" : "finetune", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : false, 26 | "checkpoint_path" : "outputs/T5_large_recentnews(small)_lr.001_baseline/last.ckpt" 27 | } -------------------------------------------------------------------------------- /configs/kilt/t5_baseline/t5_wned.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 512, 3 | "output_length" : 6, 4 | "num_train_epochs" : 20, 5 | "output_dir" : "", 6 | "dataset" : "WNED", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 64, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "models_baseline", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 1, 14 | "ngpu" : 4, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "0,1,2,3", 20 | "wandb_log": false, 21 | "wandb_project": "continual_learning_finetuning", 22 | "wandb_run_name" : "T5_large_baseline_wned_finetuning_baseline", 23 | "mode" : "finetune", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : true, 26 | "checkpoint_path" : "T5_large_baseline_AY2_finetuning_baseline/last.ckpt", 27 | "output_log" : "log/kilt/WNED/baseline.csv" 28 | } -------------------------------------------------------------------------------- /configs/kilt/t5_baseline/t5_wow.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 175, 3 | 
"output_length" : 40, 4 | "num_train_epochs" : 8, 5 | "output_dir" : "", 6 | "dataset" : "WOW", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 24, 9 | "learning_rate" : 1e-4, 10 | "model" : "google/t5-large-ssm", 11 | "method": "models_baseline", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 1, 14 | "ngpu" : 4, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "0,1,2,3", 20 | "wandb_log": true, 21 | "wandb_project": "continual_learning_finetuning", 22 | "wandb_run_name" : "T5_large_baseline_wow_finetuning_baseline", 23 | "mode" : "finetune", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : false, 26 | "checkpoint_path" : "outputs/T5_large_recentnews(small)_lr.001_baseline/last.ckpt" 27 | } -------------------------------------------------------------------------------- /configs/kilt/t5_baseline/t5_zsre.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 25, 3 | "output_length" : 6, 4 | "num_train_epochs" : 30, 5 | "output_dir" : "", 6 | "dataset" : "zsRE", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 128, 9 | "learning_rate" : 1e-4, 10 | "model" : "google/t5-large-ssm", 11 | "method": "models_baseline", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 1, 14 | "ngpu" : 4, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "0,1,2,3", 20 | "wandb_log": true, 21 | "wandb_project": "continual_learning_finetuning", 22 | "wandb_run_name" : "T5_large_baseline_zsre_finetuning_baseline", 23 | "mode" : "finetune", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : false, 26 | "checkpoint_path" : "outputs/T5_large_recentnews(small)_lr.001_baseline/last.ckpt" 27 | } -------------------------------------------------------------------------------- 
/configs/kilt/t5_initial/t5_ay2.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 768, 3 | "output_length" : 6, 4 | "num_train_epochs" : 20, 5 | "output_dir" : "T5_large_initial_AY2_finetuning_baseline", 6 | "dataset" : "AY2", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 4, 9 | "learning_rate" : 1e-4, 10 | "model" : "google/t5-large-ssm", 11 | "method": "models_baseline", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 1, 14 | "ngpu" : 4, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "0,1,2,3", 20 | "wandb_log": true, 21 | "wandb_project": "continual_learning_finetuning", 22 | "wandb_run_name" : "T5_large_initial_AY2_finetuning_baseline", 23 | "mode" : "finetune", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : false, 26 | "checkpoint_path" : "" 27 | } -------------------------------------------------------------------------------- /configs/kilt/t5_initial/t5_cweb.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 2048, 3 | "output_length" : 6, 4 | "num_train_epochs" : 20, 5 | "output_dir" : "", 6 | "dataset" : "CWEB", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 16, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "models_baseline", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 1, 14 | "ngpu" : 4, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "0,1,2,3", 20 | "wandb_log": false, 21 | "wandb_project": "continual_learning_finetuning", 22 | "wandb_run_name" : "T5_large_initial_cweb_finetuning_baseline", 23 | "mode" : "finetune", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : true, 26 | "checkpoint_path" : 
"T5_large_initial_AY2_finetuning_baseline/last.ckpt", 27 | "output_log" : "log/kilt/CWEB/initial.csv" 28 | } -------------------------------------------------------------------------------- /configs/kilt/t5_initial/t5_eli5.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 35, 3 | "output_length" : 350, 4 | "num_train_epochs" : 6, 5 | "output_dir" : "", 6 | "dataset" : "ELI5", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 12, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "models_baseline", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 1, 14 | "ngpu" : 4, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "0,1,2,3", 20 | "wandb_log": true, 21 | "wandb_project": "continual_learning_finetuning", 22 | "wandb_run_name" : "T5_large_initial_eli5_finetuning_baseline", 23 | "mode" : "finetune", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : false, 26 | "checkpoint_path" : "" 27 | } -------------------------------------------------------------------------------- /configs/kilt/t5_initial/t5_fever.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 25, 3 | "output_length" : 10, 4 | "num_train_epochs" : 5, 5 | "output_dir" : "", 6 | "dataset" : "fever", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 128, 9 | "learning_rate" : 1e-4, 10 | "model" : "google/t5-large-ssm", 11 | "method": "models_baseline", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 1, 14 | "ngpu" : 2, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "0,1", 20 | "wandb_log": true, 21 | "wandb_project": "continual_learning_finetuning", 22 | "wandb_run_name" : 
"T5_large_initial_fever_finetuning_baseline", 23 | "mode" : "finetune", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : false, 26 | "checkpoint_path" : "" 27 | } -------------------------------------------------------------------------------- /configs/kilt/t5_initial/t5_hotpotqa.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 50, 3 | "output_length" : 8, 4 | "num_train_epochs" : 12, 5 | "output_dir" : "", 6 | "dataset" : "HotpotQA", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 128, 9 | "learning_rate" : 1e-4, 10 | "model" : "google/t5-large-ssm", 11 | "method": "models_baseline", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 1, 14 | "ngpu" : 4, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "0,1,2,3", 20 | "wandb_log": true, 21 | "wandb_project": "continual_learning_finetuning", 22 | "wandb_run_name" : "T5_large_initial_hotpotqa_finetuning_baseline", 23 | "mode" : "finetune", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : false, 26 | "checkpoint_path" : "" 27 | } -------------------------------------------------------------------------------- /configs/kilt/t5_initial/t5_nq.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 35, 3 | "output_length" : 6, 4 | "num_train_epochs" : 45, 5 | "output_dir" : "", 6 | "dataset" : "NQ", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 192, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "models_baseline", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 1, 14 | "ngpu" : 4, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "0,1,2,3", 20 | "wandb_log": true, 21 | "wandb_project": 
"continual_learning_finetuning", 22 | "wandb_run_name" : "T5_large_initial_nq_finetuning_baseline", 23 | "mode" : "finetune", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : false, 26 | "checkpoint_path" : "" 27 | } -------------------------------------------------------------------------------- /configs/kilt/t5_initial/t5_tqa.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 25, 3 | "output_length" : 10, 4 | "num_train_epochs" : 50, 5 | "output_dir" : "", 6 | "dataset" : "TriviaQA", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 128, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "models_baseline", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 1, 14 | "ngpu" : 4, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "0,1,2,3", 20 | "wandb_log": true, 21 | "wandb_project": "continual_learning_finetuning", 22 | "wandb_run_name" : "T5_large_initial_triviaQA_finetuning_baseline", 23 | "mode" : "finetune", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : false, 26 | "checkpoint_path" : "" 27 | } -------------------------------------------------------------------------------- /configs/kilt/t5_initial/t5_trex.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 35, 3 | "output_length" : 6, 4 | "num_train_epochs" : 9, 5 | "output_dir" : "", 6 | "dataset" : "TREX", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 192, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "models_baseline", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 1, 14 | "ngpu" : 4, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "0,1,2,3", 20 | 
"wandb_log": true, 21 | "wandb_project": "continual_learning_finetuning", 22 | "wandb_run_name" : "T5_large_initial_trex_finetuning_baseline", 23 | "mode" : "finetune", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : false, 26 | "checkpoint_path" : "" 27 | } -------------------------------------------------------------------------------- /configs/kilt/t5_initial/t5_wned.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 512, 3 | "output_length" : 6, 4 | "num_train_epochs" : 20, 5 | "output_dir" : "", 6 | "dataset" : "WNED", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 64, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "models_baseline", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 1, 14 | "ngpu" : 4, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "0,1,2,3", 20 | "wandb_log": false, 21 | "wandb_project": "continual_learning_finetuning", 22 | "wandb_run_name" : "T5_large_initial_wned_finetuning_baseline", 23 | "mode" : "finetune", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : true, 26 | "checkpoint_path" : "T5_large_initial_AY2_finetuning_baseline/last.ckpt", 27 | "output_log" : "log/kilt/WNED/initial.csv" 28 | } -------------------------------------------------------------------------------- /configs/kilt/t5_initial/t5_wow.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 175, 3 | "output_length" : 40, 4 | "num_train_epochs" : 8, 5 | "output_dir" : "", 6 | "dataset" : "WOW", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 24, 9 | "learning_rate" : 1e-4, 10 | "model" : "google/t5-large-ssm", 11 | "method": "models_baseline", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 1, 14 | "ngpu" : 4, 15 | "num_workers" : 40, 16 | 
"resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "0,1,2,3", 20 | "wandb_log": true, 21 | "wandb_project": "continual_learning_finetuning", 22 | "wandb_run_name" : "T5_large_inital_wow_finetuning_baseline", 23 | "mode" : "finetune", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : false, 26 | "checkpoint_path" : "" 27 | } -------------------------------------------------------------------------------- /configs/kilt/t5_initial/t5_zsre.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 25, 3 | "output_length" : 6, 4 | "num_train_epochs" : 30, 5 | "output_dir" : "", 6 | "dataset" : "zsRE", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 128, 9 | "learning_rate" : 1e-4, 10 | "model" : "google/t5-large-ssm", 11 | "method": "models_baseline", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 1, 14 | "ngpu" : 4, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "0,1,2,3", 20 | "wandb_log": true, 21 | "wandb_project": "continual_learning_finetuning", 22 | "wandb_run_name" : "T5_large_initial_zsre_finetuning_baseline", 23 | "mode" : "finetune", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : false, 26 | "checkpoint_path" : "" 27 | } -------------------------------------------------------------------------------- /configs/kilt/t5_kadapters/t5_ay2.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 768, 3 | "output_length" : 6, 4 | "num_train_epochs" : 20, 5 | "output_dir" : "T5_large_kadapters_AY2_finetuning_baseline", 6 | "dataset" : "AY2", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 4, 9 | "learning_rate" : 1e-4, 10 | "model" : "google/t5-large-ssm", 11 | "method": "kadapter", 12 | "freeze_level": 0, 13 | 
"gradient_accumulation_steps" : 1, 14 | "ngpu" : 4, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "0,1,2,3", 20 | "wandb_log": true, 21 | "wandb_project": "continual_learning_finetuning", 22 | "wandb_run_name" : "T5_large_kadapters_AY2_finetuning_baseline", 23 | "mode" : "finetune", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : false, 26 | "checkpoint_path" : "outputs/last_kadapters.ckpt" 27 | } -------------------------------------------------------------------------------- /configs/kilt/t5_kadapters/t5_cweb.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 2048, 3 | "output_length" : 6, 4 | "num_train_epochs" : 20, 5 | "output_dir" : "", 6 | "dataset" : "CWEB", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 16, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "kadapter", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 1, 14 | "ngpu" : 4, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "0,1,2,3", 20 | "wandb_log": false, 21 | "wandb_project": "continual_learning_finetuning", 22 | "wandb_run_name" : "T5_large_kadapters_cweb_finetuning_baseline", 23 | "mode" : "finetune", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : true, 26 | "checkpoint_path" : "T5_large_kadapters_AY2_finetuning_baseline/last.ckpt", 27 | "output_log" : "log/kilt/CWEB/kadapters.csv" 28 | } -------------------------------------------------------------------------------- /configs/kilt/t5_kadapters/t5_eli5.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 35, 3 | "output_length" : 350, 4 | "num_train_epochs" : 6, 5 | "output_dir" : "T5_large_kadapters_eli5_finetuning_baseline", 6 | 
"dataset" : "ELI5", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 12, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "kadapter", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 1, 14 | "ngpu" : 4, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "0,1,2,3", 20 | "wandb_log": true, 21 | "wandb_project": "continual_learning_finetuning", 22 | "wandb_run_name" : "T5_large_kadapters_eli5_finetuning_baseline", 23 | "mode" : "finetune", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : false, 26 | "checkpoint_path" : "outputs/last_kadapters.ckpt" 27 | } -------------------------------------------------------------------------------- /configs/kilt/t5_kadapters/t5_fever.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 25, 3 | "output_length" : 10, 4 | "num_train_epochs" : 5, 5 | "output_dir" : "", 6 | "dataset" : "fever", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 128, 9 | "learning_rate" : 1e-4, 10 | "model" : "google/t5-large-ssm", 11 | "method": "kadapter", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 1, 14 | "ngpu" : 4, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "0,1,2,3", 20 | "wandb_log": true, 21 | "wandb_project": "continual_learning_finetuning", 22 | "wandb_run_name" : "T5_large_kadapters_fever_finetuning_baseline", 23 | "mode" : "finetune", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : false, 26 | "checkpoint_path" : "outputs/last_kadapters.ckpt" 27 | } -------------------------------------------------------------------------------- /configs/kilt/t5_kadapters/t5_hotpotqa.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 
50, 3 | "output_length" : 8, 4 | "num_train_epochs" : 12, 5 | "output_dir" : "", 6 | "dataset" : "HotpotQA", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 128, 9 | "learning_rate" : 1e-4, 10 | "model" : "google/t5-large-ssm", 11 | "method": "kadapter", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 1, 14 | "ngpu" : 4, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "0,1,2,3", 20 | "wandb_log": true, 21 | "wandb_project": "continual_learning_finetuning", 22 | "wandb_run_name" : "T5_large_kadapters_hotpotqa_finetuning_baseline", 23 | "mode" : "finetune", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : false, 26 | "checkpoint_path" : "outputs/last_kadapters.ckpt" 27 | } -------------------------------------------------------------------------------- /configs/kilt/t5_kadapters/t5_nq.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 35, 3 | "output_length" : 6, 4 | "num_train_epochs" : 45, 5 | "output_dir" : "", 6 | "dataset" : "NQ", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 192, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "kadapter", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 1, 14 | "ngpu" : 4, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "0,1,2,3", 20 | "wandb_log": true, 21 | "wandb_project": "continual_learning_finetuning", 22 | "wandb_run_name" : "T5_large_kadapters_nq_finetuning_baseline", 23 | "mode" : "finetune", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : false, 26 | "checkpoint_path" : "outputs/last_kadapters.ckpt" 27 | } -------------------------------------------------------------------------------- /configs/kilt/t5_kadapters/t5_tqa.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 25, 3 | "output_length" : 10, 4 | "num_train_epochs" : 50, 5 | "output_dir" : "", 6 | "dataset" : "TriviaQA", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 128, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "kadapter", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 1, 14 | "ngpu" : 4, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "0,1,2,3", 20 | "wandb_log": true, 21 | "wandb_project": "continual_learning_finetuning", 22 | "wandb_run_name" : "T5_large_kadapters_triviaQA_finetuning_baseline", 23 | "mode" : "finetune", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : false, 26 | "checkpoint_path" : "outputs/last_kadapters.ckpt" 27 | } -------------------------------------------------------------------------------- /configs/kilt/t5_kadapters/t5_trex.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 35, 3 | "output_length" : 6, 4 | "num_train_epochs" : 9, 5 | "output_dir" : "T5_large_kadapters_trex_finetuning_baseline", 6 | "dataset" : "TREX", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 192, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "kadapter", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 1, 14 | "ngpu" : 4, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "0,1,2,3", 20 | "wandb_log": true, 21 | "wandb_project": "continual_learning_finetuning", 22 | "wandb_run_name" : "T5_large_kadapters_trex_finetuning_baseline", 23 | "mode" : "finetune", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : false, 26 | "checkpoint_path" : "outputs/last_kadapters.ckpt" 
27 | } -------------------------------------------------------------------------------- /configs/kilt/t5_kadapters/t5_wned.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 512, 3 | "output_length" : 6, 4 | "num_train_epochs" : 20, 5 | "output_dir" : "", 6 | "dataset" : "WNED", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 64, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "kadapter", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 1, 14 | "ngpu" : 4, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "0,1,2,3", 20 | "wandb_log": false, 21 | "wandb_project": "continual_learning_finetuning", 22 | "wandb_run_name" : "T5_large_kadapters_wned_finetuning_baseline", 23 | "mode" : "finetune", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : true, 26 | "checkpoint_path" : "T5_large_kadapters_AY2_finetuning_baseline/last.ckpt", 27 | "output_log" : "log/kilt/WNED/kadapters.csv" 28 | } -------------------------------------------------------------------------------- /configs/kilt/t5_kadapters/t5_wow.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 175, 3 | "output_length" : 40, 4 | "num_train_epochs" : 8, 5 | "output_dir" : "", 6 | "dataset" : "WOW", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 24, 9 | "learning_rate" : 1e-4, 10 | "model" : "google/t5-large-ssm", 11 | "method": "kadapter", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 1, 14 | "ngpu" : 4, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "0,1,2,3", 20 | "wandb_log": true, 21 | "wandb_project": "continual_learning_finetuning", 22 | "wandb_run_name" : 
"T5_large_kadapters_wow_finetuning_baseline", 23 | "mode" : "finetune", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : false, 26 | "checkpoint_path" : "outputs/last_kadapters.ckpt" 27 | } -------------------------------------------------------------------------------- /configs/kilt/t5_kadapters/t5_zsre.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 25, 3 | "output_length" : 6, 4 | "num_train_epochs" : 30, 5 | "output_dir" : "", 6 | "dataset" : "zsRE", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 128, 9 | "learning_rate" : 1e-4, 10 | "model" : "google/t5-large-ssm", 11 | "method": "kadapter", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 1, 14 | "ngpu" : 4, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "0,1,2,3", 20 | "wandb_log": true, 21 | "wandb_project": "continual_learning_finetuning", 22 | "wandb_run_name" : "T5_large_kadapters_zsre_finetuning_baseline", 23 | "mode" : "finetune", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : false, 26 | "checkpoint_path" : "outputs/last_kadapters.ckpt" 27 | } -------------------------------------------------------------------------------- /configs/kilt/t5_lora/t5_ay2.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 768, 3 | "output_length" : 6, 4 | "num_train_epochs" : 20, 5 | "output_dir" : "T5_large_lora_AY2_finetuning_baseline", 6 | "dataset" : "AY2", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 4, 9 | "learning_rate" : 1e-4, 10 | "model" : "google/t5-large-ssm", 11 | "method": "lora", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 1, 14 | "ngpu" : 4, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "0,1,2,3", 20 
| "wandb_log": true, 21 | "wandb_project": "continual_learning_finetuning", 22 | "wandb_run_name" : "T5_large_lora_AY2_finetuning_baseline", 23 | "mode" : "finetune", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : false, 26 | "checkpoint_path" : "output/last_lora.ckpt" 27 | } -------------------------------------------------------------------------------- /configs/kilt/t5_lora/t5_cweb.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 2048, 3 | "output_length" : 6, 4 | "num_train_epochs" : 20, 5 | "output_dir" : "", 6 | "dataset" : "CWEB", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 16, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "lora", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 1, 14 | "ngpu" : 4, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "0,1,2,3", 20 | "wandb_log": false, 21 | "wandb_project": "continual_learning_finetuning", 22 | "wandb_run_name" : "T5_large_lora_cweb_finetuning_baseline", 23 | "mode" : "finetune", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : true, 26 | "checkpoint_path" : "T5_large_lora_AY2_finetuning_baseline/last.ckpt", 27 | "output_log" : "log/kilt/CWEB/lora.csv" 28 | } -------------------------------------------------------------------------------- /configs/kilt/t5_lora/t5_eli5.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 35, 3 | "output_length" : 350, 4 | "num_train_epochs" : 6, 5 | "output_dir" : "T5_large_lora_eli5_finetuning_baseline", 6 | "dataset" : "ELI5", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 12, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "lora", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 1, 14 | "ngpu" : 4, 15 | "num_workers" 
: 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "4,5,6,7", 20 | "wandb_log": true, 21 | "wandb_project": "continual_learning_finetuning", 22 | "wandb_run_name" : "T5_large_lora_eli5_finetuning_baseline", 23 | "mode" : "finetune", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : false, 26 | "checkpoint_path" : "output/last_lora.ckpt" 27 | } -------------------------------------------------------------------------------- /configs/kilt/t5_lora/t5_fever.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 25, 3 | "output_length" : 10, 4 | "num_train_epochs" : 5, 5 | "output_dir" : "", 6 | "dataset" : "fever", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 128, 9 | "learning_rate" : 1e-4, 10 | "model" : "google/t5-large-ssm", 11 | "method": "lora", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 1, 14 | "ngpu" : 4, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "0,1,2,3", 20 | "wandb_log": true, 21 | "wandb_project": "continual_learning_finetuning", 22 | "wandb_run_name" : "T5_large_lora_fever_finetuning_baseline", 23 | "mode" : "finetune", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : false, 26 | "checkpoint_path" : "output/last_lora.ckpt" 27 | } -------------------------------------------------------------------------------- /configs/kilt/t5_lora/t5_hotpotqa.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 50, 3 | "output_length" : 8, 4 | "num_train_epochs" : 12, 5 | "output_dir" : "", 6 | "dataset" : "HotpotQA", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 128, 9 | "learning_rate" : 1e-4, 10 | "model" : "google/t5-large-ssm", 11 | "method": "lora", 12 | "freeze_level": 0, 13 | 
"gradient_accumulation_steps" : 1, 14 | "ngpu" : 4, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "0,1,2,3", 20 | "wandb_log": true, 21 | "wandb_project": "continual_learning_finetuning", 22 | "wandb_run_name" : "T5_large_lora_hotpotqa_finetuning_baseline", 23 | "mode" : "finetune", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : false, 26 | "checkpoint_path" : "output/last_lora.ckpt" 27 | } -------------------------------------------------------------------------------- /configs/kilt/t5_lora/t5_nq.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 35, 3 | "output_length" : 6, 4 | "num_train_epochs" : 45, 5 | "output_dir" : "", 6 | "dataset" : "NQ", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 192, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "lora", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 1, 14 | "ngpu" : 4, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "4,5,6,7", 20 | "wandb_log": true, 21 | "wandb_project": "continual_learning_finetuning", 22 | "wandb_run_name" : "T5_large_lora_nq_finetuning_baseline", 23 | "mode" : "finetune", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : false, 26 | "checkpoint_path" : "output/last_lora.ckpt" 27 | } -------------------------------------------------------------------------------- /configs/kilt/t5_lora/t5_tqa.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 25, 3 | "output_length" : 10, 4 | "num_train_epochs" : 50, 5 | "output_dir" : "", 6 | "dataset" : "TriviaQA", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 128, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | 
"method": "lora", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 1, 14 | "ngpu" : 4, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "0,1,2,3", 20 | "wandb_log": true, 21 | "wandb_project": "continual_learning_finetuning", 22 | "wandb_run_name" : "T5_large_lora_triviaQA_finetuning_baseline", 23 | "mode" : "finetune", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : false, 26 | "checkpoint_path" : "output/last_lora.ckpt" 27 | } -------------------------------------------------------------------------------- /configs/kilt/t5_lora/t5_trex.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 35, 3 | "output_length" : 6, 4 | "num_train_epochs" : 9, 5 | "output_dir" : "T5_large_lora_trex_finetuning_baseline", 6 | "dataset" : "TREX", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 192, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "lora", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 1, 14 | "ngpu" : 4, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "0,1,2,3", 20 | "wandb_log": true, 21 | "wandb_project": "continual_learning_finetuning", 22 | "wandb_run_name" : "T5_large_lora_trex_finetuning_baseline", 23 | "mode" : "finetune", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : false, 26 | "checkpoint_path" : "output/last_lora.ckpt" 27 | } -------------------------------------------------------------------------------- /configs/kilt/t5_lora/t5_wned.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 512, 3 | "output_length" : 6, 4 | "num_train_epochs" : 20, 5 | "output_dir" : "", 6 | "dataset" : "WNED", 7 | "dataset_version" : "full", 8 | "train_batch_size" 
: 64, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "lora", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 1, 14 | "ngpu" : 4, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "0,1,2,3", 20 | "wandb_log": false, 21 | "wandb_project": "continual_learning_finetuning", 22 | "wandb_run_name" : "T5_large_lora_wned_finetuning_baseline", 23 | "mode" : "finetune", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : true, 26 | "checkpoint_path" : "T5_large_lora_AY2_finetuning_baseline/last.ckpt", 27 | "output_log" : "log/kilt/WNED/lora.csv" 28 | } -------------------------------------------------------------------------------- /configs/kilt/t5_lora/t5_wow.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 175, 3 | "output_length" : 40, 4 | "num_train_epochs" : 8, 5 | "output_dir" : "", 6 | "dataset" : "WOW", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 24, 9 | "learning_rate" : 1e-4, 10 | "model" : "google/t5-large-ssm", 11 | "method": "lora", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 1, 14 | "ngpu" : 4, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "0,1,2,3", 20 | "wandb_log": true, 21 | "wandb_project": "continual_learning_finetuning", 22 | "wandb_run_name" : "T5_large_lora_wow_finetuning_baseline", 23 | "mode" : "finetune", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : false, 26 | "checkpoint_path" : "output/last_lora.ckpt" 27 | } -------------------------------------------------------------------------------- /configs/kilt/t5_lora/t5_zsre.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 25, 3 | "output_length" : 6, 4 | "num_train_epochs" : 
30, 5 | "output_dir" : "", 6 | "dataset" : "zsRE", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 128, 9 | "learning_rate" : 1e-4, 10 | "model" : "google/t5-large-ssm", 11 | "method": "lora", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 1, 14 | "ngpu" : 4, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "0,1,2,3", 20 | "wandb_log": true, 21 | "wandb_project": "continual_learning_finetuning", 22 | "wandb_run_name" : "T5_large_lora_zsre_finetuning_baseline", 23 | "mode" : "finetune", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : false, 26 | "checkpoint_path" : "output/last_lora.ckpt" 27 | } -------------------------------------------------------------------------------- /configs/kilt/t5_mixreview/t5_ay2.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 768, 3 | "output_length" : 6, 4 | "num_train_epochs" : 20, 5 | "output_dir" : "T5_large_mixreview_AY2_finetuning_baseline", 6 | "dataset" : "AY2", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 4, 9 | "learning_rate" : 1e-4, 10 | "model" : "google/t5-large-ssm", 11 | "method": "models_baseline", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 1, 14 | "ngpu" : 4, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "0,1,2,3", 20 | "wandb_log": true, 21 | "wandb_project": "continual_learning_finetuning", 22 | "wandb_run_name" : "T5_large_mixreview_AY2_finetuning_baseline", 23 | "mode" : "finetune", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : false, 26 | "checkpoint_path" : "outputs/T5_large_recentnews(small)_lr.001_mixreview/last.ckpt" 27 | } -------------------------------------------------------------------------------- /configs/kilt/t5_mixreview/t5_cweb.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 2048, 3 | "output_length" : 6, 4 | "num_train_epochs" : 20, 5 | "output_dir" : "", 6 | "dataset" : "CWEB", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 16, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "models_baseline", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 1, 14 | "ngpu" : 4, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "0,1,2,3", 20 | "wandb_log": false, 21 | "wandb_project": "continual_learning_finetuning", 22 | "wandb_run_name" : "T5_large_mixreview_cweb_finetuning_baseline", 23 | "mode" : "finetune", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : true, 26 | "checkpoint_path" : "T5_large_mixreview_AY2_finetuning_baseline/last.ckpt", 27 | "output_log" : "log/kilt/CWEB/mixreview.csv" 28 | } -------------------------------------------------------------------------------- /configs/kilt/t5_mixreview/t5_eli5.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 35, 3 | "output_length" : 350, 4 | "num_train_epochs" : 6, 5 | "output_dir" : "", 6 | "dataset" : "ELI5", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 12, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "models_baseline", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 1, 14 | "ngpu" : 4, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "4,5,6,7", 20 | "wandb_log": true, 21 | "wandb_project": "continual_learning_finetuning", 22 | "wandb_run_name" : "T5_large_mixreview_eli5_finetuning_baseline", 23 | "mode" : "finetune", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : false, 26 | 
"checkpoint_path" : "outputs/T5_large_recentnews(small)_lr.001_mixreview/last.ckpt" 27 | } -------------------------------------------------------------------------------- /configs/kilt/t5_mixreview/t5_fever.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 25, 3 | "output_length" : 10, 4 | "num_train_epochs" : 5, 5 | "output_dir" : "", 6 | "dataset" : "fever", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 128, 9 | "learning_rate" : 1e-4, 10 | "model" : "google/t5-large-ssm", 11 | "method": "models_baseline", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 1, 14 | "ngpu" : 4, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "0,1,2,3", 20 | "wandb_log": true, 21 | "wandb_project": "continual_learning_finetuning", 22 | "wandb_run_name" : "T5_large_mixreview_fever_finetuning_baseline", 23 | "mode" : "finetune", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : false, 26 | "checkpoint_path" : "outputs/T5_large_recentnews(small)_lr.001_mixreview/last.ckpt" 27 | } -------------------------------------------------------------------------------- /configs/kilt/t5_mixreview/t5_hotpotqa.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 50, 3 | "output_length" : 8, 4 | "num_train_epochs" : 12, 5 | "output_dir" : "", 6 | "dataset" : "HotpotQA", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 128, 9 | "learning_rate" : 1e-4, 10 | "model" : "google/t5-large-ssm", 11 | "method": "models_baseline", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 1, 14 | "ngpu" : 4, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "0,1,2,3", 20 | "wandb_log": true, 21 | "wandb_project": "continual_learning_finetuning", 
22 | "wandb_run_name" : "T5_large_mixreview_hotpotqa_finetuning_baseline", 23 | "mode" : "finetune", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : false, 26 | "checkpoint_path" : "outputs/T5_large_recentnews(small)_lr.001_mixreview/last.ckpt" 27 | } -------------------------------------------------------------------------------- /configs/kilt/t5_mixreview/t5_nq.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 35, 3 | "output_length" : 6, 4 | "num_train_epochs" : 45, 5 | "output_dir" : "", 6 | "dataset" : "NQ", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 192, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "models_baseline", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 1, 14 | "ngpu" : 4, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "4,5,6,7", 20 | "wandb_log": true, 21 | "wandb_project": "continual_learning_finetuning", 22 | "wandb_run_name" : "T5_large_mixreview_nq_finetuning_baseline", 23 | "mode" : "finetune", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : false, 26 | "checkpoint_path" : "outputs/T5_large_recentnews(small)_lr.001_mixreview/last.ckpt" 27 | } -------------------------------------------------------------------------------- /configs/kilt/t5_mixreview/t5_tqa.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 25, 3 | "output_length" : 10, 4 | "num_train_epochs" : 50, 5 | "output_dir" : "", 6 | "dataset" : "TriviaQA", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 128, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "models_baseline", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 1, 14 | "ngpu" : 4, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | 
"accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "4,5,6,7", 20 | "wandb_log": true, 21 | "wandb_project": "continual_learning_finetuning", 22 | "wandb_run_name" : "T5_large_mixreview_triviaQA_finetuning_baseline", 23 | "mode" : "finetune", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : false, 26 | "checkpoint_path" : "outputs/T5_large_recentnews(small)_lr.001_mixreview/last.ckpt" 27 | } -------------------------------------------------------------------------------- /configs/kilt/t5_mixreview/t5_trex.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 35, 3 | "output_length" : 6, 4 | "num_train_epochs" : 9, 5 | "output_dir" : "", 6 | "dataset" : "TREX", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 192, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "models_baseline", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 1, 14 | "ngpu" : 4, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "0,1,2,3", 20 | "wandb_log": true, 21 | "wandb_project": "continual_learning_finetuning", 22 | "wandb_run_name" : "T5_large_mixreview_trex_finetuning_baseline", 23 | "mode" : "finetune", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : false, 26 | "checkpoint_path" : "outputs/T5_large_recentnews(small)_lr.001_mixreview/last.ckpt" 27 | } -------------------------------------------------------------------------------- /configs/kilt/t5_mixreview/t5_wned.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 512, 3 | "output_length" : 6, 4 | "num_train_epochs" : 20, 5 | "output_dir" : "", 6 | "dataset" : "WNED", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 64, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": 
"models_baseline", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 1, 14 | "ngpu" : 4, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "0,1,2,3", 20 | "wandb_log": false, 21 | "wandb_project": "continual_learning_finetuning", 22 | "wandb_run_name" : "T5_large_mixreview_wned_finetuning_baseline", 23 | "mode" : "finetune", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : true, 26 | "checkpoint_path" : "T5_large_mixreview_AY2_finetuning_baseline/last.ckpt", 27 | "output_log" : "log/kilt/WNED/mixreview.csv" 28 | } -------------------------------------------------------------------------------- /configs/kilt/t5_mixreview/t5_wow.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 175, 3 | "output_length" : 40, 4 | "num_train_epochs" : 8, 5 | "output_dir" : "", 6 | "dataset" : "WOW", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 24, 9 | "learning_rate" : 1e-4, 10 | "model" : "google/t5-large-ssm", 11 | "method": "models_baseline", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 1, 14 | "ngpu" : 4, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "0,1,2,3", 20 | "wandb_log": true, 21 | "wandb_project": "continual_learning_finetuning", 22 | "wandb_run_name" : "T5_large_mixreview_wow_finetuning_baseline", 23 | "mode" : "finetune", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : false, 26 | "checkpoint_path" : "outputs/T5_large_recentnews(small)_lr.001_mixreview/last.ckpt" 27 | } -------------------------------------------------------------------------------- /configs/kilt/t5_mixreview/t5_zsre.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 25, 3 | "output_length" : 6, 4 | 
"num_train_epochs" : 30, 5 | "output_dir" : "", 6 | "dataset" : "zsRE", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 128, 9 | "learning_rate" : 1e-4, 10 | "model" : "google/t5-large-ssm", 11 | "method": "models_baseline", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 1, 14 | "ngpu" : 4, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "4,5,6,7", 20 | "wandb_log": true, 21 | "wandb_project": "continual_learning_finetuning", 22 | "wandb_run_name" : "T5_large_mixreview_zsre_finetuning_baseline", 23 | "mode" : "finetune", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : false, 26 | "checkpoint_path" : "outputs/T5_large_recentnews(small)_lr.001_mixreview/last.ckpt" 27 | } -------------------------------------------------------------------------------- /configs/kilt/t5_modular_small/t5_ay2.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 768, 3 | "output_length" : 6, 4 | "num_train_epochs" : 20, 5 | "output_dir" : "T5_large_modularsmall_AY2_finetuning_baseline", 6 | "dataset" : "AY2", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 2, 9 | "learning_rate" : 1e-4, 10 | "model" : "google/t5-large-ssm", 11 | "method": "modular_small", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 1, 14 | "ngpu" : 8, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "0,1,2,3,4,5,6,7", 20 | "wandb_log": true, 21 | "wandb_project": "continual_learning_finetuning_kilt", 22 | "wandb_run_name" : "T5_large_modularsmall_AY2_finetuning_baseline", 23 | "mode" : "finetune", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : false, 26 | "checkpoint_path" : "outputs/small/modular.ckpt" 27 | } -------------------------------------------------------------------------------- 
/configs/kilt/t5_modular_small/t5_cweb.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 2048, 3 | "output_length" : 6, 4 | "num_train_epochs" : 20, 5 | "output_dir" : "", 6 | "dataset" : "CWEB", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 16, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "modular_small", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 1, 14 | "ngpu" : 4, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "4,5,6,7", 20 | "wandb_log": false, 21 | "wandb_project": "continual_learning_finetuning_full", 22 | "wandb_run_name" : "T5_large_modularsmall_cweb_finetuning_baseline", 23 | "mode" : "finetune", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : true, 26 | "checkpoint_path" : "T5_large_modularsmall_AY2_finetuning_baseline/last.ckpt", 27 | "output_log" : "log/kilt/CWEB/modular.csv" 28 | } -------------------------------------------------------------------------------- /configs/kilt/t5_modular_small/t5_eli5.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 35, 3 | "output_length" : 350, 4 | "num_train_epochs" : 6, 5 | "output_dir" : "", 6 | "dataset" : "ELI5", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 12, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "modular_small", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 1, 14 | "ngpu" : 4, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "0,1,2,3", 20 | "wandb_log": true, 21 | "wandb_project": "continual_learning_finetuning_kilt", 22 | "wandb_run_name" : "T5_large_modularsmall_eli5_finetuning_baseline", 23 | "mode" : "finetune", 24 | 
"use_lr_scheduling" : false, 25 | "check_validation" : false, 26 | "checkpoint_path" : "outputs/small/modular.ckpt" 27 | } -------------------------------------------------------------------------------- /configs/kilt/t5_modular_small/t5_fever.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 25, 3 | "output_length" : 10, 4 | "num_train_epochs" : 5, 5 | "output_dir" : "", 6 | "dataset" : "fever", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 128, 9 | "learning_rate" : 1e-4, 10 | "model" : "google/t5-large-ssm", 11 | "method": "modular_small", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 1, 14 | "ngpu" : 4, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "4,5,6,7", 20 | "wandb_log": true, 21 | "wandb_project": "continual_learning_finetuning_full", 22 | "wandb_run_name" : "T5_large_modularsmall_fever_finetuning_baseline", 23 | "mode" : "finetune", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : false, 26 | "checkpoint_path" : "outputs/last_full_modularsmall.ckpt" 27 | } -------------------------------------------------------------------------------- /configs/kilt/t5_modular_small/t5_hotpotqa.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 50, 3 | "output_length" : 8, 4 | "num_train_epochs" : 12, 5 | "output_dir" : "", 6 | "dataset" : "HotpotQA", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 128, 9 | "learning_rate" : 1e-4, 10 | "model" : "google/t5-large-ssm", 11 | "method": "modular_small", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 1, 14 | "ngpu" : 4, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "4,5,6,7", 20 | "wandb_log": true, 21 | "wandb_project": 
"continual_learning_finetuning_full", 22 | "wandb_run_name" : "T5_large_modularsmall_hotpotqa_finetuning_baseline", 23 | "mode" : "finetune", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : false, 26 | "checkpoint_path" : "outputs/last_full_modularsmall.ckpt" 27 | } -------------------------------------------------------------------------------- /configs/kilt/t5_modular_small/t5_nq.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 35, 3 | "output_length" : 6, 4 | "num_train_epochs" : 45, 5 | "output_dir" : "", 6 | "dataset" : "NQ", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 192, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "modular_small", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 1, 14 | "ngpu" : 4, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "4,5,6,7", 20 | "wandb_log": true, 21 | "wandb_project": "continual_learning_finetuning_full", 22 | "wandb_run_name" : "T5_large_modularsmall_nq_finetuning_baseline", 23 | "mode" : "finetune", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : false, 26 | "checkpoint_path" : "outputs/last_full_modularsmall.ckpt" 27 | } -------------------------------------------------------------------------------- /configs/kilt/t5_modular_small/t5_tqa.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 25, 3 | "output_length" : 10, 4 | "num_train_epochs" : 50, 5 | "output_dir" : "", 6 | "dataset" : "TriviaQA", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 128, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "modular_small", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 1, 14 | "ngpu" : 4, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | 
"accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "4,5,6,7", 20 | "wandb_log": true, 21 | "wandb_project": "continual_learning_finetuning_full", 22 | "wandb_run_name" : "T5_large_modularsmall_triviaQA_finetuning_baseline", 23 | "mode" : "finetune", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : false, 26 | "checkpoint_path" : "outputs/last_full_modularsmall.ckpt" 27 | } -------------------------------------------------------------------------------- /configs/kilt/t5_modular_small/t5_trex.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 35, 3 | "output_length" : 6, 4 | "num_train_epochs" : 9, 5 | "output_dir" : "", 6 | "dataset" : "TREX", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 192, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "modular_small", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 1, 14 | "ngpu" : 4, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "0,1,2,3", 20 | "wandb_log": true, 21 | "wandb_project": "continual_learning_finetuning_kilt", 22 | "wandb_run_name" : "T5_large_modularsmall_trex_finetuning_baseline", 23 | "mode" : "finetune", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : false, 26 | "checkpoint_path" : "outputs/small/modular.ckpt" 27 | } -------------------------------------------------------------------------------- /configs/kilt/t5_modular_small/t5_wned.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 512, 3 | "output_length" : 6, 4 | "num_train_epochs" : 20, 5 | "output_dir" : "", 6 | "dataset" : "WNED", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 64, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "modular_small", 12 | "freeze_level": 0, 13 | 
"gradient_accumulation_steps" : 1, 14 | "ngpu" : 4, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "4,5,6,7", 20 | "wandb_log": false, 21 | "wandb_project": "continual_learning_finetuning_full", 22 | "wandb_run_name" : "T5_large_modularsmall_wned_finetuning_baseline", 23 | "mode" : "finetune", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : true, 26 | "checkpoint_path" : "T5_large_modularsmall_AY2_finetuning_baseline/last.ckpt", 27 | "output_log" : "log/kilt/WNED/modular.csv" 28 | } -------------------------------------------------------------------------------- /configs/kilt/t5_modular_small/t5_wow.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 175, 3 | "output_length" : 40, 4 | "num_train_epochs" : 8, 5 | "output_dir" : "", 6 | "dataset" : "WOW", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 24, 9 | "learning_rate" : 1e-4, 10 | "model" : "google/t5-large-ssm", 11 | "method": "modular_small", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 1, 14 | "ngpu" : 4, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "4,5,6,7", 20 | "wandb_log": true, 21 | "wandb_project": "continual_learning_finetuning_full", 22 | "wandb_run_name" : "T5_large_modularsmall_wow_finetuning_baseline", 23 | "mode" : "finetune", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : false, 26 | "checkpoint_path" : "outputs/last_full_modularsmall.ckpt" 27 | } -------------------------------------------------------------------------------- /configs/kilt/t5_modular_small/t5_zsre.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 25, 3 | "output_length" : 6, 4 | "num_train_epochs" : 30, 5 | "output_dir" : "", 6 | "dataset" : 
"zsRE", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 128, 9 | "learning_rate" : 1e-4, 10 | "model" : "google/t5-large-ssm", 11 | "method": "modular_small", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 1, 14 | "ngpu" : 4, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "4,5,6,7", 20 | "wandb_log": true, 21 | "wandb_project": "continual_learning_finetuning_full", 22 | "wandb_run_name" : "T5_large_modularsmall_zsre_finetuning_baseline", 23 | "mode" : "finetune", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : false, 26 | "checkpoint_path" : "outputs/last_full_modularsmall.ckpt" 27 | } -------------------------------------------------------------------------------- /configs/kilt/t5_recadam/t5_ay2.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 768, 3 | "output_length" : 6, 4 | "num_train_epochs" : 20, 5 | "output_dir" : "T5_large_recadam_AY2_finetuning_baseline", 6 | "dataset" : "AY2", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 4, 9 | "learning_rate" : 1e-4, 10 | "model" : "google/t5-large-ssm", 11 | "method": "models_baseline", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 1, 14 | "ngpu" : 4, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "4,5,6,7", 20 | "wandb_log": true, 21 | "wandb_project": "continual_learning_finetuning_full", 22 | "wandb_run_name" : "T5_large_recadam_AY2_finetuning_baseline", 23 | "mode" : "finetune", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : false, 26 | "checkpoint_path" : "outputs/full_recadam.ckpt" 27 | } -------------------------------------------------------------------------------- /configs/kilt/t5_recadam/t5_cweb.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 2048, 3 | "output_length" : 6, 4 | "num_train_epochs" : 20, 5 | "output_dir" : "", 6 | "dataset" : "CWEB", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 16, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "models_baseline", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 1, 14 | "ngpu" : 4, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "4,5,6,7", 20 | "wandb_log": false, 21 | "wandb_project": "continual_learning_finetuning_full", 22 | "wandb_run_name" : "T5_large_recadam_cweb_finetuning_baseline", 23 | "mode" : "finetune", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : true, 26 | "checkpoint_path" : "T5_large_recadam_AY2_finetuning_baseline/last.ckpt", 27 | "output_log" : "log/kilt/CWEB/recadam.csv" 28 | } -------------------------------------------------------------------------------- /configs/kilt/t5_recadam/t5_eli5.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 35, 3 | "output_length" : 350, 4 | "num_train_epochs" : 6, 5 | "output_dir" : "", 6 | "dataset" : "ELI5", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 12, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "models_baseline", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 1, 14 | "ngpu" : 4, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "4,5,6,7", 20 | "wandb_log": true, 21 | "wandb_project": "continual_learning_finetuning_full", 22 | "wandb_run_name" : "T5_large_recadam_eli5_finetuning_baseline", 23 | "mode" : "finetune", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : false, 26 | 
"checkpoint_path" : "outputs/full_recadam.ckpt" 27 | } -------------------------------------------------------------------------------- /configs/kilt/t5_recadam/t5_fever.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 25, 3 | "output_length" : 10, 4 | "num_train_epochs" : 5, 5 | "output_dir" : "", 6 | "dataset" : "fever", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 128, 9 | "learning_rate" : 1e-4, 10 | "model" : "google/t5-large-ssm", 11 | "method": "models_baseline", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 1, 14 | "ngpu" : 4, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "4,5,6,7", 20 | "wandb_log": true, 21 | "wandb_project": "continual_learning_finetuning_full", 22 | "wandb_run_name" : "T5_large_recadam_fever_finetuning_baseline", 23 | "mode" : "finetune", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : false, 26 | "checkpoint_path" : "outputs/full_recadam.ckpt" 27 | } -------------------------------------------------------------------------------- /configs/kilt/t5_recadam/t5_hotpotqa.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 50, 3 | "output_length" : 8, 4 | "num_train_epochs" : 12, 5 | "output_dir" : "", 6 | "dataset" : "HotpotQA", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 128, 9 | "learning_rate" : 1e-4, 10 | "model" : "google/t5-large-ssm", 11 | "method": "models_baseline", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 1, 14 | "ngpu" : 4, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "4,5,6,7", 20 | "wandb_log": true, 21 | "wandb_project": "continual_learning_finetuning_full", 22 | "wandb_run_name" : 
"T5_large_recadam_hotpotqa_finetuning_baseline", 23 | "mode" : "finetune", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : false, 26 | "checkpoint_path" : "outputs/full_recadam.ckpt" 27 | } -------------------------------------------------------------------------------- /configs/kilt/t5_recadam/t5_nq.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 35, 3 | "output_length" : 6, 4 | "num_train_epochs" : 45, 5 | "output_dir" : "", 6 | "dataset" : "NQ", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 192, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "models_baseline", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 1, 14 | "ngpu" : 4, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "4,5,6,7", 20 | "wandb_log": true, 21 | "wandb_project": "continual_learning_finetuning_full", 22 | "wandb_run_name" : "T5_large_recadam_nq_finetuning_baseline", 23 | "mode" : "finetune", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : false, 26 | "checkpoint_path" : "outputs/full_recadam.ckpt" 27 | } -------------------------------------------------------------------------------- /configs/kilt/t5_recadam/t5_tqa.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 25, 3 | "output_length" : 10, 4 | "num_train_epochs" : 50, 5 | "output_dir" : "", 6 | "dataset" : "TriviaQA", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 128, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "models_baseline", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 1, 14 | "ngpu" : 4, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "4,5,6,7", 20 | "wandb_log": 
true, 21 | "wandb_project": "continual_learning_finetuning_full", 22 | "wandb_run_name" : "T5_large_recadam_triviaQA_finetuning_baseline", 23 | "mode" : "finetune", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : false, 26 | "checkpoint_path" : "outputs/full_recadam.ckpt" 27 | } -------------------------------------------------------------------------------- /configs/kilt/t5_recadam/t5_trex.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 35, 3 | "output_length" : 6, 4 | "num_train_epochs" : 9, 5 | "output_dir" : "", 6 | "dataset" : "TREX", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 192, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "models_baseline", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 1, 14 | "ngpu" : 4, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "4,5,6,7", 20 | "wandb_log": true, 21 | "wandb_project": "continual_learning_finetuning_full", 22 | "wandb_run_name" : "T5_large_recadam_trex_finetuning_baseline", 23 | "mode" : "finetune", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : false, 26 | "checkpoint_path" : "outputs/full_recadam.ckpt" 27 | } -------------------------------------------------------------------------------- /configs/kilt/t5_recadam/t5_wned.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 512, 3 | "output_length" : 6, 4 | "num_train_epochs" : 20, 5 | "output_dir" : "", 6 | "dataset" : "WNED", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 64, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "models_baseline", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 1, 14 | "ngpu" : 4, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : 
"ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "4,5,6,7", 20 | "wandb_log": false, 21 | "wandb_project": "continual_learning_finetuning_full", 22 | "wandb_run_name" : "T5_large_recadam_wned_finetuning_baseline", 23 | "mode" : "finetune", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : true, 26 | "checkpoint_path" : "T5_large_recadam_AY2_finetuning_baseline/last.ckpt", 27 | "output_log" : "log/kilt/WNED/recadam.csv" 28 | } -------------------------------------------------------------------------------- /configs/kilt/t5_recadam/t5_wow.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 175, 3 | "output_length" : 40, 4 | "num_train_epochs" : 8, 5 | "output_dir" : "", 6 | "dataset" : "WOW", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 24, 9 | "learning_rate" : 1e-4, 10 | "model" : "google/t5-large-ssm", 11 | "method": "models_baseline", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 1, 14 | "ngpu" : 4, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "4,5,6,7", 20 | "wandb_log": true, 21 | "wandb_project": "continual_learning_finetuning_full", 22 | "wandb_run_name" : "T5_large_recadam_wow_finetuning_baseline", 23 | "mode" : "finetune", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : false, 26 | "checkpoint_path" : "outputs/full_recadam.ckpt" 27 | } -------------------------------------------------------------------------------- /configs/kilt/t5_recadam/t5_zsre.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 25, 3 | "output_length" : 6, 4 | "num_train_epochs" : 30, 5 | "output_dir" : "", 6 | "dataset" : "zsRE", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 128, 9 | "learning_rate" : 1e-4, 10 | "model" : "google/t5-large-ssm", 11 | "method": "models_baseline", 12 | 
"freeze_level": 0, 13 | "gradient_accumulation_steps" : 1, 14 | "ngpu" : 4, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "4,5,6,7", 20 | "wandb_log": true, 21 | "wandb_project": "continual_learning_finetuning_full", 22 | "wandb_run_name" : "T5_large_recadam_zsre_finetuning_baseline", 23 | "mode" : "finetune", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : false, 26 | "checkpoint_path" : "outputs/full_recadam.ckpt" 27 | } -------------------------------------------------------------------------------- /configs/small_setting/evaluation/invariantLAMA/t5_baseline.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 25, 3 | "output_length" : 4, 4 | "num_train_epochs" : 1, 5 | "output_dir" : "", 6 | "dataset" : "invariantlama", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 32, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "baseline", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 3, 14 | "ngpu" : 1, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "2", 20 | "wandb_log": false, 21 | "wandb_project": "continual_learning_evaluation", 22 | "wandb_run_name" : "T5_large_recentnews(small)_lr.001_baseline", 23 | "mode" : "pretrain", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : true, 26 | "checkpoint_path" : "outputs/small/baseline_last.ckpt", 27 | "output_log" : "log/invariantLAMA/baseline_small.csv" 28 | } -------------------------------------------------------------------------------- /configs/small_setting/evaluation/invariantLAMA/t5_initial.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 25, 3 | "output_length" : 4, 4 | "num_train_epochs" : 1, 5 | "output_dir" 
: "", 6 | "dataset" : "invariantlama", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 32, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "baseline", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 3, 14 | "ngpu" : 1, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "2", 20 | "wandb_log": false, 21 | "wandb_project": "continual_learning_evaluation", 22 | "wandb_run_name" : "T5_large_recentnews(small)_lr.001_initial", 23 | "mode" : "pretrain", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : true, 26 | "checkpoint_path" : "", 27 | "output_log" : "log/invariantLAMA/initial_small.csv" 28 | } -------------------------------------------------------------------------------- /configs/small_setting/evaluation/invariantLAMA/t5_kadapters.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 25, 3 | "output_length" : 4, 4 | "num_train_epochs" : 1, 5 | "output_dir" : "", 6 | "dataset" : "invariantlama", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 32, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "kadapter", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 3, 14 | "ngpu" : 1, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "2", 20 | "wandb_log": false, 21 | "wandb_project": "continual_learning_evaluation", 22 | "wandb_run_name" : "T5_large_recentnews(small)_lr.001_kadapter", 23 | "mode" : "pretrain", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : true, 26 | "checkpoint_path" : "outputs/small/kadapter_last.ckpt", 27 | "output_log" : "log/invariantLAMA/kadapter_small.csv" 28 | } -------------------------------------------------------------------------------- 
/configs/small_setting/evaluation/invariantLAMA/t5_lora.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 25, 3 | "output_length" : 4, 4 | "num_train_epochs" : 1, 5 | "output_dir" : "", 6 | "dataset" : "invariantlama", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 32, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "lora", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 3, 14 | "ngpu" : 1, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "2", 20 | "wandb_log": false, 21 | "wandb_project": "continual_learning_evaluation", 22 | "wandb_run_name" : "T5_large_recentnews(small)_lr.001_lora", 23 | "mode" : "pretrain", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : true, 26 | "checkpoint_path" : "outputs/small/lora_last.ckpt", 27 | "output_log" : "log/invariantLAMA/lora_small.csv" 28 | } -------------------------------------------------------------------------------- /configs/small_setting/evaluation/invariantLAMA/t5_mixreview.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 25, 3 | "output_length" : 4, 4 | "num_train_epochs" : 1, 5 | "output_dir" : "", 6 | "dataset" : "invariantlama", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 32, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "baseline", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 3, 14 | "ngpu" : 1, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "2", 20 | "wandb_log": false, 21 | "wandb_project": "continual_learning_evaluation", 22 | "wandb_run_name" : "T5_large_recentnews(small)_lr.001_mixreview", 23 | "mode" : "pretrain", 24 | "use_lr_scheduling" : false, 
25 | "check_validation" : true, 26 | "checkpoint_path" : "outputs/small/mixreview_last.ckpt", 27 | "output_log" : "log/invariantLAMA/mixreview_small.csv" 28 | } -------------------------------------------------------------------------------- /configs/small_setting/evaluation/invariantLAMA/t5_modular_small.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 25, 3 | "output_length" : 4, 4 | "num_train_epochs" : 1, 5 | "output_dir" : "", 6 | "dataset" : "invariantlama", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 32, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "modular_small", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 3, 14 | "ngpu" : 1, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "2", 20 | "wandb_log": false, 21 | "wandb_project": "continual_learning_evaluation", 22 | "wandb_run_name" : "T5_large_recentnews(small)_lr.001_modular", 23 | "mode" : "pretrain", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : true, 26 | "checkpoint_path" : "outputs/small/modular_last.ckpt", 27 | "output_log" : "log/invariantLAMA/modular_small.csv" 28 | } -------------------------------------------------------------------------------- /configs/small_setting/evaluation/invariantLAMA/t5_recadam.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 25, 3 | "output_length" : 4, 4 | "num_train_epochs" : 1, 5 | "output_dir" : "", 6 | "dataset" : "invariantlama", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 32, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "baseline", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 3, 14 | "ngpu" : 1, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | 
"use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "2", 20 | "wandb_log": false, 21 | "wandb_project": "continual_learning_evaluation", 22 | "wandb_run_name" : "T5_large_recentnews(small)_lr.001_recadam", 23 | "mode" : "pretrain", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : true, 26 | "checkpoint_path" : "outputs/small/recadam_last.ckpt", 27 | "output_log" : "log/invariantLAMA/recadam_small.csv" 28 | } -------------------------------------------------------------------------------- /configs/small_setting/evaluation/newLAMA_easy1/t5_baseline.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 150, 3 | "output_length" : 10, 4 | "num_train_epochs" : 1, 5 | "output_dir" : "", 6 | "dataset" : "newlama_easy", 7 | "dataset_version" : "small", 8 | "train_batch_size" : 32, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "baseline", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 3, 14 | "ngpu" : 1, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "2", 20 | "wandb_log": false, 21 | "wandb_project": "continual_learning_evaluation", 22 | "wandb_run_name" : "T5_large_recentnews(small)_lr.001_baseline", 23 | "mode" : "pretrain", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : true, 26 | "checkpoint_path" : "outputs/small/baseline_last.ckpt", 27 | "output_log" : "log/newLAMA_Easy1/baseline_small.csv", 28 | "split_num" : 2, 29 | "split" : 1 30 | } -------------------------------------------------------------------------------- /configs/small_setting/evaluation/newLAMA_easy1/t5_initial.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 150, 3 | "output_length" : 10, 4 | "num_train_epochs" : 1, 5 | "output_dir" : "", 6 | "dataset" : "newlama_easy", 7 | "dataset_version" : "small", 8 
| "train_batch_size" : 32, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "baseline", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 3, 14 | "ngpu" : 1, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "2", 20 | "wandb_log": false, 21 | "wandb_project": "continual_learning_evaluation", 22 | "wandb_run_name" : "T5_large_recentnews(small)_lr.001_initial", 23 | "mode" : "pretrain", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : true, 26 | "checkpoint_path" : "", 27 | "output_log" : "log/newLAMA_Easy1/initial_small.csv", 28 | "split_num" : 2, 29 | "split" : 1 30 | } -------------------------------------------------------------------------------- /configs/small_setting/evaluation/newLAMA_easy1/t5_kadapters.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 150, 3 | "output_length" : 10, 4 | "num_train_epochs" : 1, 5 | "output_dir" : "", 6 | "dataset" : "newlama_easy", 7 | "dataset_version" : "small", 8 | "train_batch_size" : 32, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "kadapter", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 3, 14 | "ngpu" : 1, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "2", 20 | "wandb_log": false, 21 | "wandb_project": "continual_learning_evaluation", 22 | "wandb_run_name" : "T5_large_recentnews(small)_lr.001_kadapter", 23 | "mode" : "pretrain", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : true, 26 | "checkpoint_path" : "outputs/small/kadapter_last.ckpt", 27 | "output_log" : "log/newLAMA_Easy1/kadapter_small.csv", 28 | "split_num" : 2, 29 | "split" : 1 30 | } -------------------------------------------------------------------------------- 
/configs/small_setting/evaluation/newLAMA_easy1/t5_lora.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 150, 3 | "output_length" : 10, 4 | "num_train_epochs" : 1, 5 | "output_dir" : "", 6 | "dataset" : "newlama_easy", 7 | "dataset_version" : "small", 8 | "train_batch_size" : 32, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "lora", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 3, 14 | "ngpu" : 1, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "2", 20 | "wandb_log": false, 21 | "wandb_project": "continual_learning_evaluation", 22 | "wandb_run_name" : "T5_large_recentnews(small)_lr.001_lora", 23 | "mode" : "pretrain", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : true, 26 | "checkpoint_path" : "outputs/small/lora_last.ckpt", 27 | "output_log" : "log/newLAMA_Easy1/lora_small.csv", 28 | "split_num" : 2, 29 | "split" : 1 30 | } 31 | -------------------------------------------------------------------------------- /configs/small_setting/evaluation/newLAMA_easy1/t5_recadam.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 150, 3 | "output_length" : 10, 4 | "num_train_epochs" : 1, 5 | "output_dir" : "", 6 | "dataset" : "newlama_easy", 7 | "dataset_version" : "small", 8 | "train_batch_size" : 32, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "baseline", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 3, 14 | "ngpu" : 1, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "2", 20 | "wandb_log": false, 21 | "wandb_project": "continual_learning_evaluation", 22 | "wandb_run_name" : "T5_large_recentnews(small)_lr.001_recadam", 23 | "mode" : 
"pretrain", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : true, 26 | "checkpoint_path" : "outputs/small/recadam_last.ckpt", 27 | "output_log" : "log/newLAMA_Easy1/recadam_small.csv", 28 | "split_num" : 2, 29 | "split" : 1 30 | } -------------------------------------------------------------------------------- /configs/small_setting/evaluation/newLAMA_easy2/t5_baseline.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 150, 3 | "output_length" : 10, 4 | "num_train_epochs" : 1, 5 | "output_dir" : "", 6 | "dataset" : "newlama_easy", 7 | "dataset_version" : "small", 8 | "train_batch_size" : 32, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "baseline", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 3, 14 | "ngpu" : 1, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "2", 20 | "wandb_log": false, 21 | "wandb_project": "continual_learning_evaluation", 22 | "wandb_run_name" : "T5_large_recentnews(small)_lr.001_baseline", 23 | "mode" : "pretrain", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : true, 26 | "checkpoint_path" : "outputs/small/baseline_last.ckpt", 27 | "output_log" : "log/newLAMA_Easy2/baseline_small.csv", 28 | "split_num" : 2, 29 | "split" : 2 30 | } -------------------------------------------------------------------------------- /configs/small_setting/evaluation/newLAMA_easy2/t5_initial.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 150, 3 | "output_length" : 10, 4 | "num_train_epochs" : 1, 5 | "output_dir" : "", 6 | "dataset" : "newlama_easy", 7 | "dataset_version" : "small", 8 | "train_batch_size" : 32, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "baseline", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 3, 
14 | "ngpu" : 1, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "2", 20 | "wandb_log": false, 21 | "wandb_project": "continual_learning_evaluation", 22 | "wandb_run_name" : "T5_large_recentnews(small)_lr.001_initial", 23 | "mode" : "pretrain", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : true, 26 | "checkpoint_path" : "", 27 | "output_log" : "log/newLAMA_Easy2/initial_small.csv", 28 | "split_num" : 2, 29 | "split" : 2 30 | } -------------------------------------------------------------------------------- /configs/small_setting/evaluation/newLAMA_easy2/t5_lora.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 150, 3 | "output_length" : 10, 4 | "num_train_epochs" : 1, 5 | "output_dir" : "", 6 | "dataset" : "newlama_easy", 7 | "dataset_version" : "small", 8 | "train_batch_size" : 32, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "lora", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 3, 14 | "ngpu" : 1, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "2", 20 | "wandb_log": false, 21 | "wandb_project": "continual_learning_evaluation", 22 | "wandb_run_name" : "T5_large_recentnews(small)_lr.001_lora", 23 | "mode" : "pretrain", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : true, 26 | "checkpoint_path" : "outputs/small/lora_last.ckpt", 27 | "output_log" : "log/newLAMA_Easy2/lora_small.csv", 28 | "split_num" : 2, 29 | "split" : 2 30 | } 31 | -------------------------------------------------------------------------------- /configs/small_setting/evaluation/newLAMA_easy2/t5_recadam.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 150, 3 | "output_length" : 10, 4 | 
"num_train_epochs" : 1, 5 | "output_dir" : "", 6 | "dataset" : "newlama_easy", 7 | "dataset_version" : "small", 8 | "train_batch_size" : 32, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "baseline", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 3, 14 | "ngpu" : 1, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "2", 20 | "wandb_log": false, 21 | "wandb_project": "continual_learning_evaluation", 22 | "wandb_run_name" : "T5_large_recentnews(small)_lr.001_recadam", 23 | "mode" : "pretrain", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : true, 26 | "checkpoint_path" : "outputs/small/recadam_last.ckpt", 27 | "output_log" : "log/newLAMA_Easy2/recadam_small.csv", 28 | "split_num" : 2, 29 | "split" : 2 30 | } -------------------------------------------------------------------------------- /configs/small_setting/training/t5_baseline.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 350, 3 | "output_length" : 350, 4 | "num_train_epochs" : 8, 5 | "output_dir" : "outputs/T5_large_recentnews(small)_lr.001_baseline", 6 | "dataset" : "recentnews", 7 | "dataset_version" : "small", 8 | "train_batch_size" : 5, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "baseline", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 3, 14 | "ngpu" : 4, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "0,1,2,3", 20 | "wandb_log": true, 21 | "wandb_project": "continual_learning_3", 22 | "wandb_run_name" : "T5_large_recentnews(small)_lr.001_baseline", 23 | "mode" : "pretrain", 24 | "use_lr_scheduling" : true, 25 | "check_validation" : false, 26 | "checkpoint_path" : "" 27 | } 
-------------------------------------------------------------------------------- /configs/small_setting/training/t5_kadapters.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 350, 3 | "output_length" : 350, 4 | "num_train_epochs" : 8, 5 | "output_dir" : "outputs/T5_large_recentnews(small)_lr.001_adapters", 6 | "dataset" : "recentnews", 7 | "dataset_version" : "small", 8 | "train_batch_size" : 5, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "kadapter", 12 | "freeze_level": 1, 13 | "gradient_accumulation_steps" : 3, 14 | "ngpu" : 4, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "0,1,2,3", 20 | "wandb_log": true, 21 | "wandb_project": "continual_learning_3", 22 | "wandb_run_name" : "T5_large_recentnews(small)_lr.001_adapters", 23 | "mode" : "pretrain", 24 | "use_lr_scheduling" : true, 25 | "check_validation" : false, 26 | "checkpoint_path" : "" 27 | } -------------------------------------------------------------------------------- /configs/small_setting/training/t5_lora.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 350, 3 | "output_length" : 350, 4 | "num_train_epochs" : 8, 5 | "output_dir" : "outputs/T5_large_recentnews(small)_lr.001_lora", 6 | "dataset" : "recentnews", 7 | "dataset_version" : "small", 8 | "train_batch_size" : 5, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "lora", 12 | "freeze_level": 1, 13 | "gradient_accumulation_steps" : 3, 14 | "ngpu" : 4, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "0,1,2,3", 20 | "wandb_log": true, 21 | "wandb_project": "continual_learning_3", 22 | "wandb_run_name" : "T5_large_recentnews(small)_lr.001_lora", 23 | 
"mode" : "pretrain", 24 | "use_lr_scheduling" : true, 25 | "check_validation" : false, 26 | "checkpoint_path" : "" 27 | } -------------------------------------------------------------------------------- /configs/small_setting/training/t5_mixreview.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 350, 3 | "output_length" : 350, 4 | "num_train_epochs" : 8, 5 | "output_dir" : "outputs/T5_large_recentnews(small)_lr.001_mixreview", 6 | "dataset" : "recentnews", 7 | "dataset_version" : "small", 8 | "train_batch_size" : 5, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "mixreview", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 3, 14 | "ngpu" : 4, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "4,5,6,7", 20 | "wandb_log": true, 21 | "wandb_project": "continual_learning_3", 22 | "wandb_run_name" : "T5_large_recentnews(small)_lr.001_mixreview", 23 | "mode" : "pretrain", 24 | "use_lr_scheduling" : true, 25 | "check_validation" : false, 26 | "checkpoint_path" : "" 27 | } -------------------------------------------------------------------------------- /configs/small_setting/training/t5_modular_small.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 350, 3 | "output_length" : 350, 4 | "num_train_epochs" : 8, 5 | "output_dir" : "outputs/T5_large_recentnews(small)_modular_small", 6 | "dataset" : "recentnews", 7 | "dataset_version" : "small", 8 | "train_batch_size" : 5, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "modular_small", 12 | "freeze_level": 1, 13 | "gradient_accumulation_steps" : 3, 14 | "ngpu" : 4, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "0,1,2,3", 
20 | "wandb_log": true, 21 | "wandb_project": "continual_learning_3", 22 | "wandb_run_name" : "T5_large_recentnews(small)_modular_small", 23 | "mode" : "pretrain", 24 | "use_lr_scheduling" : true, 25 | "check_validation" : false, 26 | "checkpoint_path" : "" 27 | } -------------------------------------------------------------------------------- /configs/small_setting/training/t5_recadam.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 350, 3 | "output_length" : 350, 4 | "num_train_epochs" : 8, 5 | "output_dir" : "outputs/T5_large_recentnews(small)_lr.001_recadam_accum5", 6 | "dataset" : "recentnews", 7 | "dataset_version" : "small", 8 | "train_batch_size" : 3, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "recadam", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 5, 14 | "ngpu" : 4, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "4,5,6,7", 20 | "wandb_log": true, 21 | "wandb_project": "continual_learning", 22 | "wandb_run_name" : "T5_large_recentnews(small)_lr.001_recadam_accum5", 23 | "mode" : "pretrain", 24 | "use_lr_scheduling" : true, 25 | "check_validation" : false, 26 | "checkpoint_path" : "" 27 | } -------------------------------------------------------------------------------- /configs/split/evaluation/invariantLAMA/t5_baseline1.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 25, 3 | "output_length" : 4, 4 | "num_train_epochs" : 1, 5 | "output_dir" : "", 6 | "dataset" : "invariantlama", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 32, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "baseline", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 3, 14 | "ngpu" : 1, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 
17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "2", 20 | "wandb_log": false, 21 | "wandb_project": "continual_learning_evaluation", 22 | "wandb_run_name" : "T5_large_recentnews(small)_lr.001_baseline", 23 | "mode" : "pretrain", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : true, 26 | "checkpoint_path" : "outputs/split/baseline1.ckpt", 27 | "output_log" : "log/invariantLAMA/baseline1.csv" 28 | } -------------------------------------------------------------------------------- /configs/split/evaluation/invariantLAMA/t5_baseline2.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 25, 3 | "output_length" : 4, 4 | "num_train_epochs" : 1, 5 | "output_dir" : "", 6 | "dataset" : "invariantlama", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 32, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "baseline", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 3, 14 | "ngpu" : 1, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "2", 20 | "wandb_log": false, 21 | "wandb_project": "continual_learning_evaluation", 22 | "wandb_run_name" : "T5_large_recentnews(small)_lr.001_baseline", 23 | "mode" : "pretrain", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : true, 26 | "checkpoint_path" : "outputs/split/baseline2.ckpt", 27 | "output_log" : "log/invariantLAMA/baseline2.csv" 28 | } -------------------------------------------------------------------------------- /configs/split/evaluation/invariantLAMA/t5_initial.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 25, 3 | "output_length" : 4, 4 | "num_train_epochs" : 1, 5 | "output_dir" : "", 6 | "dataset" : "invariantlama", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 32, 9 | 
"learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "baseline", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 3, 14 | "ngpu" : 1, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "2", 20 | "wandb_log": false, 21 | "wandb_project": "continual_learning_evaluation", 22 | "wandb_run_name" : "T5_large_recentnews(small)_lr.001_initial", 23 | "mode" : "pretrain", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : true, 26 | "checkpoint_path" : "", 27 | "output_log" : "log/invariantLAMA/initial_small.csv" 28 | } -------------------------------------------------------------------------------- /configs/split/evaluation/invariantLAMA/t5_kadapters1.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 25, 3 | "output_length" : 4, 4 | "num_train_epochs" : 1, 5 | "output_dir" : "", 6 | "dataset" : "invariantlama", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 32, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "kadapter", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 3, 14 | "ngpu" : 1, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "2", 20 | "wandb_log": false, 21 | "wandb_project": "continual_learning_evaluation", 22 | "wandb_run_name" : "T5_large_recentnews(small)_lr.001_kadapter", 23 | "mode" : "pretrain", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : true, 26 | "checkpoint_path" : "outputs/split/kadapter1.ckpt", 27 | "output_log" : "log/invariantLAMA/kadapter1.csv" 28 | } -------------------------------------------------------------------------------- /configs/split/evaluation/invariantLAMA/t5_kadapters2.json: -------------------------------------------------------------------------------- 1 | 
{ 2 | "input_length" : 25, 3 | "output_length" : 4, 4 | "num_train_epochs" : 1, 5 | "output_dir" : "", 6 | "dataset" : "invariantlama", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 32, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "kadapter", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 3, 14 | "ngpu" : 1, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "2", 20 | "wandb_log": false, 21 | "wandb_project": "continual_learning_evaluation", 22 | "wandb_run_name" : "T5_large_recentnews(small)_lr.001_kadapter", 23 | "mode" : "pretrain", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : true, 26 | "checkpoint_path" : "outputs/split/kadapter2.ckpt", 27 | "output_log" : "log/invariantLAMA/kadapter2.csv" 28 | } -------------------------------------------------------------------------------- /configs/split/evaluation/invariantLAMA/t5_lora1.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 25, 3 | "output_length" : 4, 4 | "num_train_epochs" : 1, 5 | "output_dir" : "", 6 | "dataset" : "invariantlama", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 32, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "lora", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 3, 14 | "ngpu" : 1, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "2", 20 | "wandb_log": false, 21 | "wandb_project": "continual_learning_evaluation", 22 | "wandb_run_name" : "T5_large_recentnews(small)_lr.001_lora", 23 | "mode" : "pretrain", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : true, 26 | "checkpoint_path" : "outputs/split/lora1.ckpt", 27 | "output_log" : "log/invariantLAMA/lora1.csv" 28 | } 
-------------------------------------------------------------------------------- /configs/split/evaluation/invariantLAMA/t5_lora2.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 25, 3 | "output_length" : 4, 4 | "num_train_epochs" : 1, 5 | "output_dir" : "", 6 | "dataset" : "invariantlama", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 32, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "lora", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 3, 14 | "ngpu" : 1, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "2", 20 | "wandb_log": false, 21 | "wandb_project": "continual_learning_evaluation", 22 | "wandb_run_name" : "T5_large_recentnews(small)_lr.001_lora", 23 | "mode" : "pretrain", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : true, 26 | "checkpoint_path" : "outputs/split/lora2.ckpt", 27 | "output_log" : "log/invariantLAMA/lora2.csv" 28 | } -------------------------------------------------------------------------------- /configs/split/evaluation/invariantLAMA/t5_mixreview1.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 25, 3 | "output_length" : 4, 4 | "num_train_epochs" : 1, 5 | "output_dir" : "", 6 | "dataset" : "invariantlama", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 32, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "baseline", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 3, 14 | "ngpu" : 1, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "2", 20 | "wandb_log": false, 21 | "wandb_project": "continual_learning_evaluation", 22 | "wandb_run_name" : "T5_large_recentnews(small)_lr.001_mixreview", 23 
| "mode" : "pretrain", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : true, 26 | "checkpoint_path" : "outputs/split/mixreview1.ckpt", 27 | "output_log" : "log/invariantLAMA/mixreview1.csv" 28 | } -------------------------------------------------------------------------------- /configs/split/evaluation/invariantLAMA/t5_mixreview2.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 25, 3 | "output_length" : 4, 4 | "num_train_epochs" : 1, 5 | "output_dir" : "", 6 | "dataset" : "invariantlama", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 32, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "baseline", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 3, 14 | "ngpu" : 1, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "2", 20 | "wandb_log": false, 21 | "wandb_project": "continual_learning_evaluation", 22 | "wandb_run_name" : "T5_large_recentnews(small)_lr.001_mixreview", 23 | "mode" : "pretrain", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : true, 26 | "checkpoint_path" : "outputs/split/mixreview2.ckpt", 27 | "output_log" : "log/invariantLAMA/mixreview2.csv" 28 | } -------------------------------------------------------------------------------- /configs/split/evaluation/invariantLAMA/t5_modular_small1.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 25, 3 | "output_length" : 4, 4 | "num_train_epochs" : 1, 5 | "output_dir" : "", 6 | "dataset" : "invariantlama", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 32, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "modular_small", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 3, 14 | "ngpu" : 1, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | 
"accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "2", 20 | "wandb_log": false, 21 | "wandb_project": "continual_learning_evaluation", 22 | "wandb_run_name" : "T5_large_recentnews(small)_lr.001_modular", 23 | "mode" : "pretrain", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : true, 26 | "checkpoint_path" : "outputs/split/modular1.ckpt", 27 | "output_log" : "log/invariantLAMA/modular1.csv" 28 | } -------------------------------------------------------------------------------- /configs/split/evaluation/invariantLAMA/t5_modular_small2.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 25, 3 | "output_length" : 4, 4 | "num_train_epochs" : 1, 5 | "output_dir" : "", 6 | "dataset" : "invariantlama", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 32, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "modular_small", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 3, 14 | "ngpu" : 1, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "2", 20 | "wandb_log": false, 21 | "wandb_project": "continual_learning_evaluation", 22 | "wandb_run_name" : "T5_large_recentnews(small)_lr.001_modular", 23 | "mode" : "pretrain", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : true, 26 | "checkpoint_path" : "outputs/split/modular2.ckpt", 27 | "output_log" : "log/invariantLAMA/modular2.csv" 28 | } -------------------------------------------------------------------------------- /configs/split/evaluation/invariantLAMA/t5_recadam1.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 25, 3 | "output_length" : 4, 4 | "num_train_epochs" : 1, 5 | "output_dir" : "", 6 | "dataset" : "invariantlama", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 32, 9 | 
"learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "baseline", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 3, 14 | "ngpu" : 1, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "2", 20 | "wandb_log": false, 21 | "wandb_project": "continual_learning_evaluation", 22 | "wandb_run_name" : "T5_large_recentnews(small)_lr.001_recadam", 23 | "mode" : "pretrain", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : true, 26 | "checkpoint_path" : "outputs/split/recadam1.ckpt", 27 | "output_log" : "log/invariantLAMA/recadam1.csv" 28 | } -------------------------------------------------------------------------------- /configs/split/evaluation/invariantLAMA/t5_recadam2.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 25, 3 | "output_length" : 4, 4 | "num_train_epochs" : 1, 5 | "output_dir" : "", 6 | "dataset" : "invariantlama", 7 | "dataset_version" : "full", 8 | "train_batch_size" : 32, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "baseline", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 3, 14 | "ngpu" : 1, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "2", 20 | "wandb_log": false, 21 | "wandb_project": "continual_learning_evaluation", 22 | "wandb_run_name" : "T5_large_recentnews(small)_lr.001_recadam", 23 | "mode" : "pretrain", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : true, 26 | "checkpoint_path" : "outputs/split/recadam2.ckpt", 27 | "output_log" : "log/invariantLAMA/recadam2.csv" 28 | } -------------------------------------------------------------------------------- /configs/split/evaluation/newLAMA_easy1/t5_baseline1.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 150, 3 | "output_length" : 10, 4 | "num_train_epochs" : 1, 5 | "output_dir" : "", 6 | "dataset" : "newlama_easy", 7 | "dataset_version" : "small", 8 | "train_batch_size" : 32, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "baseline", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 3, 14 | "ngpu" : 1, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "2", 20 | "wandb_log": false, 21 | "wandb_project": "continual_learning_evaluation", 22 | "wandb_run_name" : "T5_large_recentnews(small)_lr.001_baseline", 23 | "mode" : "pretrain", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : true, 26 | "checkpoint_path" : "outputs/split/baseline1.ckpt", 27 | "output_log" : "log/newLAMA_Easy1/baseline1.csv", 28 | "split_num" : 2, 29 | "split" : 1 30 | } -------------------------------------------------------------------------------- /configs/split/evaluation/newLAMA_easy1/t5_baseline2.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 150, 3 | "output_length" : 10, 4 | "num_train_epochs" : 1, 5 | "output_dir" : "", 6 | "dataset" : "newlama_easy", 7 | "dataset_version" : "small", 8 | "train_batch_size" : 32, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "baseline", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 3, 14 | "ngpu" : 1, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "2", 20 | "wandb_log": false, 21 | "wandb_project": "continual_learning_evaluation", 22 | "wandb_run_name" : "T5_large_recentnews(small)_lr.001_baseline", 23 | "mode" : "pretrain", 24 | "use_lr_scheduling" : false, 25 | 
"check_validation" : true, 26 | "checkpoint_path" : "outputs/split/baseline2.ckpt", 27 | "output_log" : "log/newLAMA_Easy1/baseline2.csv", 28 | "split_num" : 2, 29 | "split" : 1 30 | } -------------------------------------------------------------------------------- /configs/split/evaluation/newLAMA_easy1/t5_initial.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 150, 3 | "output_length" : 10, 4 | "num_train_epochs" : 1, 5 | "output_dir" : "", 6 | "dataset" : "newlama_easy", 7 | "dataset_version" : "small", 8 | "train_batch_size" : 32, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "baseline", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 3, 14 | "ngpu" : 1, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "2", 20 | "wandb_log": false, 21 | "wandb_project": "continual_learning_evaluation", 22 | "wandb_run_name" : "T5_large_recentnews(small)_lr.001_initial", 23 | "mode" : "pretrain", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : true, 26 | "checkpoint_path" : "", 27 | "output_log" : "log/newLAMA_Easy1/initial_small.csv", 28 | "split_num" : 2, 29 | "split" : 1 30 | } -------------------------------------------------------------------------------- /configs/split/evaluation/newLAMA_easy1/t5_kadapters1.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 150, 3 | "output_length" : 10, 4 | "num_train_epochs" : 1, 5 | "output_dir" : "", 6 | "dataset" : "newlama_easy", 7 | "dataset_version" : "small", 8 | "train_batch_size" : 32, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "kadapter", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 3, 14 | "ngpu" : 1, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : 
"ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "2", 20 | "wandb_log": false, 21 | "wandb_project": "continual_learning_evaluation", 22 | "wandb_run_name" : "T5_large_recentnews(small)_lr.001_kadapter", 23 | "mode" : "pretrain", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : true, 26 | "checkpoint_path" : "outputs/split/kadapter1.ckpt", 27 | "output_log" : "log/newLAMA_Easy1/kadapter1.csv", 28 | "split_num" : 2, 29 | "split" : 1 30 | } -------------------------------------------------------------------------------- /configs/split/evaluation/newLAMA_easy1/t5_kadapters2.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 150, 3 | "output_length" : 10, 4 | "num_train_epochs" : 1, 5 | "output_dir" : "", 6 | "dataset" : "newlama_easy", 7 | "dataset_version" : "small", 8 | "train_batch_size" : 32, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "kadapter", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 3, 14 | "ngpu" : 1, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "2", 20 | "wandb_log": false, 21 | "wandb_project": "continual_learning_evaluation", 22 | "wandb_run_name" : "T5_large_recentnews(small)_lr.001_kadapter", 23 | "mode" : "pretrain", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : true, 26 | "checkpoint_path" : "outputs/split/kadapter2.ckpt", 27 | "output_log" : "log/newLAMA_Easy1/kadapter2.csv", 28 | "split_num" : 2, 29 | "split" : 1 30 | } -------------------------------------------------------------------------------- /configs/split/evaluation/newLAMA_easy1/t5_lora1.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 150, 3 | "output_length" : 10, 4 | "num_train_epochs" : 1, 5 | "output_dir" : "", 6 | "dataset" : "newlama_easy", 7 | 
"dataset_version" : "small", 8 | "train_batch_size" : 32, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "lora", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 3, 14 | "ngpu" : 1, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "2", 20 | "wandb_log": false, 21 | "wandb_project": "continual_learning_evaluation", 22 | "wandb_run_name" : "T5_large_recentnews(small)_lr.001_lora", 23 | "mode" : "pretrain", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : true, 26 | "checkpoint_path" : "outputs/split/lora1.ckpt", 27 | "output_log" : "log/newLAMA_Easy1/lora1.csv", 28 | "split_num" : 2, 29 | "split" : 1 30 | } 31 | -------------------------------------------------------------------------------- /configs/split/evaluation/newLAMA_easy1/t5_lora2.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 150, 3 | "output_length" : 10, 4 | "num_train_epochs" : 1, 5 | "output_dir" : "", 6 | "dataset" : "newlama_easy", 7 | "dataset_version" : "small", 8 | "train_batch_size" : 32, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "lora", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 3, 14 | "ngpu" : 1, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "2", 20 | "wandb_log": false, 21 | "wandb_project": "continual_learning_evaluation", 22 | "wandb_run_name" : "T5_large_recentnews(small)_lr.001_lora", 23 | "mode" : "pretrain", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : true, 26 | "checkpoint_path" : "outputs/split/lora2.ckpt", 27 | "output_log" : "log/newLAMA_Easy1/lora2.csv", 28 | "split_num" : 2, 29 | "split" : 1 30 | } 31 | -------------------------------------------------------------------------------- 
/configs/split/evaluation/newLAMA_easy1/t5_mixreview1.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 150, 3 | "output_length" : 10, 4 | "num_train_epochs" : 1, 5 | "output_dir" : "", 6 | "dataset" : "newlama_easy", 7 | "dataset_version" : "small", 8 | "train_batch_size" : 32, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "baseline", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 3, 14 | "ngpu" : 1, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "2", 20 | "wandb_log": false, 21 | "wandb_project": "continual_learning_evaluation", 22 | "wandb_run_name" : "T5_large_recentnews(small)_lr.001_mixreview", 23 | "mode" : "pretrain", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : true, 26 | "checkpoint_path" : "outputs/split/mixreview1.ckpt", 27 | "output_log" : "log/newLAMA_Easy1/mixreview1.csv", 28 | "split_num" : 2, 29 | "split" : 1 30 | } -------------------------------------------------------------------------------- /configs/split/evaluation/newLAMA_easy1/t5_mixreview2.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 150, 3 | "output_length" : 10, 4 | "num_train_epochs" : 1, 5 | "output_dir" : "", 6 | "dataset" : "newlama_easy", 7 | "dataset_version" : "small", 8 | "train_batch_size" : 32, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "baseline", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 3, 14 | "ngpu" : 1, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "2", 20 | "wandb_log": false, 21 | "wandb_project": "continual_learning_evaluation", 22 | "wandb_run_name" : "T5_large_recentnews(small)_lr.001_mixreview", 23 | "mode" : 
"pretrain", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : true, 26 | "checkpoint_path" : "outputs/split/mixreview2.ckpt", 27 | "output_log" : "log/newLAMA_Easy1/mixreview2.csv", 28 | "split_num" : 2, 29 | "split" : 1 30 | } -------------------------------------------------------------------------------- /configs/split/evaluation/newLAMA_easy1/t5_modular_small1.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 150, 3 | "output_length" : 10, 4 | "num_train_epochs" : 1, 5 | "output_dir" : "", 6 | "dataset" : "newlama_easy", 7 | "dataset_version" : "small", 8 | "train_batch_size" : 32, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "modular_small", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 3, 14 | "ngpu" : 1, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "2", 20 | "wandb_log": false, 21 | "wandb_project": "continual_learning_evaluation", 22 | "wandb_run_name" : "T5_large_recentnews(small)_lr.001_modular", 23 | "mode" : "pretrain", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : true, 26 | "checkpoint_path" : "outputs/split/modular1.ckpt", 27 | "output_log" : "log/newLAMA_Easy1/modular1.csv", 28 | "split_num" : 2, 29 | "split" : 1 30 | } -------------------------------------------------------------------------------- /configs/split/evaluation/newLAMA_easy1/t5_modular_small2.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 150, 3 | "output_length" : 10, 4 | "num_train_epochs" : 1, 5 | "output_dir" : "", 6 | "dataset" : "newlama_easy", 7 | "dataset_version" : "small", 8 | "train_batch_size" : 32, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "modular_small", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 3, 14 | 
"ngpu" : 1, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "2", 20 | "wandb_log": false, 21 | "wandb_project": "continual_learning_evaluation", 22 | "wandb_run_name" : "T5_large_recentnews(small)_lr.001_modular", 23 | "mode" : "pretrain", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : true, 26 | "checkpoint_path" : "outputs/split/modular2.ckpt", 27 | "output_log" : "log/newLAMA_Easy1/modular2.csv", 28 | "split_num" : 2, 29 | "split" : 1 30 | } -------------------------------------------------------------------------------- /configs/split/evaluation/newLAMA_easy1/t5_recadam1.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 150, 3 | "output_length" : 10, 4 | "num_train_epochs" : 1, 5 | "output_dir" : "", 6 | "dataset" : "newlama_easy", 7 | "dataset_version" : "small", 8 | "train_batch_size" : 32, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "baseline", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 3, 14 | "ngpu" : 1, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "2", 20 | "wandb_log": false, 21 | "wandb_project": "continual_learning_evaluation", 22 | "wandb_run_name" : "T5_large_recentnews(small)_lr.001_recadam", 23 | "mode" : "pretrain", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : true, 26 | "checkpoint_path" : "outputs/split/recadam1.ckpt", 27 | "output_log" : "log/newLAMA_Easy1/recadam1.csv", 28 | "split_num" : 2, 29 | "split" : 1 30 | } -------------------------------------------------------------------------------- /configs/split/evaluation/newLAMA_easy1/t5_recadam2.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 150, 3 | "output_length" : 10, 4 | 
"num_train_epochs" : 1, 5 | "output_dir" : "", 6 | "dataset" : "newlama_easy", 7 | "dataset_version" : "small", 8 | "train_batch_size" : 32, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "baseline", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 3, 14 | "ngpu" : 1, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "2", 20 | "wandb_log": false, 21 | "wandb_project": "continual_learning_evaluation", 22 | "wandb_run_name" : "T5_large_recentnews(small)_lr.001_recadam", 23 | "mode" : "pretrain", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : true, 26 | "checkpoint_path" : "outputs/split/recadam2.ckpt", 27 | "output_log" : "log/newLAMA_Easy1/recadam2.csv", 28 | "split_num" : 2, 29 | "split" : 1 30 | } -------------------------------------------------------------------------------- /configs/split/evaluation/newLAMA_easy2/t5_baseline1.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 150, 3 | "output_length" : 10, 4 | "num_train_epochs" : 1, 5 | "output_dir" : "", 6 | "dataset" : "newlama_easy", 7 | "dataset_version" : "small", 8 | "train_batch_size" : 32, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "baseline", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 3, 14 | "ngpu" : 1, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "2", 20 | "wandb_log": false, 21 | "wandb_project": "continual_learning_evaluation", 22 | "wandb_run_name" : "T5_large_recentnews(small)_lr.001_baseline", 23 | "mode" : "pretrain", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : true, 26 | "checkpoint_path" : "outputs/split/baseline1.ckpt", 27 | "output_log" : "log/newLAMA_Easy2/baseline1.csv", 28 | "split_num" : 2, 
29 | "split" : 2 30 | } -------------------------------------------------------------------------------- /configs/split/evaluation/newLAMA_easy2/t5_baseline2.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 150, 3 | "output_length" : 10, 4 | "num_train_epochs" : 1, 5 | "output_dir" : "", 6 | "dataset" : "newlama_easy", 7 | "dataset_version" : "small", 8 | "train_batch_size" : 32, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "baseline", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 3, 14 | "ngpu" : 1, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "2", 20 | "wandb_log": false, 21 | "wandb_project": "continual_learning_evaluation", 22 | "wandb_run_name" : "T5_large_recentnews(small)_lr.001_baseline", 23 | "mode" : "pretrain", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : true, 26 | "checkpoint_path" : "outputs/split/baseline2.ckpt", 27 | "output_log" : "log/newLAMA_Easy2/baseline2.csv", 28 | "split_num" : 2, 29 | "split" : 2 30 | } -------------------------------------------------------------------------------- /configs/split/evaluation/newLAMA_easy2/t5_initial.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 150, 3 | "output_length" : 10, 4 | "num_train_epochs" : 1, 5 | "output_dir" : "", 6 | "dataset" : "newlama_easy", 7 | "dataset_version" : "small", 8 | "train_batch_size" : 32, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "baseline", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 3, 14 | "ngpu" : 1, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "2", 20 | "wandb_log": false, 21 | "wandb_project": 
"continual_learning_evaluation", 22 | "wandb_run_name" : "T5_large_recentnews(small)_lr.001_initial", 23 | "mode" : "pretrain", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : true, 26 | "checkpoint_path" : "", 27 | "output_log" : "log/newLAMA_Easy2/initial_small.csv", 28 | "split_num" : 2, 29 | "split" : 2 30 | } -------------------------------------------------------------------------------- /configs/split/evaluation/newLAMA_easy2/t5_kadapters1.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 150, 3 | "output_length" : 10, 4 | "num_train_epochs" : 1, 5 | "output_dir" : "", 6 | "dataset" : "newlama_easy", 7 | "dataset_version" : "small", 8 | "train_batch_size" : 32, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "kadapter", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 3, 14 | "ngpu" : 1, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "2", 20 | "wandb_log": false, 21 | "wandb_project": "continual_learning_evaluation", 22 | "wandb_run_name" : "T5_large_recentnews(small)_lr.001_kadapter", 23 | "mode" : "pretrain", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : true, 26 | "checkpoint_path" : "outputs/split/kadapter1.ckpt", 27 | "output_log" : "log/newLAMA_Easy2/kadapter1.csv", 28 | "split_num" : 2, 29 | "split" : 2 30 | } -------------------------------------------------------------------------------- /configs/split/evaluation/newLAMA_easy2/t5_kadapters2.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 150, 3 | "output_length" : 10, 4 | "num_train_epochs" : 1, 5 | "output_dir" : "", 6 | "dataset" : "newlama_easy", 7 | "dataset_version" : "small", 8 | "train_batch_size" : 32, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": 
"kadapter", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 3, 14 | "ngpu" : 1, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "2", 20 | "wandb_log": false, 21 | "wandb_project": "continual_learning_evaluation", 22 | "wandb_run_name" : "T5_large_recentnews(small)_lr.001_kadapter", 23 | "mode" : "pretrain", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : true, 26 | "checkpoint_path" : "outputs/split/kadapter2.ckpt", 27 | "output_log" : "log/newLAMA_Easy2/kadapter2.csv", 28 | "split_num" : 2, 29 | "split" : 2 30 | } -------------------------------------------------------------------------------- /configs/split/evaluation/newLAMA_easy2/t5_lora1.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 150, 3 | "output_length" : 10, 4 | "num_train_epochs" : 1, 5 | "output_dir" : "", 6 | "dataset" : "newlama_easy", 7 | "dataset_version" : "small", 8 | "train_batch_size" : 32, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "lora", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 3, 14 | "ngpu" : 1, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "2", 20 | "wandb_log": false, 21 | "wandb_project": "continual_learning_evaluation", 22 | "wandb_run_name" : "T5_large_recentnews(small)_lr.001_lora", 23 | "mode" : "pretrain", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : true, 26 | "checkpoint_path" : "outputs/split/lora1.ckpt", 27 | "output_log" : "log/newLAMA_Easy2/lora1.csv", 28 | "split_num" : 2, 29 | "split" : 2 30 | } 31 | -------------------------------------------------------------------------------- /configs/split/evaluation/newLAMA_easy2/t5_lora2.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 150, 3 | "output_length" : 10, 4 | "num_train_epochs" : 1, 5 | "output_dir" : "", 6 | "dataset" : "newlama_easy", 7 | "dataset_version" : "small", 8 | "train_batch_size" : 32, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "lora", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 3, 14 | "ngpu" : 1, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "2", 20 | "wandb_log": false, 21 | "wandb_project": "continual_learning_evaluation", 22 | "wandb_run_name" : "T5_large_recentnews(small)_lr.001_lora", 23 | "mode" : "pretrain", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : true, 26 | "checkpoint_path" : "outputs/split/lora2.ckpt", 27 | "output_log" : "log/newLAMA_Easy2/lora2.csv", 28 | "split_num" : 2, 29 | "split" : 2 30 | } 31 | -------------------------------------------------------------------------------- /configs/split/evaluation/newLAMA_easy2/t5_mixreview1.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 150, 3 | "output_length" : 10, 4 | "num_train_epochs" : 1, 5 | "output_dir" : "", 6 | "dataset" : "newlama_easy", 7 | "dataset_version" : "small", 8 | "train_batch_size" : 32, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "baseline", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 3, 14 | "ngpu" : 1, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "2", 20 | "wandb_log": false, 21 | "wandb_project": "continual_learning_evaluation", 22 | "wandb_run_name" : "T5_large_recentnews(small)_lr.001_mixreview", 23 | "mode" : "pretrain", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : 
true, 26 | "checkpoint_path" : "outputs/split/mixreview1.ckpt", 27 | "output_log" : "log/newLAMA_Easy2/mixreview1.csv", 28 | "split_num" : 2, 29 | "split" : 2 30 | } -------------------------------------------------------------------------------- /configs/split/evaluation/newLAMA_easy2/t5_mixreview2.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 150, 3 | "output_length" : 10, 4 | "num_train_epochs" : 1, 5 | "output_dir" : "", 6 | "dataset" : "newlama_easy", 7 | "dataset_version" : "small", 8 | "train_batch_size" : 32, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "baseline", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 3, 14 | "ngpu" : 1, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "2", 20 | "wandb_log": false, 21 | "wandb_project": "continual_learning_evaluation", 22 | "wandb_run_name" : "T5_large_recentnews(small)_lr.001_mixreview", 23 | "mode" : "pretrain", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : true, 26 | "checkpoint_path" : "outputs/split/mixreview2.ckpt", 27 | "output_log" : "log/newLAMA_Easy2/mixreview2.csv", 28 | "split_num" : 2, 29 | "split" : 2 30 | } -------------------------------------------------------------------------------- /configs/split/evaluation/newLAMA_easy2/t5_modular_small1.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 150, 3 | "output_length" : 10, 4 | "num_train_epochs" : 1, 5 | "output_dir" : "", 6 | "dataset" : "newlama_easy", 7 | "dataset_version" : "small", 8 | "train_batch_size" : 32, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "modular_small", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 3, 14 | "ngpu" : 1, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | 
"accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "2", 20 | "wandb_log": false, 21 | "wandb_project": "continual_learning_evaluation", 22 | "wandb_run_name" : "T5_large_recentnews(small)_lr.001_modular", 23 | "mode" : "pretrain", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : true, 26 | "checkpoint_path" : "outputs/split/modular1.ckpt", 27 | "output_log" : "log/newLAMA_Easy2/modular1.csv", 28 | "split_num" : 2, 29 | "split" : 2 30 | } -------------------------------------------------------------------------------- /configs/split/evaluation/newLAMA_easy2/t5_modular_small2.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 150, 3 | "output_length" : 10, 4 | "num_train_epochs" : 1, 5 | "output_dir" : "", 6 | "dataset" : "newlama_easy", 7 | "dataset_version" : "small", 8 | "train_batch_size" : 32, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "modular_small", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 3, 14 | "ngpu" : 1, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "2", 20 | "wandb_log": false, 21 | "wandb_project": "continual_learning_evaluation", 22 | "wandb_run_name" : "T5_large_recentnews(small)_lr.001_modular", 23 | "mode" : "pretrain", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : true, 26 | "checkpoint_path" : "outputs/split/modular2.ckpt", 27 | "output_log" : "log/newLAMA_Easy2/modular2.csv", 28 | "split_num" : 2, 29 | "split" : 2 30 | } -------------------------------------------------------------------------------- /configs/split/evaluation/newLAMA_easy2/t5_recadam1.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 150, 3 | "output_length" : 10, 4 | "num_train_epochs" : 1, 5 | "output_dir" : "", 6 | "dataset" : 
"newlama_easy", 7 | "dataset_version" : "small", 8 | "train_batch_size" : 32, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "baseline", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 3, 14 | "ngpu" : 1, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "2", 20 | "wandb_log": false, 21 | "wandb_project": "continual_learning_evaluation", 22 | "wandb_run_name" : "T5_large_recentnews(small)_lr.001_recadam", 23 | "mode" : "pretrain", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : true, 26 | "checkpoint_path" : "outputs/split/recadam1.ckpt", 27 | "output_log" : "log/newLAMA_Easy2/recadam1.csv", 28 | "split_num" : 2, 29 | "split" : 2 30 | } -------------------------------------------------------------------------------- /configs/split/evaluation/newLAMA_easy2/t5_recadam2.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 150, 3 | "output_length" : 10, 4 | "num_train_epochs" : 1, 5 | "output_dir" : "", 6 | "dataset" : "newlama_easy", 7 | "dataset_version" : "small", 8 | "train_batch_size" : 32, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "baseline", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 3, 14 | "ngpu" : 1, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "2", 20 | "wandb_log": false, 21 | "wandb_project": "continual_learning_evaluation", 22 | "wandb_run_name" : "T5_large_recentnews(small)_lr.001_recadam", 23 | "mode" : "pretrain", 24 | "use_lr_scheduling" : false, 25 | "check_validation" : true, 26 | "checkpoint_path" : "outputs/split/recadam2.ckpt", 27 | "output_log" : "log/newLAMA_Easy2/recadam2.csv", 28 | "split_num" : 2, 29 | "split" : 2 30 | } 
-------------------------------------------------------------------------------- /configs/split/training/t5_baseline.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 350, 3 | "output_length" : 350, 4 | "num_train_epochs" : 8, 5 | "output_dir" : "outputs/T5_large_recentnews(small)_baseline_split", 6 | "dataset" : "recentnews", 7 | "dataset_version" : "small", 8 | "train_batch_size" : 5, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "baseline", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 3, 14 | "ngpu" : 4, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "0,1,2,3", 20 | "wandb_log": true, 21 | "wandb_project": "continual_learning_split", 22 | "wandb_run_name" : "T5_large_recentnews(small)_baseline", 23 | "mode" : "pretrain", 24 | "use_lr_scheduling" : true, 25 | "check_validation" : false, 26 | "checkpoint_path" : "", 27 | "split_num" : 2, 28 | "split" : 1 29 | } -------------------------------------------------------------------------------- /configs/split/training/t5_baseline2.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 350, 3 | "output_length" : 350, 4 | "num_train_epochs" : 8, 5 | "output_dir" : "outputs/T5_large_recentnews(small)_baseline_split2", 6 | "dataset" : "recentnews", 7 | "dataset_version" : "small", 8 | "train_batch_size" : 5, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "baseline", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 3, 14 | "ngpu" : 4, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "0,1,2,3", 20 | "wandb_log": true, 21 | "wandb_project": "continual_learning_split", 22 | "wandb_run_name" : 
"T5_large_recentnews(small)_baseline2", 23 | "mode" : "pretrain", 24 | "use_lr_scheduling" : true, 25 | "check_validation" : false, 26 | "checkpoint_path" : "outputs/T5_large_recentnews(small)_baseline_split/last.ckpt", 27 | "split_num" : 2, 28 | "split" : 2 29 | } -------------------------------------------------------------------------------- /configs/split/training/t5_kadapter.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 350, 3 | "output_length" : 350, 4 | "num_train_epochs" : 8, 5 | "output_dir" : "outputs/T5_large_recentnews(small)_kadapter_split", 6 | "dataset" : "recentnews", 7 | "dataset_version" : "small", 8 | "train_batch_size" : 5, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-small-ssm", 11 | "method": "kadapter", 12 | "freeze_level": 1, 13 | "gradient_accumulation_steps" : 3, 14 | "ngpu" : 4, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "0,1,2,3", 20 | "wandb_log": true, 21 | "wandb_project": "continual_learning_split", 22 | "wandb_run_name" : "T5_large_recentnews(small)_kadapter", 23 | "mode" : "pretrain", 24 | "use_lr_scheduling" : true, 25 | "check_validation" : false, 26 | "checkpoint_path" : "", 27 | "split_num" : 2, 28 | "split" : 1 29 | } -------------------------------------------------------------------------------- /configs/split/training/t5_kadapter2.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 350, 3 | "output_length" : 350, 4 | "num_train_epochs" : 8, 5 | "output_dir" : "outputs/T5_large_recentnews(small)_kadapter_split2", 6 | "dataset" : "recentnews", 7 | "dataset_version" : "small", 8 | "train_batch_size" : 5, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-small-ssm", 11 | "method": "kadapter2", 12 | "freeze_level": 1, 13 | "gradient_accumulation_steps" : 3, 14 | "ngpu" : 4, 15 | 
"num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "0,1,2,3", 20 | "wandb_log": true, 21 | "wandb_project": "continual_learning_split", 22 | "wandb_run_name" : "T5_large_recentnews(small)_kadapter2", 23 | "mode" : "pretrain", 24 | "use_lr_scheduling" : true, 25 | "check_validation" : false, 26 | "checkpoint_path" : "", 27 | "split_num" : 2, 28 | "split" : 2 29 | } -------------------------------------------------------------------------------- /configs/split/training/t5_lora.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 350, 3 | "output_length" : 350, 4 | "num_train_epochs" : 8, 5 | "output_dir" : "outputs/T5_large_recentnews(small)_lora_split", 6 | "dataset" : "recentnews", 7 | "dataset_version" : "small", 8 | "train_batch_size" : 5, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "lora", 12 | "freeze_level": 1, 13 | "gradient_accumulation_steps" : 3, 14 | "ngpu" : 4, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "4,5,6,7", 20 | "wandb_log": true, 21 | "wandb_project": "continual_learning_split", 22 | "wandb_run_name" : "T5_large_recentnews(small)_lora", 23 | "mode" : "pretrain", 24 | "use_lr_scheduling" : true, 25 | "check_validation" : false, 26 | "checkpoint_path" : "", 27 | "split_num" : 2, 28 | "split" : 1 29 | } -------------------------------------------------------------------------------- /configs/split/training/t5_lora2.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 350, 3 | "output_length" : 350, 4 | "num_train_epochs" : 8, 5 | "output_dir" : "outputs/T5_large_recentnews(small)_lora_split2", 6 | "dataset" : "recentnews", 7 | "dataset_version" : "small", 8 | "train_batch_size" : 5, 9 | 
"learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "lora", 12 | "freeze_level": 1, 13 | "gradient_accumulation_steps" : 3, 14 | "ngpu" : 4, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "4,5,6,7", 20 | "wandb_log": true, 21 | "wandb_project": "continual_learning_split", 22 | "wandb_run_name" : "T5_large_recentnews(small)_lora2", 23 | "mode" : "pretrain", 24 | "use_lr_scheduling" : true, 25 | "check_validation" : false, 26 | "checkpoint_path" : "", 27 | "split_num" : 2, 28 | "split" : 2 29 | } -------------------------------------------------------------------------------- /configs/split/training/t5_mixreview.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 350, 3 | "output_length" : 350, 4 | "num_train_epochs" : 8, 5 | "output_dir" : "outputs/T5_large_recentnews(small)_mixreview_split", 6 | "dataset" : "recentnews", 7 | "dataset_version" : "small", 8 | "train_batch_size" : 5, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "mixreview", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 3, 14 | "ngpu" : 4, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "0,1,2,3", 20 | "wandb_log": true, 21 | "wandb_project": "continual_learning_split", 22 | "wandb_run_name" : "T5_large_recentnews(small)_mixreview", 23 | "mode" : "pretrain", 24 | "use_lr_scheduling" : true, 25 | "check_validation" : false, 26 | "checkpoint_path" : "", 27 | "split_num" : 2, 28 | "split" : 1 29 | } -------------------------------------------------------------------------------- /configs/split/training/t5_mixreview2.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 350, 3 | "output_length" : 350, 4 | 
"num_train_epochs" : 8, 5 | "output_dir" : "outputs/T5_large_recentnews(small)_mixreview_split2", 6 | "dataset" : "recentnews", 7 | "dataset_version" : "small", 8 | "train_batch_size" : 5, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "mixreview", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 3, 14 | "ngpu" : 4, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "0,1,2,3", 20 | "wandb_log": true, 21 | "wandb_project": "continual_learning_split", 22 | "wandb_run_name" : "T5_large_recentnews(small)_mixreview2", 23 | "mode" : "pretrain", 24 | "use_lr_scheduling" : true, 25 | "check_validation" : false, 26 | "checkpoint_path" : "outputs/T5_large_recentnews(small)_mixreview_split/last.ckpt", 27 | "split_num" : 2, 28 | "split" : 2 29 | } -------------------------------------------------------------------------------- /configs/split/training/t5_modular.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 350, 3 | "output_length" : 350, 4 | "num_train_epochs" : 8, 5 | "output_dir" : "outputs/T5_large_recentnews(small)_modular_split", 6 | "dataset" : "recentnews", 7 | "dataset_version" : "small", 8 | "train_batch_size" : 5, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "modular_small", 12 | "freeze_level": 1, 13 | "gradient_accumulation_steps" : 3, 14 | "ngpu" : 4, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "0,1,2,3", 20 | "wandb_log": true, 21 | "wandb_project": "continual_learning_split", 22 | "wandb_run_name" : "T5_large_recentnews(small)_modular", 23 | "mode" : "pretrain", 24 | "use_lr_scheduling" : true, 25 | "check_validation" : false, 26 | "checkpoint_path" : "", 27 | "split_num" : 2, 28 | "split" : 1 29 | } 
-------------------------------------------------------------------------------- /configs/split/training/t5_modular2.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 350, 3 | "output_length" : 350, 4 | "num_train_epochs" : 8, 5 | "output_dir" : "outputs/T5_large_recentnews(small)_modular_split2", 6 | "dataset" : "recentnews", 7 | "dataset_version" : "small", 8 | "train_batch_size" : 5, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "modular_small", 12 | "freeze_level": 1, 13 | "gradient_accumulation_steps" : 3, 14 | "ngpu" : 4, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "0,1,2,3", 20 | "wandb_log": true, 21 | "wandb_project": "continual_learning_split", 22 | "wandb_run_name" : "T5_large_recentnews(small)_modular2", 23 | "mode" : "pretrain", 24 | "use_lr_scheduling" : true, 25 | "check_validation" : false, 26 | "checkpoint_path" : "", 27 | "split_num" : 2, 28 | "split" : 2 29 | } -------------------------------------------------------------------------------- /configs/split/training/t5_recadam.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 350, 3 | "output_length" : 350, 4 | "num_train_epochs" : 8, 5 | "output_dir" : "outputs/T5_large_recentnews(small)_recadam_split", 6 | "dataset" : "recentnews", 7 | "dataset_version" : "small", 8 | "train_batch_size" : 3, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "recadam", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 5, 14 | "ngpu" : 4, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "4,5,6,7", 20 | "wandb_log": true, 21 | "wandb_project": "continual_learning_split", 22 | "wandb_run_name" : 
"T5_large_recentnews(small)_recadam", 23 | "mode" : "pretrain", 24 | "use_lr_scheduling" : true, 25 | "check_validation" : false, 26 | "checkpoint_path" : "", 27 | "split_num" : 2, 28 | "split" : 1 29 | } -------------------------------------------------------------------------------- /configs/split/training/t5_recadam2.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_length" : 350, 3 | "output_length" : 350, 4 | "num_train_epochs" : 8, 5 | "output_dir" : "outputs/T5_large_recentnews(small)_recadam_split2", 6 | "dataset" : "recentnews", 7 | "dataset_version" : "small", 8 | "train_batch_size" : 3, 9 | "learning_rate" : 1e-3, 10 | "model" : "google/t5-large-ssm", 11 | "method": "recadam2", 12 | "freeze_level": 0, 13 | "gradient_accumulation_steps" : 5, 14 | "ngpu" : 4, 15 | "num_workers" : 40, 16 | "resume_from_checkpoint" : null, 17 | "accelerator" : "ddp", 18 | "use_deepspeed" : false, 19 | "CUDA_VISIBLE_DEVICES" : "4,5,6,7", 20 | "wandb_log": true, 21 | "wandb_project": "continual_learning_split", 22 | "wandb_run_name" : "T5_large_recentnews(small)_recadam2", 23 | "mode" : "pretrain", 24 | "use_lr_scheduling" : true, 25 | "check_validation" : false, 26 | "checkpoint_path" : "outputs/T5_large_recentnews(small)_recadam_split/last.ckpt", 27 | "split_num" : 2, 28 | "split" : 2 29 | } -------------------------------------------------------------------------------- /download_ckl_data.py: -------------------------------------------------------------------------------- 1 | from google.cloud import storage 2 | import os 3 | 4 | def download_public_file(bucket_name, directory_name): 5 | """Downloads a public blob from the bucket.""" 6 | # bucket_name = "your-bucket-name" 7 | # source_blob_name = "storage-object-name" 8 | # destination_file_name = "local/path/to/file" 9 | 10 | storage_client = storage.Client.create_anonymous_client() 11 | 12 | bucket = storage_client.bucket(bucket_name) 13 | blobs = 
bucket.list_blobs(prefix=directory_name) 14 | #blobs = bucket.list_blobs() 15 | for blob in blobs: 16 | blob = bucket.blob(blob.name) 17 | blob.download_to_filename(blob.name) 18 | 19 | print( 20 | "Downloaded public blob {} from bucket {} to {}.".format( 21 | blob.name, bucket.name, blob.name 22 | ) 23 | ) 24 | 25 | os.makedirs('ckl_data', exist_ok=True) 26 | download_public_file('continual_learning', 'ckl_data/') -------------------------------------------------------------------------------- /download_model_checkpoints.py: -------------------------------------------------------------------------------- 1 | from google.cloud import storage 2 | import os 3 | 4 | def download_public_file(bucket_name, directory_name): 5 | """Downloads a public blob from the bucket.""" 6 | # bucket_name = "your-bucket-name" 7 | # source_blob_name = "storage-object-name" 8 | # destination_file_name = "local/path/to/file" 9 | 10 | storage_client = storage.Client.create_anonymous_client() 11 | 12 | bucket = storage_client.bucket(bucket_name) 13 | blobs = bucket.list_blobs(prefix=directory_name) 14 | #blobs = bucket.list_blobs() 15 | for blob in blobs: 16 | blob = bucket.blob(blob.name) 17 | blob.download_to_filename(blob.name) 18 | 19 | print( 20 | "Downloaded public blob {} from bucket {} to {}.".format( 21 | blob.name, bucket.name, blob.name 22 | ) 23 | ) 24 | 25 | os.makedirs('outputs/full', exist_ok=True) 26 | download_public_file('continual_learning', 'outputs/') -------------------------------------------------------------------------------- /models/__init__.py: -------------------------------------------------------------------------------- 1 | from models.GPT2_Model import GPT2 as GPT2_Model 2 | from models.T5_Model import T5 as T5_Model 3 | 4 | def load_model(type: str): 5 | if type=='T5': 6 | return T5_Model 7 | elif type=='GPT2': 8 | return GPT2_Model 9 | else: 10 | raise Exception('Select the correct model type. 
Currently supporting only T5 and GPT2.') -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | transformers==4.12.3 2 | pytorch-lightning==1.3.8 3 | pandas 4 | numpy 5 | nlp==0.4.0 6 | sentencepiece==0.1.94 7 | wandb 8 | nltk 9 | google-cloud-storage 10 | deepspeed 11 | boto3 12 | datasets 13 | rouge 14 | --------------------------------------------------------------------------------