├── tests
│   ├── __init__.py
│   ├── test_utils
│   │   └── __init__.py
│   ├── test_method
│   │   └── __init__.py
│   ├── test_mixins
│   │   └── __init__.py
│   ├── test_modelpool
│   │   └── __init__.py
│   ├── test_models
│   │   └── __init__.py
│   ├── import_profile.py
│   └── README.md
├── docs
│   ├── config
│   ├── .gitignore
│   ├── algorithms
│   │   ├── model_stitching.md
│   │   ├── specification_ensemble.md
│   │   ├── layer_recombination.md
│   │   ├── images
│   │   │   ├── ewemoe.png
│   │   │   ├── wemoe.png
│   │   │   ├── bitdelta.png
│   │   │   ├── ewemoe_1.png
│   │   │   ├── ewemoe_2.png
│   │   │   ├── pwe_moe.png
│   │   │   ├── sigmoid.png
│   │   │   ├── adamerging.png
│   │   │   ├── iso_merging.png
│   │   │   ├── solar10.7B.png
│   │   │   ├── ties_merging.jpg
│   │   │   ├── Task Arithmetic.png
│   │   │   ├── smile_upscaling.png
│   │   │   ├── sparse_upcycling.png
│   │   │   ├── wemoe_lr_tuning.png
│   │   │   ├── max-model_predictor.png
│   │   │   ├── wemoe_loss_landscape.png
│   │   │   ├── concrete_subspace_learning.png
│   │   │   ├── fedmr_model_recombination.jpg
│   │   │   ├── regmean_vs_regmean_plusplus.png
│   │   │   ├── adamerging_layerwise_coefficients.png
│   │   │   ├── concrete_adamerging_vs_adamerging.png
│   │   │   ├── ties_merging_hyperparameter_tuning.png
│   │   │   └── adamerging_model_merging_coefficients.png
│   │   ├── pruning
│   │   │   └── images
│   │   │       └── llama_2_4_semistructued_first_layer.png
│   │   └── slerp.md
│   ├── api
│   │   ├── fusion_bench.utils
│   │   │   ├── misc.md
│   │   │   ├── profiling.md
│   │   │   ├── caching.md
│   │   │   ├── filesystem.md
│   │   │   ├── modelscope.md
│   │   │   ├── logging.md
│   │   │   ├── package_management.md
│   │   │   ├── torch.md
│   │   │   └── data.md
│   │   ├── fusion_bench.optim.md
│   │   ├── fusion_bench.method
│   │   │   └── ensemble.md
│   │   └── fusion_bench.program.md
│   ├── css
│   │   └── material_extra.css
│   ├── images
│   │   ├── llm.png
│   │   ├── model_mixing.png
│   │   ├── model_ensemble.png
│   │   ├── model_merging.png
│   │   ├── fusion_bench_flow.png
│   │   ├── model_upscaling.png
│   │   ├── learning_paradiagm.png
│   │   ├── multi-task_core_steps.png
│   │   ├── multi-task_model_fusion.png
│   │   ├── accelerate model training.png
│   │   └── framework_of_model_fusion.png
│   ├── cli
│   │   └── images
│   │       ├── vscode_debug.png
│   │       ├── pycharm_debug_1.png
│   │       ├── pycharm_debug_2.png
│   │       ├── pycharm_debug_3.png
│   │       ├── tab_completion.png
│   │       └── fusion_bench_webui.png
│   ├── modelpool
│   │   ├── clip-vit-cos.png
│   │   └── images
│   │       ├── convnext_block.png
│   │       ├── clip_eight_corruption.png
│   │       ├── NYUv2-0000003446-63769b25.jpg
│   │       ├── clip-vit-base-patch16_full&lora&l-lora.png
│   │       └── clip-vit-base-patch16_full&lora&l-lora_average.png
│   ├── readinglist
│   │   └── images
│   │       ├── watt.png
│   │       ├── fusellm.png
│   │       ├── lorahub.png
│   │       ├── pwe_moe.png
│   │       ├── forkmerge.png
│   │       ├── fs-merge.png
│   │       ├── fusechat.png
│   │       ├── lora_lego.png
│   │       ├── pituning.png
│   │       ├── adapter_soup.png
│   │       ├── twin_merging.png
│   │       ├── depth_upscaling.png
│   │       ├── scaling_smart.png
│   │       ├── smile_upscaling.png
│   │       ├── Chronopoulou2023.png
│   │       ├── enneng2024survey.png
│   │       ├── sparse-modelsoups.png
│   │       ├── sparse_upcycling.png
│   │       ├── branch_and_merging.png
│   │       └── branch_and_merging_alg.png
│   ├── guides
│   │   └── fusion_bench
│   │       └── mixins
│   │           └── lightning_fabric.md
│   ├── taskpool
│   │   ├── dummy.md
│   │   ├── LlamaTestGenerationTaskPool.md
│   │   ├── gpt2_classification.md
│   │   └── flan-t5_generation.md
│   └── javascripts
│       └── mathjax.js
├── .vscode
│   ├── .gitignore
│   └── init.sh
├── fusion_bench_config
├── config
│   ├── .gitignore
│   ├── model
│   │   ├── clip-vit
│   │   │   ├── clip-vit-base-patch16.yaml
│   │   │   ├── clip-vit-base-patch16_dtd.yaml
│   │   │   ├── clip-vit-base-patch32.yaml
│   │   │   ├── clip-vit-base-patch32_dtd.yaml
│   │   │   ├── clip-vit-large-patch14.yaml
│   │   │   ├── clip-vit-large-patch14_dtd.yaml
│   │   │   ├── clip-vit-base-patch16_gtsrb.yaml
│   │   │   ├── clip-vit-base-patch16_mnist.yaml
│   │   │   ├── clip-vit-base-patch16_pcam.yaml
│   │   │   ├── clip-vit-base-patch16_stl10.yaml
│   │   │   ├── clip-vit-base-patch16_svhn.yaml
│   │   │   ├── clip-vit-base-patch32_gtsrb.yaml
│   │   │   ├── clip-vit-base-patch32_mnist.yaml
│   │   │   ├── clip-vit-base-patch32_pcam.yaml
│   │   │   ├── clip-vit-base-patch32_stl10.yaml
│   │   │   ├── clip-vit-base-patch32_svhn.yaml
│   │   │   ├── clip-vit-large-patch14_pcam.yaml
│   │   │   ├── clip-vit-large-patch14_svhn.yaml
│   │   │   ├── clip-vit-base-patch16_cifar10.yaml
│   │   │   ├── clip-vit-base-patch16_eurosat.yaml
│   │   │   ├── clip-vit-base-patch16_fer2013.yaml
│   │   │   ├── clip-vit-base-patch16_food101.yaml
│   │   │   ├── clip-vit-base-patch16_kmnist.yaml
│   │   │   ├── clip-vit-base-patch16_sun397.yaml
│   │   │   ├── clip-vit-base-patch32_cifar10.yaml
│   │   │   ├── clip-vit-base-patch32_eurosat.yaml
│   │   │   ├── clip-vit-base-patch32_fer2013.yaml
│   │   │   ├── clip-vit-base-patch32_food101.yaml
│   │   │   ├── clip-vit-base-patch32_kmnist.yaml
│   │   │   ├── clip-vit-base-patch32_sun397.yaml
│   │   │   ├── clip-vit-large-patch14_gtsrb.yaml
│   │   │   ├── clip-vit-large-patch14_kmnist.yaml
│   │   │   ├── clip-vit-large-patch14_mnist.yaml
│   │   │   ├── clip-vit-large-patch14_stl10.yaml
│   │   │   ├── clip-vit-large-patch14_sun397.yaml
│   │   │   ├── clip-vit-base-patch16_cifar100.yaml
│   │   │   ├── clip-vit-base-patch16_resisc45.yaml
│   │   │   ├── clip-vit-base-patch32_cifar100.yaml
│   │   │   ├── clip-vit-base-patch32_resisc45.yaml
│   │   │   ├── clip-vit-large-patch14_cifar10.yaml
│   │   │   ├── clip-vit-large-patch14_cifar100.yaml
│   │   │   ├── clip-vit-large-patch14_eurosat.yaml
│   │   │   ├── clip-vit-large-patch14_fer2013.yaml
│   │   │   ├── clip-vit-large-patch14_food101.yaml
│   │   │   ├── clip-vit-large-patch14_resisc45.yaml
│   │   │   ├── clip-vit-base-patch16_fashion_mnist.yaml
│   │   │   ├── clip-vit-base-patch16_rendered-sst2.yaml
│   │   │   ├── clip-vit-base-patch16_stanford-cars.yaml
│   │   │   ├── clip-vit-base-patch32_fashion_mnist.yaml
│   │   │   ├── clip-vit-base-patch32_rendered-sst2.yaml
│   │   │   ├── clip-vit-base-patch32_stanford-cars.yaml
│   │   │   ├── clip-vit-large-patch14_fashion_mnist.yaml
│   │   │   ├── clip-vit-large-patch14_rendered-sst2.yaml
│   │   │   ├── clip-vit-large-patch14_stanford-cars.yaml
│   │   │   ├── clip-vit-base-patch16_emnist_letters.yaml
│   │   │   ├── clip-vit-base-patch16_oxford-iiit-pet.yaml
│   │   │   ├── clip-vit-base-patch32_emnist_letters.yaml
│   │   │   ├── clip-vit-base-patch32_oxford-iiit-pet.yaml
│   │   │   ├── clip-vit-large-patch14_emnist_letters.yaml
│   │   │   ├── clip-vit-base-patch16_oxford_flowers102.yaml
│   │   │   ├── clip-vit-base-patch32_oxford_flowers102.yaml
│   │   │   ├── clip-vit-large-patch14_oxford-iiit-pet.yaml
│   │   │   ├── clip-vit-large-patch14_oxford_flowers102.yaml
│   │   │   ├── clip-vit-base-patch16_eight_tasks.yaml
│   │   │   ├── clip-vit-large-patch14_eight_tasks.yaml
│   │   │   ├── clip-vit-base-patch32_eight_tasks.yaml
│   │   │   └── download_TALL20_models.sh
│   │   └── flan-t5
│   │       ├── flan-t5-base.yaml
│   │       ├── flan-t5-large.yaml
│   │       ├── flan-t5-base_glue-qqp.yaml
│   │       ├── flan-t5-base_glue-rte.yaml
│   │       ├── flan-t5-base_glue-cola.yaml
│   │       ├── flan-t5-base_glue-mnli.yaml
│   │       ├── flan-t5-base_glue-mrpc.yaml
│   │       ├── flan-t5-base_glue-qnli.yaml
│   │       ├── flan-t5-base_glue-sst2.yaml
│   │       ├── flan-t5-base_glue-stsb.yaml
│   │       ├── flan-t5-base_glue-cola_lora-16.yaml
│   │       ├── flan-t5-base_glue-mnli_lora-16.yaml
│   │       ├── flan-t5-base_glue-mrpc_lora-16.yaml
│   │       ├── flan-t5-base_glue-qnli_lora-16.yaml
│   │       ├── flan-t5-base_glue-qqp_lora-16.yaml
│   │       ├── flan-t5-base_glue-rte_lora-16.yaml
│   │       ├── flan-t5-base_glue-sst2_lora-16.yaml
│   │       ├── flan-t5-base_glue-stsb_lora-16.yaml
│   │       ├── flan-t5-large_glue-qqp_lora-16.yaml
│   │       ├── flan-t5-large_glue-rte_lora-16.yaml
│   │       ├── flan-t5-large_glue-cola_lora-16.yaml
│   │       ├── flan-t5-large_glue-mnli_lora-16.yaml
│   │       ├── flan-t5-large_glue-mrpc_lora-16.yaml
│   │       ├── flan-t5-large_glue-qnli_lora-16.yaml
│   │       ├── flan-t5-large_glue-sst2_lora-16.yaml
│   │       └── flan-t5-large_glue-stsb_lora-16.yaml
│   ├── taskpool
│   │   ├── dummy.yaml
│   │   ├── CLIPVisionModelTaskPool
│   │   │   ├── clip-vit-single-task_dtd.yaml
│   │   │   ├── clip-vit-single-task_gtsrb.yaml
│   │   │   ├── clip-vit-single-task_mnist.yaml
│   │   │   ├── clip-vit-single-task_pcam.yaml
│   │   │   ├── clip-vit-single-task_stl10.yaml
│   │   │   ├── clip-vit-single-task_svhn.yaml
│   │   │   ├── clip-vit-single-task_cifar10.yaml
│   │   │   ├── clip-vit-single-task_eurosat.yaml
│   │   │   ├── clip-vit-single-task_fer2013.yaml
│   │   │   ├── clip-vit-single-task_food101.yaml
│   │   │   ├── clip-vit-single-task_kmnist.yaml
│   │   │   ├── clip-vit-single-task_sun397.yaml
│   │   │   ├── clip-vit-single-task_cifar100.yaml
│   │   │   ├── clip-vit-single-task_resisc45.yaml
│   │   │   ├── clip-vit-single-task_emnist_letters.yaml
│   │   │   ├── clip-vit-single-task_fashion_mnist.yaml
│   │   │   ├── clip-vit-single-task_oxford-iiit-pet.yaml
│   │   │   ├── clip-vit-single-task_rendered-sst2.yaml
│   │   │   ├── clip-vit-single-task_stanford-cars.yaml
│   │   │   ├── clip-vit-single-task_oxford_flowers102.yaml
│   │   │   ├── clip-vit-single-task_oxford_flowers102_val.yaml
│   │   │   ├── clip-vit-classification_TA8.yaml
│   │   │   ├── clip-vit-classification_TA8_val.yaml
│   │   │   ├── clip-vit-classification_TA8_with_control_task.yaml
│   │   │   └── clip-vit-classification_TA8_L14.yaml
│   │   ├── nyuv2_taskpool.yaml
│   │   ├── LMEvalHarnessTaskPool
│   │   │   └── lm_eval.yaml
│   │   └── reward_model_evaluation.yaml
│   ├── dataset
│   │   ├── summarization
│   │   │   ├── xsum.yaml
│   │   │   ├── test
│   │   │   │   └── xsum.yaml
│   │   │   ├── train
│   │   │   │   └── xsum.yaml
│   │   │   └── val
│   │   │       └── xsum.yaml
│   │   ├── image_classification
│   │   │   ├── test
│   │   │   │   ├── mnist.yaml
│   │   │   │   ├── dtd.yaml
│   │   │   │   ├── fer2013.yaml
│   │   │   │   ├── gtsrb.yaml
│   │   │   │   ├── kmnist.yaml
│   │   │   │   ├── stl10.yaml
│   │   │   │   ├── sun397.yaml
│   │   │   │   ├── cifar10.yaml
│   │   │   │   ├── eurosat.yaml
│   │   │   │   ├── food101.yaml
│   │   │   │   ├── pcam.yaml
│   │   │   │   ├── cifar100.yaml
│   │   │   │   ├── resisc45.yaml
│   │   │   │   ├── cub-200-2011.yaml
│   │   │   │   ├── emnist_mnist.yaml
│   │   │   │   ├── rendered-sst2.yaml
│   │   │   │   ├── emnist_letters.yaml
│   │   │   │   ├── oxford-iiit-pet.yaml
│   │   │   │   ├── stanford-cars.yaml
│   │   │   │   ├── svhn.yaml
│   │   │   │   ├── tiny-imagenet.yaml
│   │   │   │   ├── fashion_mnist.yaml
│   │   │   │   ├── mango-leaf-disease.yaml
│   │   │   │   ├── oxford_flowers102.yaml
│   │   │   │   └── the_eight_tasks.yaml
│   │   │   ├── train
│   │   │   │   ├── mnist.yaml
│   │   │   │   ├── dtd.yaml
│   │   │   │   ├── fer2013.yaml
│   │   │   │   ├── gtsrb.yaml
│   │   │   │   ├── stl10.yaml
│   │   │   │   ├── cifar10.yaml
│   │   │   │   ├── eurosat.yaml
│   │   │   │   ├── food101.yaml
│   │   │   │   ├── kmnist.yaml
│   │   │   │   ├── pcam.yaml
│   │   │   │   ├── sun397.yaml
│   │   │   │   ├── cifar100.yaml
│   │   │   │   ├── resisc45.yaml
│   │   │   │   ├── emnist_mnist.yaml
│   │   │   │   ├── cub-200-2011.yaml
│   │   │   │   ├── oxford-iiit-pet.yaml
│   │   │   │   ├── rendered-sst2.yaml
│   │   │   │   ├── stanford-cars.yaml
│   │   │   │   ├── svhn.yaml
│   │   │   │   ├── tiny-imagenet.yaml
│   │   │   │   ├── emnist_letters.yaml
│   │   │   │   ├── fashion_mnist.yaml
│   │   │   │   ├── mango-leaf-disease.yaml
│   │   │   │   ├── oxford_flowers102.yaml
│   │   │   │   └── the_eight_tasks.yaml
│   │   │   ├── val
│   │   │   │   ├── the_eight_tasks.yaml
│   │   │   │   ├── dtd.yaml
│   │   │   │   ├── mnist.yaml
│   │   │   │   ├── gtsrb.yaml
│   │   │   │   ├── sun397.yaml
│   │   │   │   ├── eurosat.yaml
│   │   │   │   ├── resisc45.yaml
│   │   │   │   ├── stanford-cars.yaml
│   │   │   │   └── svhn.yaml
│   │   │   └── README.md
│   │   ├── text_generation
│   │   │   ├── test
│   │   │   │   ├── gsm8k.yaml
│   │   │   │   ├── gsm-hard.yaml
│   │   │   │   └── gsm8k_question_label.yaml
│   │   │   └── train
│   │   │       ├── gsm8k.yaml
│   │   │       ├── CodeAlpaca-20k.yaml
│   │   │       └── gsm8k_question_label.yaml
│   │   ├── llm_sft
│   │   │   ├── ultrachat_200k.yaml
│   │   │   └── alpaca_cleaned.yaml
│   │   └── question_answering
│   │       ├── train
│   │       │   ├── MetaMathQA.yaml
│   │       │   └── search_qa.yaml
│   │       ├── search_qa.yaml
│   │       ├── test
│   │       │   └── search_qa.yaml
│   │       └── val
│   │           └── search_qa.yaml
│   ├── fabric
│   │   ├── loggers
│   │   │   ├── mlflow_logger.yaml
│   │   │   ├── wandb_logger.yaml
│   │   │   ├── swandb_logger.yaml
│   │   │   ├── csv_logger.yaml
│   │   │   └── tensorboard_logger.yaml
│   │   └── strategy
│   │       ├── llama_fsdp.yaml
│   │       ├── llama_peft_fsdp.yaml
│   │       └── deepspeed.yaml
│   ├── method
│   │   ├── doge_ta
│   │   │   └── doge_ta.yaml
│   │   ├── isotropic_merging
│   │   │   ├── iso_c.yaml
│   │   │   └── iso_cts.yaml
│   │   ├── dare
│   │   │   ├── simple_average.yaml
│   │   │   ├── task_arithmetic.yaml
│   │   │   └── ties_merging.yaml
│   │   ├── tall_mask
│   │   │   └── task_arithmetic.yaml
│   │   ├── pruning
│   │   │   ├── magnitude_diff_pruning.yaml
│   │   │   └── llama_random_pruning.yaml
│   │   ├── analysis
│   │   │   ├── task_vector_violin_plot.yaml
│   │   │   └── task_vector_cos_similarity.yaml
│   │   ├── ada_svd
│   │   │   └── clip_vision.yaml
│   │   ├── classification
│   │   │   └── image_classification_finetune_test.yaml
│   │   ├── trust_region
│   │   │   └── clip_task_arithmetic.yaml
│   │   ├── expert_sparsity
│   │   │   └── README.md
│   │   ├── task_singular_vector
│   │   │   └── TaskSingularVectorMerging.yaml
│   │   ├── fw_merging
│   │   │   ├── fw_hard.yaml
│   │   │   └── fw_soft.yaml
│   │   ├── wudi
│   │   │   └── wudi.yaml
│   │   ├── regmean
│   │   │   └── regmean.yaml
│   │   ├── mixtral_moe_merging.yaml
│   │   ├── linear
│   │   │   ├── task_arithmetic_for_causallm.yaml
│   │   │   └── weighted_average.yaml
│   │   ├── dummy.yaml
│   │   ├── bitdelta
│   │   │   └── bitdelta.yaml
│   │   ├── smile_upscaling
│   │   │   └── singular_projection_merging.yaml
│   │   ├── ensemble
│   │   │   └── max_model_predictor.yaml
│   │   ├── moe_pruner
│   │   │   └── moe_pruner.yaml
│   │   └── slerp
│   │       └── slerp_lm.yaml
│   ├── _get_started
│   │   ├── greeting_program.yaml
│   │   └── llm_slerp.yaml
│   ├── modelpool
│   │   ├── CausalLMPool
│   │   │   ├── mistral-7b.yaml
│   │   │   ├── vicuna-7b-v1.5.yaml
│   │   │   ├── Qwen2.5-7B-math_and_coder.yaml
│   │   │   ├── qwen2_math_1.5B_and_R1.yaml
│   │   │   ├── Qwen2.5-1.5B_math_and_code.yaml
│   │   │   ├── Qwen2.5-1.5B_three_models.yaml
│   │   │   ├── llama-7b_3-models_v1.yaml
│   │   │   ├── mixtral_moe_merging.yaml
│   │   │   ├── simle_mixtral_exp_v4.yaml
│   │   │   └── mergebench
│   │   │       ├── gemma-2-2b.yaml
│   │   │       ├── gemma-2-9b.yaml
│   │   │       ├── Llama-3.1-8B.yaml
│   │   │       ├── Llama-3.2-3B.yaml
│   │   │       ├── gemma-2-2b-it.yaml
│   │   │       └── gemma-2-9b-it.yaml
│   │   ├── CLIPVisionModelPool
│   │   │   ├── clip-vit-base-patch32_individual.yaml
│   │   │   ├── clip-vit-base-patch32_mtl.yaml
│   │   │   ├── _template.yaml
│   │   │   ├── clip-vit-base-patch16_TA8_model_only.yaml
│   │   │   ├── clip-vit-base-patch32_single_finetuned.yaml
│   │   │   ├── clip-vit-base-patch32_svhn_and_mnist.yaml
│   │   │   ├── clip-vit-base-patch32_single_task_projection.yaml
│   │   │   ├── clip-vit-base-patch16_TA8.yaml
│   │   │   ├── clip-vit-base-patch16_TALL14_model_only.yaml
│   │   │   ├── clip-vit-large-patch14_TALL14_model_only.yaml
│   │   │   ├── clip-vit-base-patch32_two_tasks_control_task.yaml
│   │   │   └── clip-vit-large-patch14_individual.yaml
│   │   ├── Seq2SeqLMPool
│   │   │   ├── flan-t5-base_individual.yaml
│   │   │   ├── _template.yaml
│   │   │   └── flan-t5-base_glue.yaml
│   │   ├── OpenCLIPVisionModelPool
│   │   │   └── ViT-B-32_individual.yaml
│   │   ├── ConvNextForImageClassification
│   │   │   └── convnext-base-224.yaml
│   │   ├── automodelpool.yaml
│   │   ├── Dinov2ForImageClassification
│   │   │   └── dinov2-base-imagenet1k-1-layer.yaml
│   │   ├── smile_mistral_exp_v1.yaml
│   │   ├── smile_mistral_exp_v3.yaml
│   │   ├── smile_mistral_exp_v2.yaml
│   │   └── ResNetForImageClassification
│   │       └── transformers
│   │           ├── resnet18_dtd.yaml
│   │           ├── resnet50_dtd.yaml
│   │           ├── resnet152_dtd.yaml
│   │           ├── resnet18_pcam.yaml
│   │           ├── resnet18_svhn.yaml
│   │           ├── resnet50_pcam.yaml
│   │           ├── resnet50_svhn.yaml
│   │           ├── resnet152_gtsrb.yaml
│   │           ├── resnet152_mnist.yaml
│   │           ├── resnet152_pcam.yaml
│   │           ├── resnet152_stl10.yaml
│   │           ├── resnet152_svhn.yaml
│   │           ├── resnet18_gtsrb.yaml
│   │           ├── resnet18_mnist.yaml
│   │           ├── resnet18_stl10.yaml
│   │           ├── resnet50_gtsrb.yaml
│   │           ├── resnet50_mnist.yaml
│   │           ├── resnet50_stl10.yaml
│   │           ├── resnet152_kmnist.yaml
│   │           ├── resnet152_sun397.yaml
│   │           ├── resnet18_kmnist.yaml
│   │           ├── resnet18_sun397.yaml
│   │           ├── resnet50_kmnist.yaml
│   │           ├── resnet50_sun397.yaml
│   │           ├── resnet152_cifar10.yaml
│   │           ├── resnet152_eurosat.yaml
│   │           ├── resnet152_fer2013.yaml
│   │           ├── resnet152_food101.yaml
│   │           ├── resnet18_cifar10.yaml
│   │           ├── resnet18_cifar100.yaml
│   │           ├── resnet18_eurosat.yaml
│   │           ├── resnet18_fer2013.yaml
│   │           ├── resnet18_food101.yaml
│   │           ├── resnet18_resisc45.yaml
│   │           ├── resnet50_cifar10.yaml
│   │           ├── resnet50_cifar100.yaml
│   │           ├── resnet50_eurosat.yaml
│   │           ├── resnet50_fer2013.yaml
│   │           ├── resnet50_food101.yaml
│   │           ├── resnet50_resisc45.yaml
│   │           ├── resnet152_cifar100.yaml
│   │           ├── resnet152_resisc45.yaml
│   │           ├── resnet18_fashion_mnist.yaml
│   │           ├── resnet18_rendered-sst2.yaml
│   │           ├── resnet18_stanford-cars.yaml
│   │           ├── resnet50_fashion_mnist.yaml
│   │           ├── resnet50_rendered-sst2.yaml
│   │           ├── resnet50_stanford-cars.yaml
│   │           ├── resnet152_fashion_mnist.yaml
│   │           ├── resnet152_rendered-sst2.yaml
│   │           ├── resnet152_stanford-cars.yaml
│   │           ├── resnet18_emnist_letters.yaml
│   │           ├── resnet50_emnist_letters.yaml
│   │           ├── resnet152_emnist_letters.yaml
│   │           ├── resnet152_oxford-iiit-pet.yaml
│   │           ├── resnet18_oxford-iiit-pet.yaml
│   │           ├── resnet50_oxford-iiit-pet.yaml
│   │           ├── resnet152_oxford_flowers102.yaml
│   │           ├── resnet18_oxford_flowers102.yaml
│   │           └── resnet50_oxford_flowers102.yaml
│   ├── nyuv2_config.yaml
│   ├── llama_full_finetune.yaml
│   ├── hydra
│   │   └── default.yaml
│   └── clip-vit-base-patch32_robustness_corrupted.yaml
├── fusion_bench
│   ├── compat
│   │   └── __init__.py
│   ├── metrics
│   │   ├── __init__.py
│   │   ├── continual_learning
│   │   │   └── __init__.py
│   │   ├── model_kinship
│   │   │   └── __init__.py
│   │   └── text_to_image_generation
│   │       └── __init__.py
│   ├── scripts
│   │   ├── __init__.py
│   │   └── clip
│   │       └── __init__.py
│   ├── method
│   │   ├── knots
│   │   │   └── __init__.py
│   │   ├── bitdelta
│   │   │   ├── bitdelta_utils
│   │   │   │   └── __init__.py
│   │   │   └── __init__.py
│   │   ├── dop
│   │   │   └── __init__.py
│   │   ├── model_stock
│   │   │   └── __init__.py
│   │   ├── wudi
│   │   │   └── __init__.py
│   │   ├── pruning
│   │   │   ├── sparsegpt_utils
│   │   │   │   └── __init__.py
│   │   │   ├── __init__.py
│   │   │   └── wanda_utils
│   │   │       └── __init__.py
│   │   ├── moe_pruner
│   │   │   ├── utils
│   │   │   │   └── __init__.py
│   │   │   ├── hooks
│   │   │   │   └── __init__.py
│   │   │   └── __init__.py
│   │   ├── doge_ta
│   │   │   └── __init__.py
│   │   ├── tall_mask
│   │   │   └── __init__.py
│   │   ├── slerp
│   │   │   └── __init__.py
│   │   ├── ties_merging
│   │   │   └── __init__.py
│   │   ├── ada_svd
│   │   │   └── __init__.py
│   │   ├── dawe
│   │   │   └── __init__.py
│   │   ├── pwe_moe
│   │   │   ├── phn
│   │   │   │   └── __init__.py
│   │   │   └── __init__.py
│   │   ├── surgery
│   │   │   └── __init__.py
│   │   ├── fw_merging
│   │   │   └── __init__.py
│   │   ├── trust_region
│   │   │   └── __init__.py
│   │   ├── task_arithmetic
│   │   │   └── __init__.py
│   │   ├── sparselo
│   │   │   └── __init__.py
│   │   ├── analysis
│   │   │   └── __init__.py
│   │   ├── rankone_moe
│   │   │   └── __init__.py
│   │   ├── weighted_average
│   │   │   └── __init__.py
│   │   ├── sparse_we_moe
│   │   │   └── __init__.py
│   │   ├── we_moe
│   │   │   ├── __init__.py
│   │   │   └── utils.py
│   │   ├── depth_upscaling
│   │   │   └── __init__.py
│   │   ├── regmean_plusplus
│   │   │   └── __init__.py
│   │   ├── dare
│   │   │   └── __init__.py
│   │   ├── lm_finetune
│   │   │   ├── __init__.py
│   │   │   └── causal_lm_pretrain.py
│   │   ├── regmean
│   │   │   └── __init__.py
│   │   ├── smile_upscaling
│   │   │   └── __init__.py
│   │   ├── gossip
│   │   │   └── __init__.py
│   │   ├── opcm
│   │   │   └── __init__.py
│   │   ├── mixture_of_experts
│   │   │   └── __init__.py
│   │   ├── fisher_merging
│   │   │   └── __init__.py
│   │   ├── expert_sparsity
│   │   │   └── __init__.py
│   │   ├── task_singular_vector
│   │   │   ├── __init__.py
│   │   │   ├── utils
│   │   │   │   └── __init__.py
│   │   │   └── TSVC.py
│   │   └── linear
│   │       └── __init__.py
│   ├── mixins
│   │   ├── optim
│   │   │   └── __init__.py
│   │   └── openclip_classification.py
│   ├── models
│   │   ├── nyuv2
│   │   │   └── __init__.py
│   │   ├── linearized
│   │   │   └── __init__.py
│   │   ├── smile_moe
│   │   │   └── __init__.py
│   │   ├── wrappers
│   │   │   └── __init__.py
│   │   ├── expert_sparsity
│   │   │   └── __init__.py
│   │   ├── llama
│   │   │   └── model_utils
│   │   │       └── __init__.py
│   │   ├── surgery
│   │   │   └── __init__.py
│   │   ├── masks
│   │   │   └── __init__.py
│   │   ├── chat_templates
│   │   │   └── __init__.py
│   │   ├── modeling_losparse_llama
│   │   │   ├── __init__.py
│   │   │   └── register.py
│   │   ├── open_clip
│   │   │   └── __init__.py
│   │   ├── modeling_smile_llama
│   │   │   ├── __init__.py
│   │   │   └── register.py
│   │   ├── modeling_smile_mistral
│   │   │   ├── __init__.py
│   │   │   └── register.py
│   │   ├── modeling_smile_qwen2
│   │   │   ├── __init__.py
│   │   │   └── register.py
│   │   └── modeling_smile_gemma2
│   │       └── __init__.py
│   ├── utils
│   │   ├── plot
│   │   │   └── __init__.py
│   │   ├── strenum
│   │   │   └── README.md
│   │   └── set.py
│   ├── dataset
│   │   ├── llama
│   │   │   ├── utils
│   │   │   │   └── __init__.py
│   │   │   └── __init__.py
│   │   ├── image_corruption
│   │   │   └── __init__.py
│   │   ├── arc_agi
│   │   │   └── __init__.py
│   │   ├── imdb.py
│   │   └── fer2013.py
│   ├── taskpool
│   │   ├── clip_vision
│   │   │   ├── utils
│   │   │   │   └── __init__.py
│   │   │   └── __init__.py
│   │   ├── llama
│   │   │   └── __init__.py
│   │   ├── openclip_vision
│   │   │   └── __init__.py
│   │   └── lm_eval_harness
│   │       └── __init__.py
│   ├── tasks
│   │   ├── flan_t5_text_generation
│   │   │   └── __init__.py
│   │   ├── __init__.py
│   │   ├── clip_classification
│   │   │   ├── clip_dataset.py
│   │   │   ├── rendered_sst2.py
│   │   │   ├── mnist.py
│   │   │   ├── svhn.py
│   │   │   ├── pcam.py
│   │   │   ├── emnist_mnist.py
│   │   │   ├── stl10.py
│   │   │   ├── kmnist.py
│   │   │   ├── fashion_mnist.py
│   │   │   └── fer2013.py
│   │   └── base_task.py
│   ├── optim
│   │   └── lr_scheduler
│   │       └── utils
│   │           └── __init__.py
│   ├── _get_started
│   │   └── __init__.py
│   ├── modelpool
│   │   ├── clip_vision
│   │   │   └── __init__.py
│   │   ├── openclip_vision
│   │   │   └── __init__.py
│   │   ├── seq2seq_lm
│   │   │   └── __init__.py
│   │   ├── causal_lm
│   │   │   └── __init__.py
│   │   └── seq_classification_lm
│   │       └── __init__.py
│   ├── __main__.py
│   └── constants
│       └── __init__.py
├── examples
│   ├── opcm
│   │   └── .gitignore
│   ├── open_clip
│   │   ├── src
│   │   │   └── __init__.py
│   │   ├── requirements.txt
│   │   ├── .gitignore
│   │   └── evaluate_single_model.sh
│   ├── clip_finetune
│   │   ├── .gitignore
│   │   └── config
│   │       ├── .gitignore
│   │       └── modelpool
│   │           ├── clip-finetune_TALL14.yaml
│   │           └── clip-finetune_TALL20.yaml
│   ├── hyperparam_search
│   │   └── .gitignore
│   ├── mergebench
│   │   ├── .gitignore
│   │   ├── requirements.txt
│   │   ├── evaluate_all.sh
│   │   ├── evaluate_gemma-2-2b.sh
│   │   ├── evaluate_gemma-2-9b.sh
│   │   ├── evaluate_Llama-3.1-8B.sh
│   │   ├── evaluate_Llama-3.2-3B.sh
│   │   ├── evaluate_gemma-2-2b-it.sh
│   │   ├── evaluate_gemma-2-9b-it.sh
│   │   └── evaluate_Llama-3.2-3B-Instruct.sh
│   ├── iterative_sparselo_pruning
│   │   └── .gitignore
│   ├── smile_upscaling
│   │   ├── .gitignore
│   │   └── SMILE.png
│   ├── README.md
│   ├── ada_svd
│   │   └── clip_vision.sh
│   ├── adamerging
│   │   ├── gpt_2.sh
│   │   └── flan_t5_base.sh
│   ├── gossip
│   │   ├── flan_t5.sh
│   │   └── clip.sh
│   ├── trust_region
│   │   └── READMD.md
│   └── randes
│       └── clip-vit-base-patch32.sh
├── .flake8
├── requirements.txt
├── set_cache_dir.sh
└── .github
    └── ISSUE_TEMPLATE
        └── config.yml

/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/docs/config:
--------------------------------------------------------------------------------
1 | ../config/
--------------------------------------------------------------------------------
/.vscode/.gitignore:
--------------------------------------------------------------------------------
1 | *.json
--------------------------------------------------------------------------------
/fusion_bench_config:
--------------------------------------------------------------------------------
1 | config
--------------------------------------------------------------------------------
/tests/test_utils/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/config/.gitignore:
--------------------------------------------------------------------------------
1 | *.local.yaml
--------------------------------------------------------------------------------
/docs/.gitignore:
--------------------------------------------------------------------------------
1 | /_figure_sources/
--------------------------------------------------------------------------------
/fusion_bench/compat/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/fusion_bench/metrics/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/fusion_bench/scripts/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/tests/test_method/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/tests/test_mixins/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/tests/test_modelpool/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/tests/test_models/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/docs/algorithms/model_stitching.md:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/docs/api/fusion_bench.utils/misc.md:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/examples/opcm/.gitignore:
--------------------------------------------------------------------------------
1 | images/
2 | 
--------------------------------------------------------------------------------
/examples/open_clip/src/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/fusion_bench/method/knots/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/fusion_bench/mixins/optim/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/fusion_bench/models/nyuv2/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/fusion_bench/scripts/clip/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/fusion_bench/utils/plot/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/docs/algorithms/specification_ensemble.md:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/examples/clip_finetune/.gitignore:
--------------------------------------------------------------------------------
1 | /tanganke/
--------------------------------------------------------------------------------
/fusion_bench/models/linearized/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/fusion_bench/models/smile_moe/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/fusion_bench/models/wrappers/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/examples/hyperparam_search/.gitignore:
--------------------------------------------------------------------------------
1 | *.db
2 | 
--------------------------------------------------------------------------------
/examples/mergebench/.gitignore:
--------------------------------------------------------------------------------
1 | /results/
2 | 
--------------------------------------------------------------------------------
/fusion_bench/dataset/llama/utils/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/fusion_bench/models/expert_sparsity/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/.flake8:
--------------------------------------------------------------------------------
1 | [flake8]
2 | ignore = E501, W503, E203
--------------------------------------------------------------------------------
/examples/clip_finetune/config/.gitignore:
--------------------------------------------------------------------------------
1 | *.local.yaml
--------------------------------------------------------------------------------
/examples/iterative_sparselo_pruning/.gitignore:
--------------------------------------------------------------------------------
1 | *.pdf
--------------------------------------------------------------------------------
/fusion_bench/dataset/image_corruption/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/fusion_bench/models/llama/model_utils/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/fusion_bench/taskpool/clip_vision/utils/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/fusion_bench/method/bitdelta/bitdelta_utils/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/fusion_bench/tasks/flan_t5_text_generation/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/examples/open_clip/requirements.txt:
--------------------------------------------------------------------------------
1 | open-clip-torch==2.0.2
2 | 
--------------------------------------------------------------------------------
/docs/css/material_extra.css:
--------------------------------------------------------------------------------
1 | .md-grid {
2 |   max-width: 100%;
3 | }
--------------------------------------------------------------------------------
/fusion_bench/dataset/llama/__init__.py:
--------------------------------------------------------------------------------
1 | from . import collate
2 | 
--------------------------------------------------------------------------------
/docs/algorithms/layer_recombination.md:
--------------------------------------------------------------------------------
1 | # Layer Recombination
2 | 
3 | 
--------------------------------------------------------------------------------
/examples/mergebench/requirements.txt:
--------------------------------------------------------------------------------
1 | immutabledict
2 | langdetect
--------------------------------------------------------------------------------
/fusion_bench/method/dop/__init__.py:
--------------------------------------------------------------------------------
1 | from .dop import ContinualDOPForCLIP
2 | 
--------------------------------------------------------------------------------
/fusion_bench/method/model_stock/__init__.py:
--------------------------------------------------------------------------------
1 | from .model_stock import ModelStock
2 | 
--------------------------------------------------------------------------------
/fusion_bench/optim/lr_scheduler/utils/__init__.py:
--------------------------------------------------------------------------------
1 | from .visualization import *
2 | 
--------------------------------------------------------------------------------
/fusion_bench/method/wudi/__init__.py:
--------------------------------------------------------------------------------
1 | from .wudi import WUDIMerging, wudi_merging
2 | 
--------------------------------------------------------------------------------
/fusion_bench/_get_started/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Tutorial module for FusionBench
3 | """
4 | 
--------------------------------------------------------------------------------
/fusion_bench/method/pruning/sparsegpt_utils/__init__.py:
--------------------------------------------------------------------------------
1 | from .sparsegpt import SparseGPT
2 | 
--------------------------------------------------------------------------------
/fusion_bench/modelpool/clip_vision/__init__.py:
--------------------------------------------------------------------------------
1 | from .modelpool import CLIPVisionModelPool
2 | 
--------------------------------------------------------------------------------
/fusion_bench/tasks/__init__.py:
--------------------------------------------------------------------------------
1 | # flake8: noqa F401
2 | from .base_task import BaseTask
3 | 
--------------------------------------------------------------------------------
/config/model/clip-vit/clip-vit-base-patch16.yaml:
--------------------------------------------------------------------------------
1 | _pretrained_: openai/clip-vit-base-patch16
2 | 
--------------------------------------------------------------------------------
/config/model/clip-vit/clip-vit-base-patch16_dtd.yaml:
--------------------------------------------------------------------------------
1 | dtd: tanganke/clip-vit-base-patch16_dtd
2 | 
--------------------------------------------------------------------------------
/config/model/clip-vit/clip-vit-base-patch32.yaml:
--------------------------------------------------------------------------------
1 | _pretrained_: openai/clip-vit-base-patch32
2 | 
--------------------------------------------------------------------------------
/config/model/clip-vit/clip-vit-base-patch32_dtd.yaml:
--------------------------------------------------------------------------------
1 | dtd: tanganke/clip-vit-base-patch32_dtd
2 | 
--------------------------------------------------------------------------------
/config/model/clip-vit/clip-vit-large-patch14.yaml:
--------------------------------------------------------------------------------
1 | _pretrained_: openai/clip-vit-large-patch14
2 | 
--------------------------------------------------------------------------------
/config/model/clip-vit/clip-vit-large-patch14_dtd.yaml:
--------------------------------------------------------------------------------
1 | dtd: tanganke/clip-vit-large-patch14_dtd
2 | 
--------------------------------------------------------------------------------
/docs/images/llm.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/images/llm.png
--------------------------------------------------------------------------------
/examples/open_clip/.gitignore:
--------------------------------------------------------------------------------
1 | ./outputs/
2 | ./.cache/
3 | ./checkpoints/
4 | ./tall_masks/
--------------------------------------------------------------------------------
/fusion_bench/method/moe_pruner/utils/__init__.py:
--------------------------------------------------------------------------------
1 | from .score import layer_load_balance_score
2 | 
--------------------------------------------------------------------------------
/fusion_bench/models/surgery/__init__.py:
--------------------------------------------------------------------------------
1 | from .surgerymodelwrapper import SurgeryModelWrapper
2 | 
--------------------------------------------------------------------------------
/config/model/clip-vit/clip-vit-base-patch16_gtsrb.yaml:
--------------------------------------------------------------------------------
1 | gtsrb: tanganke/clip-vit-base-patch16_gtsrb
2 | 
--------------------------------------------------------------------------------
/config/model/clip-vit/clip-vit-base-patch16_mnist.yaml:
--------------------------------------------------------------------------------
1 | mnist: tanganke/clip-vit-base-patch16_mnist
2 | 
--------------------------------------------------------------------------------
/config/model/clip-vit/clip-vit-base-patch16_pcam.yaml:
--------------------------------------------------------------------------------
1 | pcam: tanganke/clip-vit-base-patch16_pcam
2 | 
--------------------------------------------------------------------------------
/config/model/clip-vit/clip-vit-base-patch16_stl10.yaml:
--------------------------------------------------------------------------------
1 | stl10: tanganke/clip-vit-base-patch16_stl10
2 | 
--------------------------------------------------------------------------------
/config/model/clip-vit/clip-vit-base-patch16_svhn.yaml:
--------------------------------------------------------------------------------
1 | svhn: tanganke/clip-vit-base-patch16_svhn
2 | 
--------------------------------------------------------------------------------
/config/model/clip-vit/clip-vit-base-patch32_gtsrb.yaml:
--------------------------------------------------------------------------------
1 | gtsrb: tanganke/clip-vit-base-patch32_gtsrb
2 | 
--------------------------------------------------------------------------------
/config/model/clip-vit/clip-vit-base-patch32_mnist.yaml:
--------------------------------------------------------------------------------
1 | mnist: tanganke/clip-vit-base-patch32_mnist
2 | 
--------------------------------------------------------------------------------
/config/model/clip-vit/clip-vit-base-patch32_pcam.yaml:
--------------------------------------------------------------------------------
1 | pcam: tanganke/clip-vit-base-patch32_pcam
2 | 
--------------------------------------------------------------------------------
/config/model/clip-vit/clip-vit-base-patch32_stl10.yaml:
--------------------------------------------------------------------------------
1 | stl10: tanganke/clip-vit-base-patch32_stl10
2 | 
--------------------------------------------------------------------------------
/config/model/clip-vit/clip-vit-base-patch32_svhn.yaml:
--------------------------------------------------------------------------------
1 | svhn: tanganke/clip-vit-base-patch32_svhn
2 | 
--------------------------------------------------------------------------------
/config/model/clip-vit/clip-vit-large-patch14_pcam.yaml:
--------------------------------------------------------------------------------
1 | pcam: tanganke/clip-vit-large-patch14_pcam
2 | 
--------------------------------------------------------------------------------
/config/model/clip-vit/clip-vit-large-patch14_svhn.yaml:
--------------------------------------------------------------------------------
1 | svhn: tanganke/clip-vit-large-patch14_svhn
2 | 
--------------------------------------------------------------------------------
/config/taskpool/dummy.yaml:
--------------------------------------------------------------------------------
1 | _target_: fusion_bench.taskpool.DummyTaskPool
2 | model_save_path: null
3 | 
--------------------------------------------------------------------------------
/fusion_bench/modelpool/openclip_vision/__init__.py:
--------------------------------------------------------------------------------
1 | from .modelpool import OpenCLIPVisionModelPool
2 | 
--------------------------------------------------------------------------------
/fusion_bench/taskpool/llama/__init__.py:
--------------------------------------------------------------------------------
1 | from .test_generation import LlamaTestGenerationTaskPool
2 | 
--------------------------------------------------------------------------------
/config/model/clip-vit/clip-vit-base-patch16_cifar10.yaml:
--------------------------------------------------------------------------------
1 | cifar10: tanganke/clip-vit-base-patch16_cifar10
2 | 
--------------------------------------------------------------------------------
/config/model/clip-vit/clip-vit-base-patch16_eurosat.yaml:
--------------------------------------------------------------------------------
1 | eurosat: tanganke/clip-vit-base-patch16_eurosat
2 | 
--------------------------------------------------------------------------------
/config/model/clip-vit/clip-vit-base-patch16_fer2013.yaml:
--------------------------------------------------------------------------------
1 | fer2013: tanganke/clip-vit-base-patch16_fer2013
2 | 
--------------------------------------------------------------------------------
/config/model/clip-vit/clip-vit-base-patch16_food101.yaml:
--------------------------------------------------------------------------------
1 | food101: tanganke/clip-vit-base-patch16_food101
2 | 
--------------------------------------------------------------------------------
/config/model/clip-vit/clip-vit-base-patch16_kmnist.yaml:
--------------------------------------------------------------------------------
1 | kmnist: tanganke/clip-vit-base-patch16_kmnist
2 | 
--------------------------------------------------------------------------------
/config/model/clip-vit/clip-vit-base-patch16_sun397.yaml:
--------------------------------------------------------------------------------
1 | sun397: tanganke/clip-vit-base-patch16_sun397
2 | 
--------------------------------------------------------------------------------
/config/model/clip-vit/clip-vit-base-patch32_cifar10.yaml:
--------------------------------------------------------------------------------
1 | cifar10: tanganke/clip-vit-base-patch32_cifar10
2 | 
--------------------------------------------------------------------------------
/config/model/clip-vit/clip-vit-base-patch32_eurosat.yaml:
--------------------------------------------------------------------------------
1 | eurosat: tanganke/clip-vit-base-patch32_eurosat
2 | 
--------------------------------------------------------------------------------
/config/model/clip-vit/clip-vit-base-patch32_fer2013.yaml:
--------------------------------------------------------------------------------
1 | fer2013: tanganke/clip-vit-base-patch32_fer2013
2 | 
--------------------------------------------------------------------------------
/config/model/clip-vit/clip-vit-base-patch32_food101.yaml:
--------------------------------------------------------------------------------
1 | food101: tanganke/clip-vit-base-patch32_food101
2 | 
--------------------------------------------------------------------------------
/config/model/clip-vit/clip-vit-base-patch32_kmnist.yaml:
--------------------------------------------------------------------------------
1 | kmnist: tanganke/clip-vit-base-patch32_kmnist
2 | 
--------------------------------------------------------------------------------
/config/model/clip-vit/clip-vit-base-patch32_sun397.yaml:
--------------------------------------------------------------------------------
1 | sun397: tanganke/clip-vit-base-patch32_sun397
2 | 
--------------------------------------------------------------------------------
/config/model/clip-vit/clip-vit-large-patch14_gtsrb.yaml:
--------------------------------------------------------------------------------
1 | gtsrb: tanganke/clip-vit-large-patch14_gtsrb
2 | 
--------------------------------------------------------------------------------
/config/model/clip-vit/clip-vit-large-patch14_kmnist.yaml:
--------------------------------------------------------------------------------
1 | kmnist: tanganke/clip-vit-large-patch14_kmnist
2 | 
--------------------------------------------------------------------------------
/config/model/clip-vit/clip-vit-large-patch14_mnist.yaml:
--------------------------------------------------------------------------------
1 | mnist: tanganke/clip-vit-large-patch14_mnist
2 | 
--------------------------------------------------------------------------------
/config/model/clip-vit/clip-vit-large-patch14_stl10.yaml:
--------------------------------------------------------------------------------
1 | stl10: tanganke/clip-vit-large-patch14_stl10
2 | 
--------------------------------------------------------------------------------
/config/model/clip-vit/clip-vit-large-patch14_sun397.yaml:
--------------------------------------------------------------------------------
1 | sun397: tanganke/clip-vit-large-patch14_sun397
2 | 
--------------------------------------------------------------------------------
/fusion_bench/method/doge_ta/__init__.py:
--------------------------------------------------------------------------------
1 | # flake8: noqa F401
2 | from .doge_ta import DOGE_TA_Algorithm
3 | 
--------------------------------------------------------------------------------
/fusion_bench/method/tall_mask/__init__.py:
--------------------------------------------------------------------------------
1 | from .task_arithmetic import TallMaskTaskArithmeticAlgorithm
2 | 
--------------------------------------------------------------------------------
/config/model/clip-vit/clip-vit-base-patch16_cifar100.yaml:
--------------------------------------------------------------------------------
1 | cifar100: tanganke/clip-vit-base-patch16_cifar100
2 | 
--------------------------------------------------------------------------------
/config/model/clip-vit/clip-vit-base-patch16_resisc45.yaml:
--------------------------------------------------------------------------------
1 | resisc45: tanganke/clip-vit-base-patch16_resisc45
2 | 
--------------------------------------------------------------------------------
/config/model/clip-vit/clip-vit-base-patch32_cifar100.yaml:
--------------------------------------------------------------------------------
1 | cifar100: tanganke/clip-vit-base-patch32_cifar100
2 | 
--------------------------------------------------------------------------------
/config/model/clip-vit/clip-vit-base-patch32_resisc45.yaml:
--------------------------------------------------------------------------------
1 | resisc45: tanganke/clip-vit-base-patch32_resisc45
2 | 
--------------------------------------------------------------------------------
/config/model/clip-vit/clip-vit-large-patch14_cifar10.yaml:
--------------------------------------------------------------------------------
1 | cifar10: tanganke/clip-vit-large-patch14_cifar10
2 | 
--------------------------------------------------------------------------------
/config/model/clip-vit/clip-vit-large-patch14_cifar100.yaml:
--------------------------------------------------------------------------------
1 | cifar100: tanganke/clip-vit-large-patch14_cifar100
2 | 
--------------------------------------------------------------------------------
/config/model/clip-vit/clip-vit-large-patch14_eurosat.yaml:
--------------------------------------------------------------------------------
1 | eurosat: tanganke/clip-vit-large-patch14_eurosat
2 | 
--------------------------------------------------------------------------------
/config/model/clip-vit/clip-vit-large-patch14_fer2013.yaml:
--------------------------------------------------------------------------------
1 | fer2013: tanganke/clip-vit-large-patch14_fer2013
2 | 
--------------------------------------------------------------------------------
/config/model/clip-vit/clip-vit-large-patch14_food101.yaml:
--------------------------------------------------------------------------------
1 | food101: tanganke/clip-vit-large-patch14_food101
2 | 
--------------------------------------------------------------------------------
/config/model/clip-vit/clip-vit-large-patch14_resisc45.yaml:
--------------------------------------------------------------------------------
1 | resisc45: tanganke/clip-vit-large-patch14_resisc45
2 | 
--------------------------------------------------------------------------------
/docs/images/model_mixing.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/images/model_mixing.png
--------------------------------------------------------------------------------
/fusion_bench/metrics/continual_learning/__init__.py:
--------------------------------------------------------------------------------
1 | from .backward_transfer import compute_backward_transfer
2 | 
--------------------------------------------------------------------------------
/fusion_bench/modelpool/seq2seq_lm/__init__.py:
--------------------------------------------------------------------------------
1 | # flake8: noqa F401
2 | from .modelpool import Seq2SeqLMPool
3 | 
--------------------------------------------------------------------------------
/fusion_bench/taskpool/openclip_vision/__init__.py:
--------------------------------------------------------------------------------
1 | from .openclip_taskpool import OpenCLIPVisionModelTaskPool
2 | 
--------------------------------------------------------------------------------
/config/dataset/summarization/xsum.yaml:
--------------------------------------------------------------------------------
1 | xsum:
2 |   _target_: datasets.load_dataset
3 |   path: EdinburghNLP/xsum
4 | 
--------------------------------------------------------------------------------
/docs/images/model_ensemble.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/images/model_ensemble.png
--------------------------------------------------------------------------------
/docs/images/model_merging.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/images/model_merging.png
--------------------------------------------------------------------------------
/examples/smile_upscaling/.gitignore:
--------------------------------------------------------------------------------
1 | *.pdf
2 | /collected_results/
3 | /outputs/
4 | /results/
5 | /results.backup/
6 | 
--------------------------------------------------------------------------------
/fusion_bench/models/masks/__init__.py:
--------------------------------------------------------------------------------
1 | # flake8: noqa F401
2 | from .mask_model import MaskModel, mask_sparsity
3 | 
--------------------------------------------------------------------------------
/fusion_bench/tasks/clip_classification/clip_dataset.py:
--------------------------------------------------------------------------------
1 | from fusion_bench.dataset.clip_dataset import CLIPDataset
2 | 
--------------------------------------------------------------------------------
/config/fabric/loggers/mlflow_logger.yaml:
--------------------------------------------------------------------------------
1 | # https://mlflow.org/
2 | _target_: lightning.pytorch.loggers.MLFlowLogger
3 | 
--------------------------------------------------------------------------------
/docs/algorithms/images/ewemoe.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/algorithms/images/ewemoe.png
--------------------------------------------------------------------------------
/docs/algorithms/images/wemoe.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/algorithms/images/wemoe.png
--------------------------------------------------------------------------------
/docs/api/fusion_bench.optim.md:
--------------------------------------------------------------------------------
1 | # fusion_bench.optim
2 | 
3 | ## MeZO optimizer
4 | 
5 | ::: fusion_bench.optim.MeZO
--------------------------------------------------------------------------------
/docs/cli/images/vscode_debug.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/cli/images/vscode_debug.png
--------------------------------------------------------------------------------
/docs/images/fusion_bench_flow.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/images/fusion_bench_flow.png
--------------------------------------------------------------------------------
/docs/images/model_upscaling.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/images/model_upscaling.png
--------------------------------------------------------------------------------
/docs/modelpool/clip-vit-cos.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/modelpool/clip-vit-cos.png
--------------------------------------------------------------------------------
/docs/readinglist/images/watt.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/readinglist/images/watt.png
--------------------------------------------------------------------------------
/fusion_bench/__main__.py:
--------------------------------------------------------------------------------
1 | from fusion_bench.scripts.cli import main
2 | 
3 | if __name__ == "__main__":
4 |     main()
5 | 
--------------------------------------------------------------------------------
/fusion_bench/method/slerp/__init__.py:
--------------------------------------------------------------------------------
1 | # flake8: noqa F401
2 | from .slerp import SlerpForCausalLM, SlerpMergeAlgorithm
3 | 
--------------------------------------------------------------------------------
/fusion_bench/method/ties_merging/__init__.py:
--------------------------------------------------------------------------------
1 | # flake8: noqa F401
2 | from .ties_merging import TiesMergingAlgorithm
3 | 
--------------------------------------------------------------------------------
/config/method/doge_ta/doge_ta.yaml:
--------------------------------------------------------------------------------
1 | _target_: fusion_bench.method.DOGE_TA_Algorithm
2 | subspace: 6
3 | K: 30
4 | lamda: 0.07
5 | 
--------------------------------------------------------------------------------
/config/model/clip-vit/clip-vit-base-patch16_fashion_mnist.yaml:
--------------------------------------------------------------------------------
1 | fashion_mnist: tanganke/clip-vit-base-patch16_fashion_mnist
2 | 
--------------------------------------------------------------------------------
/config/model/clip-vit/clip-vit-base-patch16_rendered-sst2.yaml:
--------------------------------------------------------------------------------
1 | rendered-sst2: tanganke/clip-vit-base-patch16_rendered-sst2
2 | 
--------------------------------------------------------------------------------
/config/model/clip-vit/clip-vit-base-patch16_stanford-cars.yaml:
--------------------------------------------------------------------------------
1 | stanford-cars: tanganke/clip-vit-base-patch16_stanford-cars
2 | 
--------------------------------------------------------------------------------
/config/model/clip-vit/clip-vit-base-patch32_fashion_mnist.yaml:
--------------------------------------------------------------------------------
1 | fashion_mnist: tanganke/clip-vit-base-patch32_fashion_mnist
2 | 
--------------------------------------------------------------------------------
/config/model/clip-vit/clip-vit-base-patch32_rendered-sst2.yaml:
--------------------------------------------------------------------------------
1 | rendered-sst2: tanganke/clip-vit-base-patch32_rendered-sst2
2 | 
--------------------------------------------------------------------------------
/config/model/clip-vit/clip-vit-base-patch32_stanford-cars.yaml:
--------------------------------------------------------------------------------
1 | stanford-cars: tanganke/clip-vit-base-patch32_stanford-cars
2 | 
--------------------------------------------------------------------------------
/config/model/clip-vit/clip-vit-large-patch14_fashion_mnist.yaml:
--------------------------------------------------------------------------------
1 | fashion_mnist: tanganke/clip-vit-large-patch14_fashion_mnist
2 | 
--------------------------------------------------------------------------------
/config/model/clip-vit/clip-vit-large-patch14_rendered-sst2.yaml:
--------------------------------------------------------------------------------
1 | rendered-sst2: tanganke/clip-vit-large-patch14_rendered-sst2
2 | 
--------------------------------------------------------------------------------
/config/model/clip-vit/clip-vit-large-patch14_stanford-cars.yaml:
--------------------------------------------------------------------------------
1 | stanford-cars: tanganke/clip-vit-large-patch14_stanford-cars
2 | 
--------------------------------------------------------------------------------
/docs/algorithms/images/bitdelta.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/algorithms/images/bitdelta.png
--------------------------------------------------------------------------------
/docs/algorithms/images/ewemoe_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/algorithms/images/ewemoe_1.png
--------------------------------------------------------------------------------
/docs/algorithms/images/ewemoe_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/algorithms/images/ewemoe_2.png
--------------------------------------------------------------------------------
/docs/algorithms/images/pwe_moe.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/algorithms/images/pwe_moe.png
--------------------------------------------------------------------------------
/docs/algorithms/images/sigmoid.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/algorithms/images/sigmoid.png
--------------------------------------------------------------------------------
/docs/cli/images/pycharm_debug_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/cli/images/pycharm_debug_1.png
--------------------------------------------------------------------------------
/docs/cli/images/pycharm_debug_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/cli/images/pycharm_debug_2.png
--------------------------------------------------------------------------------
/docs/cli/images/pycharm_debug_3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/cli/images/pycharm_debug_3.png
--------------------------------------------------------------------------------
/docs/cli/images/tab_completion.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/cli/images/tab_completion.png
--------------------------------------------------------------------------------
/docs/images/learning_paradiagm.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/images/learning_paradiagm.png
--------------------------------------------------------------------------------
/docs/readinglist/images/fusellm.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/readinglist/images/fusellm.png
--------------------------------------------------------------------------------
/docs/readinglist/images/lorahub.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/readinglist/images/lorahub.png
--------------------------------------------------------------------------------
/docs/readinglist/images/pwe_moe.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/readinglist/images/pwe_moe.png
-------------------------------------------------------------------------------- /examples/smile_upscaling/SMILE.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/examples/smile_upscaling/SMILE.png -------------------------------------------------------------------------------- /fusion_bench/method/ada_svd/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa F401 2 | from .clip_vision import AdaSVDMergingForCLIPVisionModel 3 | -------------------------------------------------------------------------------- /fusion_bench/method/dawe/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa F401 2 | from .dawe_for_clip import DataAdaptiveWeightEnsemblingForCLIP 3 | -------------------------------------------------------------------------------- /config/model/clip-vit/clip-vit-base-patch16_emnist_letters.yaml: -------------------------------------------------------------------------------- 1 | emnist_letters: tanganke/clip-vit-base-patch16_emnist_letters 2 | -------------------------------------------------------------------------------- /config/model/clip-vit/clip-vit-base-patch16_oxford-iiit-pet.yaml: -------------------------------------------------------------------------------- 1 | oxford-iiit-pet: tanganke/clip-vit-base-patch16_oxford-iiit-pet 2 | -------------------------------------------------------------------------------- /config/model/clip-vit/clip-vit-base-patch32_emnist_letters.yaml: -------------------------------------------------------------------------------- 1 | emnist_letters: tanganke/clip-vit-base-patch32_emnist_letters 2 | -------------------------------------------------------------------------------- /config/model/clip-vit/clip-vit-base-patch32_oxford-iiit-pet.yaml: -------------------------------------------------------------------------------- 1 | oxford-iiit-pet: tanganke/clip-vit-base-patch32_oxford-iiit-pet 2 | -------------------------------------------------------------------------------- /config/model/clip-vit/clip-vit-large-patch14_emnist_letters.yaml: -------------------------------------------------------------------------------- 1 | emnist_letters: tanganke/clip-vit-large-patch14_emnist_letters 2 | -------------------------------------------------------------------------------- /docs/algorithms/images/adamerging.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/algorithms/images/adamerging.png -------------------------------------------------------------------------------- /docs/algorithms/images/iso_merging.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/algorithms/images/iso_merging.png -------------------------------------------------------------------------------- /docs/algorithms/images/solar10.7B.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/algorithms/images/solar10.7B.png -------------------------------------------------------------------------------- /docs/cli/images/fusion_bench_webui.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/cli/images/fusion_bench_webui.png -------------------------------------------------------------------------------- /docs/images/multi-task_core_steps.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/images/multi-task_core_steps.png -------------------------------------------------------------------------------- /docs/readinglist/images/forkmerge.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/readinglist/images/forkmerge.png -------------------------------------------------------------------------------- /docs/readinglist/images/fs-merge.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/readinglist/images/fs-merge.png -------------------------------------------------------------------------------- /docs/readinglist/images/fusechat.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/readinglist/images/fusechat.png -------------------------------------------------------------------------------- /docs/readinglist/images/lora_lego.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/readinglist/images/lora_lego.png -------------------------------------------------------------------------------- /docs/readinglist/images/pituning.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/readinglist/images/pituning.png -------------------------------------------------------------------------------- /fusion_bench/method/pwe_moe/phn/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa F401 2 | from .solvers import EPOSolver, LinearScalarizationSolver 3 | -------------------------------------------------------------------------------- /fusion_bench/method/surgery/__init__.py: -------------------------------------------------------------------------------- 1 | from .clip_layer_wise_adamerging_surgery import CLIPLayerWiseAdaMergingSurgeryAlgorithm 2 | -------------------------------------------------------------------------------- /fusion_bench/utils/strenum/README.md: -------------------------------------------------------------------------------- 1 | This is an alternative implementation of `enum.StrEnum` (which is only available in Python 3.11+). 
2 | -------------------------------------------------------------------------------- /config/dataset/image_classification/test/mnist.yaml: -------------------------------------------------------------------------------- 1 | mnist: 2 | _target_: datasets.load_dataset 3 | path: mnist 4 | split: test 5 | -------------------------------------------------------------------------------- /config/dataset/image_classification/train/mnist.yaml: -------------------------------------------------------------------------------- 1 | mnist: 2 | _target_: datasets.load_dataset 3 | path: mnist 4 | split: train 5 | -------------------------------------------------------------------------------- /config/method/isotropic_merging/iso_c.yaml: -------------------------------------------------------------------------------- 1 | _target_: fusion_bench.method.ISO_C_Merge 2 | scaling_factor: 1.0 3 | exclude_keys: null 4 | -------------------------------------------------------------------------------- /config/model/clip-vit/clip-vit-base-patch16_oxford_flowers102.yaml: -------------------------------------------------------------------------------- 1 | oxford_flowers102: tanganke/clip-vit-base-patch16_oxford_flowers102 2 | -------------------------------------------------------------------------------- /config/model/clip-vit/clip-vit-base-patch32_oxford_flowers102.yaml: -------------------------------------------------------------------------------- 1 | oxford_flowers102: tanganke/clip-vit-base-patch32_oxford_flowers102 2 | -------------------------------------------------------------------------------- /config/model/clip-vit/clip-vit-large-patch14_oxford-iiit-pet.yaml: -------------------------------------------------------------------------------- 1 | oxford-iiit-pet: tanganke/clip-vit-large-patch14_oxford-iiit-pet 2 | -------------------------------------------------------------------------------- /docs/algorithms/images/ties_merging.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/algorithms/images/ties_merging.jpg -------------------------------------------------------------------------------- /docs/images/multi-task_model_fusion.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/images/multi-task_model_fusion.png -------------------------------------------------------------------------------- /docs/modelpool/images/convnext_block.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/modelpool/images/convnext_block.png -------------------------------------------------------------------------------- /docs/readinglist/images/adapter_soup.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/readinglist/images/adapter_soup.png -------------------------------------------------------------------------------- /docs/readinglist/images/twin_merging.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/readinglist/images/twin_merging.png -------------------------------------------------------------------------------- /fusion_bench/models/chat_templates/__init__.py: 
-------------------------------------------------------------------------------- 1 | from .load_tokenizer import chat_template_mapping, load_tokenizer_with_chat_template 2 | -------------------------------------------------------------------------------- /config/dataset/image_classification/test/dtd.yaml: -------------------------------------------------------------------------------- 1 | dtd: 2 | _target_: datasets.load_dataset 3 | path: tanganke/dtd 4 | split: test 5 | -------------------------------------------------------------------------------- /config/dataset/image_classification/test/fer2013.yaml: -------------------------------------------------------------------------------- 1 | fer2013: 2 | _target_: fusion_bench.dataset.fer2013.load_fer2013 3 | split: test 4 | -------------------------------------------------------------------------------- /config/dataset/image_classification/train/dtd.yaml: -------------------------------------------------------------------------------- 1 | dtd: 2 | _target_: datasets.load_dataset 3 | path: tanganke/dtd 4 | split: train 5 | -------------------------------------------------------------------------------- /config/dataset/summarization/test/xsum.yaml: -------------------------------------------------------------------------------- 1 | xsum: 2 | _target_: datasets.load_dataset 3 | path: EdinburghNLP/xsum 4 | split: test 5 | -------------------------------------------------------------------------------- /config/dataset/summarization/train/xsum.yaml: -------------------------------------------------------------------------------- 1 | xsum: 2 | _target_: datasets.load_dataset 3 | path: EdinburghNLP/xsum 4 | split: train 5 | -------------------------------------------------------------------------------- /config/model/clip-vit/clip-vit-large-patch14_oxford_flowers102.yaml: -------------------------------------------------------------------------------- 1 | oxford_flowers102: tanganke/clip-vit-large-patch14_oxford_flowers102 2 | -------------------------------------------------------------------------------- /docs/algorithms/images/Task Arithmetic.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/algorithms/images/Task Arithmetic.png -------------------------------------------------------------------------------- /docs/algorithms/images/smile_upscaling.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/algorithms/images/smile_upscaling.png -------------------------------------------------------------------------------- /docs/algorithms/images/sparse_upcycling.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/algorithms/images/sparse_upcycling.png -------------------------------------------------------------------------------- /docs/algorithms/images/wemoe_lr_tuning.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/algorithms/images/wemoe_lr_tuning.png -------------------------------------------------------------------------------- /docs/images/accelerate model training.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/images/accelerate model 
training.png -------------------------------------------------------------------------------- /docs/images/framework_of_model_fusion.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/images/framework_of_model_fusion.png -------------------------------------------------------------------------------- /docs/readinglist/images/depth_upscaling.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/readinglist/images/depth_upscaling.png -------------------------------------------------------------------------------- /docs/readinglist/images/scaling_smart.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/readinglist/images/scaling_smart.png -------------------------------------------------------------------------------- /docs/readinglist/images/smile_upscaling.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/readinglist/images/smile_upscaling.png -------------------------------------------------------------------------------- /examples/README.md: -------------------------------------------------------------------------------- 1 | # Examples Folder 2 | 3 | This directory contains examples demonstrating how to use the FusionBench library. 4 | -------------------------------------------------------------------------------- /fusion_bench/method/fw_merging/__init__.py: -------------------------------------------------------------------------------- 1 | from .fw_hard import FrankWolfeHardAlgorithm 2 | from .fw_soft import FrankWolfeSoftAlgorithm 3 | -------------------------------------------------------------------------------- /fusion_bench/method/trust_region/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa F401 2 | from .clip_task_arithmetic import TaskArithmeticWithTrustRegionForCLIP 3 | -------------------------------------------------------------------------------- /fusion_bench/taskpool/lm_eval_harness/__init__.py: -------------------------------------------------------------------------------- 1 | from .taskpool import LMEvalHarnessTaskPool 2 | 3 | __all__ = ["LMEvalHarnessTaskPool"] 4 | -------------------------------------------------------------------------------- /config/dataset/image_classification/test/gtsrb.yaml: -------------------------------------------------------------------------------- 1 | gtsrb: 2 | _target_: datasets.load_dataset 3 | path: tanganke/gtsrb 4 | split: test 5 | -------------------------------------------------------------------------------- /config/dataset/image_classification/test/kmnist.yaml: -------------------------------------------------------------------------------- 1 | kmnist: 2 | _target_: datasets.load_dataset 3 | path: tanganke/kmnist 4 | split: test 5 | -------------------------------------------------------------------------------- /config/dataset/image_classification/test/stl10.yaml: -------------------------------------------------------------------------------- 1 | stl10: 2 | _target_: datasets.load_dataset 3 | path: tanganke/stl10 4 | split: test 5 | --------------------------------------------------------------------------------
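The dataset configs in this section (gtsrb, kmnist, and stl10 above, plus the cifar10/eurosat files that follow) all point `_target_` at `datasets.load_dataset`, so each YAML is a declarative spelling of a single loader call. Below is a minimal sketch of the equivalent direct call for the stl10 test split; the `tanganke/stl10` Hub repository is taken from the config, and its availability is assumed rather than verified here.

# Minimal sketch, assuming the Hugging Face `datasets` package is installed and
# the `tanganke/stl10` dataset named in the YAML above is reachable on the Hub.
from datasets import load_dataset

stl10_test = load_dataset(path="tanganke/stl10", split="test")
print(stl10_test)  # a datasets.Dataset holding the test-split examples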
/config/dataset/image_classification/test/sun397.yaml: -------------------------------------------------------------------------------- 1 | sun397: 2 | _target_: datasets.load_dataset 3 | path: tanganke/sun397 4 | split: test 5 | -------------------------------------------------------------------------------- /config/dataset/image_classification/train/fer2013.yaml: -------------------------------------------------------------------------------- 1 | fer2013: 2 | _target_: fusion_bench.dataset.fer2013.load_fer2013 3 | split: train 4 | -------------------------------------------------------------------------------- /config/dataset/image_classification/train/gtsrb.yaml: -------------------------------------------------------------------------------- 1 | gtsrb: 2 | _target_: datasets.load_dataset 3 | path: tanganke/gtsrb 4 | split: train 5 | -------------------------------------------------------------------------------- /config/dataset/image_classification/train/stl10.yaml: -------------------------------------------------------------------------------- 1 | stl10: 2 | _target_: datasets.load_dataset 3 | path: tanganke/stl10 4 | split: train 5 | -------------------------------------------------------------------------------- /config/dataset/summarization/val/xsum.yaml: -------------------------------------------------------------------------------- 1 | xsum: 2 | _target_: datasets.load_dataset 3 | path: EdinburghNLP/xsum 4 | split: validation 5 | -------------------------------------------------------------------------------- /docs/guides/fusion_bench/mixins/lightning_fabric.md: -------------------------------------------------------------------------------- 1 | # LightningFabricMixin 2 | 3 | ## Reference 4 | 5 | ::: fusion_bench.mixins.lightning_fabric -------------------------------------------------------------------------------- /docs/readinglist/images/Chronopoulou2023.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/readinglist/images/Chronopoulou2023.png -------------------------------------------------------------------------------- /docs/readinglist/images/enneng2024survey.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/readinglist/images/enneng2024survey.png -------------------------------------------------------------------------------- /docs/readinglist/images/sparse-modelsoups.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/readinglist/images/sparse-modelsoups.png -------------------------------------------------------------------------------- /docs/readinglist/images/sparse_upcycling.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/readinglist/images/sparse_upcycling.png -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | matplotlib 3 | scipy 4 | torch 5 | lightning 6 | transformers 7 | datasets 8 | peft 9 | h5py 10 | -------------------------------------------------------------------------------- /config/dataset/image_classification/test/cifar10.yaml: 
-------------------------------------------------------------------------------- 1 | cifar10: 2 | _target_: datasets.load_dataset 3 | path: tanganke/cifar10 4 | split: test 5 | -------------------------------------------------------------------------------- /config/dataset/image_classification/test/eurosat.yaml: -------------------------------------------------------------------------------- 1 | eurosat: 2 | _target_: datasets.load_dataset 3 | path: tanganke/eurosat 4 | split: test 5 | -------------------------------------------------------------------------------- /config/dataset/image_classification/test/food101.yaml: -------------------------------------------------------------------------------- 1 | food101: 2 | _target_: datasets.load_dataset 3 | path: ethz/food101 4 | split: validation 5 | -------------------------------------------------------------------------------- /config/dataset/image_classification/test/pcam.yaml: -------------------------------------------------------------------------------- 1 | pcam: 2 | _target_: datasets.load_dataset 3 | path: 1aurent/PatchCamelyon 4 | split: test 5 | -------------------------------------------------------------------------------- /config/dataset/image_classification/train/cifar10.yaml: -------------------------------------------------------------------------------- 1 | cifar10: 2 | _target_: datasets.load_dataset 3 | path: tanganke/cifar10 4 | split: train 5 | -------------------------------------------------------------------------------- /config/dataset/image_classification/train/eurosat.yaml: -------------------------------------------------------------------------------- 1 | eurosat: 2 | _target_: datasets.load_dataset 3 | path: tanganke/eurosat 4 | split: train 5 | -------------------------------------------------------------------------------- /config/dataset/image_classification/train/food101.yaml: -------------------------------------------------------------------------------- 1 | food101: 2 | _target_: datasets.load_dataset 3 | path: ethz/food101 4 | split: train 5 | -------------------------------------------------------------------------------- /config/dataset/image_classification/train/kmnist.yaml: -------------------------------------------------------------------------------- 1 | kmnist: 2 | _target_: datasets.load_dataset 3 | path: tanganke/kmnist 4 | split: train 5 | -------------------------------------------------------------------------------- /config/dataset/image_classification/train/pcam.yaml: -------------------------------------------------------------------------------- 1 | pcam: 2 | _target_: datasets.load_dataset 3 | path: 1aurent/PatchCamelyon 4 | split: train 5 | -------------------------------------------------------------------------------- /config/dataset/image_classification/train/sun397.yaml: -------------------------------------------------------------------------------- 1 | sun397: 2 | _target_: datasets.load_dataset 3 | path: tanganke/sun397 4 | split: train 5 | -------------------------------------------------------------------------------- /docs/algorithms/images/max-model_predictor.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/algorithms/images/max-model_predictor.png -------------------------------------------------------------------------------- /docs/algorithms/images/wemoe_loss_landscape.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/algorithms/images/wemoe_loss_landscape.png -------------------------------------------------------------------------------- /docs/modelpool/images/clip_eight_corruption.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/modelpool/images/clip_eight_corruption.png -------------------------------------------------------------------------------- /docs/readinglist/images/branch_and_merging.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/readinglist/images/branch_and_merging.png -------------------------------------------------------------------------------- /fusion_bench/method/task_arithmetic/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa F401 2 | from .task_arithmetic import TaskArithmeticAlgorithm, task_arithmetic_merge 3 | -------------------------------------------------------------------------------- /fusion_bench/modelpool/causal_lm/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa F401 2 | from .causal_lm import CausalLMBackbonePool, CausalLMPool, load_peft_causal_lm 3 | -------------------------------------------------------------------------------- /config/dataset/image_classification/test/cifar100.yaml: -------------------------------------------------------------------------------- 1 | cifar100: 2 | _target_: datasets.load_dataset 3 | path: tanganke/cifar100 4 | split: test 5 | -------------------------------------------------------------------------------- /config/dataset/image_classification/test/resisc45.yaml: -------------------------------------------------------------------------------- 1 | resisc45: 2 | _target_: datasets.load_dataset 3 | path: tanganke/resisc45 4 | split: test 5 | -------------------------------------------------------------------------------- /config/dataset/image_classification/train/cifar100.yaml: -------------------------------------------------------------------------------- 1 | cifar100: 2 | _target_: datasets.load_dataset 3 | path: tanganke/cifar100 4 | split: train 5 | -------------------------------------------------------------------------------- /config/dataset/image_classification/train/resisc45.yaml: -------------------------------------------------------------------------------- 1 | resisc45: 2 | _target_: datasets.load_dataset 3 | path: tanganke/resisc45 4 | split: train 5 | -------------------------------------------------------------------------------- /config/dataset/text_generation/test/gsm8k.yaml: -------------------------------------------------------------------------------- 1 | gsm8k: 2 | _target_: datasets.load_dataset 3 | path: openai/gsm8k 4 | name: main 5 | split: test 6 | -------------------------------------------------------------------------------- /config/dataset/text_generation/train/gsm8k.yaml: -------------------------------------------------------------------------------- 1 | gsm8k: 2 | _target_: datasets.load_dataset 3 | path: openai/gsm8k 4 | name: main 5 | split: train 6 | -------------------------------------------------------------------------------- /docs/readinglist/images/branch_and_merging_alg.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/readinglist/images/branch_and_merging_alg.png -------------------------------------------------------------------------------- /fusion_bench/method/sparselo/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa F401 2 | from .sparselo import IterativeSparseLoForLlama, PCPSparseLoForLlama, SparseLoForLlama 3 | -------------------------------------------------------------------------------- /fusion_bench/tasks/clip_classification/rendered_sst2.py: -------------------------------------------------------------------------------- 1 | classnames = ["negative", "positive"] 2 | 3 | templates = [lambda c: f"a {c} review of a movie."] 4 | -------------------------------------------------------------------------------- /config/dataset/llm_sft/ultrachat_200k.yaml: -------------------------------------------------------------------------------- 1 | ultrachat-200k: 2 | _target_: fusion_bench.dataset.ultrachat.load_tokenized_ultrachat_200k 3 | tokenizer: ??? 4 | -------------------------------------------------------------------------------- /config/dataset/question_answering/train/MetaMathQA.yaml: -------------------------------------------------------------------------------- 1 | MetaMathQA: 2 | _target_: datasets.load_dataset 3 | path: meta-math/MetaMathQA 4 | split: train 5 | -------------------------------------------------------------------------------- /docs/algorithms/images/concrete_subspace_learning.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/algorithms/images/concrete_subspace_learning.png -------------------------------------------------------------------------------- /docs/algorithms/images/fedmr_model_recombination.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/algorithms/images/fedmr_model_recombination.jpg -------------------------------------------------------------------------------- /docs/modelpool/images/NYUv2-0000003446-63769b25.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/modelpool/images/NYUv2-0000003446-63769b25.jpg -------------------------------------------------------------------------------- /config/dataset/image_classification/test/cub-200-2011.yaml: -------------------------------------------------------------------------------- 1 | cub-200-2011: 2 | _target_: datasets.load_dataset 3 | path: Donghyun99/CUB-200-2011 4 | split: test 5 | -------------------------------------------------------------------------------- /config/dataset/image_classification/test/emnist_mnist.yaml: -------------------------------------------------------------------------------- 1 | emnist_mnist: 2 | _target_: datasets.load_dataset 3 | path: tanganke/emnist_mnist 4 | split: test 5 | -------------------------------------------------------------------------------- /config/dataset/image_classification/test/rendered-sst2.yaml: -------------------------------------------------------------------------------- 1 | rendered-sst2: 2 | _target_: datasets.load_dataset 3 | path: nateraw/rendered-sst2 4 | split: test 5 | -------------------------------------------------------------------------------- /config/dataset/image_classification/train/emnist_mnist.yaml: 
-------------------------------------------------------------------------------- 1 | emnist_mnist: 2 | _target_: datasets.load_dataset 3 | path: tanganke/emnist_mnist 4 | split: train 5 | -------------------------------------------------------------------------------- /config/dataset/text_generation/train/CodeAlpaca-20k.yaml: -------------------------------------------------------------------------------- 1 | CodeAlpaca-20k: 2 | _target_: datasets.load_dataset 3 | path: sahil2801/CodeAlpaca-20k 4 | split: train 5 | -------------------------------------------------------------------------------- /config/method/dare/simple_average.yaml: -------------------------------------------------------------------------------- 1 | _target_: fusion_bench.method.DareSimpleAverage 2 | sparsity_ratio: 0.5 3 | only_on_linear_weights: false 4 | rescale: true 5 | -------------------------------------------------------------------------------- /docs/algorithms/images/regmean_vs_regmean_plusplus.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/algorithms/images/regmean_vs_regmean_plusplus.png -------------------------------------------------------------------------------- /fusion_bench/method/bitdelta/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Adapted from https://github.com/FasterDecoding/BitDelta 3 | """ 4 | 5 | from .bitdelta import BitDeltaAlgorithm 6 | -------------------------------------------------------------------------------- /config/dataset/image_classification/test/emnist_letters.yaml: -------------------------------------------------------------------------------- 1 | emnist_letters: 2 | _target_: datasets.load_dataset 3 | path: tanganke/emnist_letters 4 | split: test 5 | -------------------------------------------------------------------------------- /config/dataset/image_classification/test/oxford-iiit-pet.yaml: -------------------------------------------------------------------------------- 1 | oxford-iiit-pet: 2 | _target_: datasets.load_dataset 3 | path: timm/oxford-iiit-pet 4 | split: test 5 | -------------------------------------------------------------------------------- /config/dataset/image_classification/test/stanford-cars.yaml: -------------------------------------------------------------------------------- 1 | stanford-cars: 2 | _target_: datasets.load_dataset 3 | path: tanganke/stanford_cars 4 | split: test 5 | -------------------------------------------------------------------------------- /config/dataset/image_classification/test/svhn.yaml: -------------------------------------------------------------------------------- 1 | svhn: 2 | _target_: datasets.load_dataset 3 | _args_: 4 | - svhn 5 | - cropped_digits 6 | split: test 7 | -------------------------------------------------------------------------------- /config/dataset/image_classification/test/tiny-imagenet.yaml: -------------------------------------------------------------------------------- 1 | tiny-imagenet: 2 | _target_: datasets.load_dataset 3 | path: zh-plus/tiny-imagenet 4 | split: valid 5 | -------------------------------------------------------------------------------- /config/dataset/image_classification/train/cub-200-2011.yaml: -------------------------------------------------------------------------------- 1 | cub-200-2011: 2 | _target_: datasets.load_dataset 3 | path: Donghyun99/CUB-200-2011 4 | split: train 5 | 
-------------------------------------------------------------------------------- /config/dataset/image_classification/train/oxford-iiit-pet.yaml: -------------------------------------------------------------------------------- 1 | oxford-iiit-pet: 2 | _target_: datasets.load_dataset 3 | path: timm/oxford-iiit-pet 4 | split: train 5 | -------------------------------------------------------------------------------- /config/dataset/image_classification/train/rendered-sst2.yaml: -------------------------------------------------------------------------------- 1 | rendered-sst2: 2 | _target_: datasets.load_dataset 3 | path: nateraw/rendered-sst2 4 | split: train 5 | -------------------------------------------------------------------------------- /config/dataset/image_classification/train/stanford-cars.yaml: -------------------------------------------------------------------------------- 1 | stanford-cars: 2 | _target_: datasets.load_dataset 3 | path: tanganke/stanford_cars 4 | split: train 5 | -------------------------------------------------------------------------------- /config/dataset/image_classification/train/svhn.yaml: -------------------------------------------------------------------------------- 1 | svhn: 2 | _target_: datasets.load_dataset 3 | _args_: 4 | - svhn 5 | - cropped_digits 6 | split: train 7 | -------------------------------------------------------------------------------- /config/dataset/image_classification/train/tiny-imagenet.yaml: -------------------------------------------------------------------------------- 1 | tiny-imagenet: 2 | _target_: datasets.load_dataset 3 | path: zh-plus/tiny-imagenet 4 | split: train 5 | -------------------------------------------------------------------------------- /config/method/isotropic_merging/iso_cts.yaml: -------------------------------------------------------------------------------- 1 | _target_: fusion_bench.method.ISO_CTS_Merge 2 | scaling_factor: 1.0 3 | common_space_fraction: 0.8 4 | exclude_keys: null 5 | -------------------------------------------------------------------------------- /fusion_bench/method/analysis/__init__.py: -------------------------------------------------------------------------------- 1 | from .task_vector_cos_similarity import TaskVectorCosSimilarity 2 | from .task_vector_violin_plot import TaskVectorViolinPlot 3 | -------------------------------------------------------------------------------- /config/dataset/image_classification/test/fashion_mnist.yaml: -------------------------------------------------------------------------------- 1 | fashion_mnist: 2 | _target_: datasets.load_dataset 3 | path: zalando-datasets/fashion_mnist 4 | split: test 5 | -------------------------------------------------------------------------------- /config/dataset/image_classification/train/emnist_letters.yaml: -------------------------------------------------------------------------------- 1 | emnist_letters: 2 | _target_: datasets.load_dataset 3 | path: tanganke/emnist_letters 4 | split: train 5 | -------------------------------------------------------------------------------- /config/method/tall_mask/task_arithmetic.yaml: -------------------------------------------------------------------------------- 1 | _target_: fusion_bench.method.tall_mask.TallMaskTaskArithmeticAlgorithm 2 | tall_mask_lambda: 0.6 3 | debug: 0 4 | verbose: 0 5 | -------------------------------------------------------------------------------- /docs/algorithms/images/adamerging_layerwise_coefficients.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/algorithms/images/adamerging_layerwise_coefficients.png -------------------------------------------------------------------------------- /docs/algorithms/images/concrete_adamerging_vs_adamerging.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/algorithms/images/concrete_adamerging_vs_adamerging.png -------------------------------------------------------------------------------- /fusion_bench/method/rankone_moe/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa F401 2 | from .clip_rankone_moe import CLIPRankOneMoEAlgorithm 3 | from .rankone_moe import RankOneMoEAlgorithm 4 | -------------------------------------------------------------------------------- /fusion_bench/method/weighted_average/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa F401 2 | from .llama import WeightedAverageForLLama 3 | from .weighted_average import WeightedAverageAlgorithm 4 | -------------------------------------------------------------------------------- /config/dataset/image_classification/test/mango-leaf-disease.yaml: -------------------------------------------------------------------------------- 1 | mango-leaf-disease: 2 | _target_: datasets.load_dataset 3 | path: AfiqN/mango-leaf-disease 4 | split: test 5 | -------------------------------------------------------------------------------- /config/dataset/image_classification/train/fashion_mnist.yaml: -------------------------------------------------------------------------------- 1 | fashion_mnist: 2 | _target_: datasets.load_dataset 3 | path: zalando-datasets/fashion_mnist 4 | split: train 5 | -------------------------------------------------------------------------------- /config/dataset/image_classification/train/mango-leaf-disease.yaml: -------------------------------------------------------------------------------- 1 | mango-leaf-disease: 2 | _target_: datasets.load_dataset 3 | path: AfiqN/mango-leaf-disease 4 | split: train 5 | -------------------------------------------------------------------------------- /config/model/flan-t5/flan-t5-base.yaml: -------------------------------------------------------------------------------- 1 | _pretrained_: 2 | _target_: transformers.AutoModelForSeq2SeqLM.from_pretrained 3 | pretrained_model_name_or_path: google/flan-t5-base 4 | -------------------------------------------------------------------------------- /config/model/flan-t5/flan-t5-large.yaml: -------------------------------------------------------------------------------- 1 | _pretrained_: 2 | _target_: transformers.AutoModelForSeq2SeqLM.from_pretrained 3 | pretrained_model_name_or_path: google/flan-t5-large 4 | -------------------------------------------------------------------------------- /docs/algorithms/images/ties_merging_hyperparameter_tuning.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/algorithms/images/ties_merging_hyperparameter_tuning.png -------------------------------------------------------------------------------- /docs/api/fusion_bench.utils/profiling.md: -------------------------------------------------------------------------------- 1 | # Profiling Utilities 2 | 3 | 
::: fusion_bench.utils.timer 4 | options: 5 | show_root_full_path: true 6 | heading_level: 3 -------------------------------------------------------------------------------- /fusion_bench/method/pwe_moe/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa F401 2 | from .clip_pwe_moe import ( 3 | PWEMoELinearScalarizationForCLIP, 4 | PWEMoExactParetoOptimalForCLIP, 5 | ) 6 | -------------------------------------------------------------------------------- /config/dataset/image_classification/test/oxford_flowers102.yaml: -------------------------------------------------------------------------------- 1 | oxford_flowers102: 2 | _target_: datasets.load_dataset 3 | path: dpdl-benchmark/oxford_flowers102 4 | split: test 5 | -------------------------------------------------------------------------------- /docs/algorithms/images/adamerging_model_merging_coefficients.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/algorithms/images/adamerging_model_merging_coefficients.png -------------------------------------------------------------------------------- /docs/modelpool/images/clip-vit-base-patch16_full&lora&l-lora.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/modelpool/images/clip-vit-base-patch16_full&lora&l-lora.png -------------------------------------------------------------------------------- /fusion_bench/method/sparse_we_moe/__init__.py: -------------------------------------------------------------------------------- 1 | from .sparse_clip_we_moe import SparseCLIPWeightEnsemblingMoEAlgorithm 2 | from .sparse_we_moe import SparseWeightEnsemblingMoEAlgorithm 3 | -------------------------------------------------------------------------------- /config/dataset/image_classification/train/oxford_flowers102.yaml: -------------------------------------------------------------------------------- 1 | oxford_flowers102: 2 | _target_: datasets.load_dataset 3 | path: dpdl-benchmark/oxford_flowers102 4 | split: train 5 | -------------------------------------------------------------------------------- /config/dataset/question_answering/search_qa.yaml: -------------------------------------------------------------------------------- 1 | search_qa: 2 | _target_: datasets.load_dataset 3 | _args_: 4 | - search_qa 5 | - train_test_val 6 | trust_remote_code: true 7 | -------------------------------------------------------------------------------- /config/method/dare/task_arithmetic.yaml: -------------------------------------------------------------------------------- 1 | _target_: fusion_bench.method.DareTaskArithmetic 2 | scaling_factor: 0.3 3 | sparsity_ratio: 0.5 4 | only_on_linear_weights: false 5 | rescale: true 6 | -------------------------------------------------------------------------------- /config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_dtd.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - CLIPVisionModelTaskPool@: _template 3 | - /dataset/image_classification/test@test_datasets: dtd 4 | -------------------------------------------------------------------------------- /config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_gtsrb.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - CLIPVisionModelTaskPool@: 
_template 3 | - /dataset/image_classification/test@test_datasets: gtsrb 4 | -------------------------------------------------------------------------------- /config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_mnist.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - CLIPVisionModelTaskPool@: _template 3 | - /dataset/image_classification/test@test_datasets: mnist 4 | -------------------------------------------------------------------------------- /config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_pcam.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - CLIPVisionModelTaskPool@: _template 3 | - /dataset/image_classification/test@test_datasets: pcam 4 | -------------------------------------------------------------------------------- /config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_stl10.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - CLIPVisionModelTaskPool@: _template 3 | - /dataset/image_classification/test@test_datasets: stl10 4 | -------------------------------------------------------------------------------- /config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_svhn.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - CLIPVisionModelTaskPool@: _template 3 | - /dataset/image_classification/test@test_datasets: svhn 4 | -------------------------------------------------------------------------------- /docs/api/fusion_bench.utils/caching.md: -------------------------------------------------------------------------------- 1 | # Caching Utilities 2 | 3 | ::: fusion_bench.utils.cache_utils 4 | options: 5 | show_root_full_path: true 6 | heading_level: 3 7 | -------------------------------------------------------------------------------- /docs/api/fusion_bench.utils/filesystem.md: -------------------------------------------------------------------------------- 1 | # FileSystem Utilities 2 | 3 | ::: fusion_bench.utils.path 4 | options: 5 | show_root_full_path: true 6 | heading_level: 3 7 | -------------------------------------------------------------------------------- /fusion_bench/method/we_moe/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa F401 2 | from .clip_we_moe import CLIPWeightEnsemblingMoEAlgorithm 3 | from .flan_t5_we_moe import FlanT5WeightEnsemblingMoEAlgorithm 4 | -------------------------------------------------------------------------------- /fusion_bench/metrics/model_kinship/__init__.py: -------------------------------------------------------------------------------- 1 | # Exploring Model Kinship for Merging LLMs 2 | # The implementation of this module is borrowed from: https://github.com/zjunlp/ModelKinship/ 3 | -------------------------------------------------------------------------------- /fusion_bench/modelpool/seq_classification_lm/__init__.py: -------------------------------------------------------------------------------- 1 | from .reward_model import create_reward_model_from_pretrained 2 | from .seq_classification_lm import SequenceClassificationModelPool 3 | -------------------------------------------------------------------------------- /fusion_bench/utils/set.py: -------------------------------------------------------------------------------- 1 | __all__ = ["union"] 2 | 3 | 4 | def union(*iters) -> set: 5 | if len(iters) == 0: 6 | return set() 
7 | s = set().union(*iters) 8 | return s 9 | -------------------------------------------------------------------------------- /config/_get_started/greeting_program.yaml: -------------------------------------------------------------------------------- 1 | _target_: fusion_bench._get_started.greeting_program.GreetingProgram 2 | message: "Welcome to FusionBench" 3 | name: "Developer" 4 | repeat_count: 3 5 | -------------------------------------------------------------------------------- /config/model/flan-t5/flan-t5-base_glue-qqp.yaml: -------------------------------------------------------------------------------- 1 | glue-qqp: 2 | _target_: transformers.AutoModelForSeq2SeqLM.from_pretrained 3 | pretrained_model_name_or_path: tanganke/flan-t5-base_glue-qqp 4 | -------------------------------------------------------------------------------- /config/model/flan-t5/flan-t5-base_glue-rte.yaml: -------------------------------------------------------------------------------- 1 | glue-rte: 2 | _target_: transformers.AutoModelForSeq2SeqLM.from_pretrained 3 | pretrained_model_name_or_path: tanganke/flan-t5-base_glue-rte 4 | -------------------------------------------------------------------------------- /config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_cifar10.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - CLIPVisionModelTaskPool@: _template 3 | - /dataset/image_classification/test@test_datasets: cifar10 4 | -------------------------------------------------------------------------------- /config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_eurosat.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - CLIPVisionModelTaskPool@: _template 3 | - /dataset/image_classification/test@test_datasets: eurosat 4 | -------------------------------------------------------------------------------- /config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_fer2013.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - CLIPVisionModelTaskPool@: _template 3 | - /dataset/image_classification/test@test_datasets: fer2013 4 | -------------------------------------------------------------------------------- /config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_food101.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - CLIPVisionModelTaskPool@: _template 3 | - /dataset/image_classification/test@test_datasets: food101 4 | -------------------------------------------------------------------------------- /config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_kmnist.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - CLIPVisionModelTaskPool@: _template 3 | - /dataset/image_classification/test@test_datasets: kmnist 4 | -------------------------------------------------------------------------------- /config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_sun397.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - CLIPVisionModelTaskPool@: _template 3 | - /dataset/image_classification/test@test_datasets: sun397 4 | -------------------------------------------------------------------------------- /docs/algorithms/pruning/images/llama_2_4_semistructued_first_layer.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/algorithms/pruning/images/llama_2_4_semistructued_first_layer.png -------------------------------------------------------------------------------- /docs/api/fusion_bench.utils/modelscope.md: -------------------------------------------------------------------------------- 1 | # ModelScope Integration 2 | 3 | ::: fusion_bench.utils.modelscope 4 | options: 5 | show_root_full_path: true 6 | heading_level: 3 7 | -------------------------------------------------------------------------------- /fusion_bench/method/depth_upscaling/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa F401 2 | from .depth_upscaling import DepthUpscalingAlgorithm 3 | from .depth_upscaling_for_llama import DepthUpscalingForLlama 4 | -------------------------------------------------------------------------------- /fusion_bench/tasks/clip_classification/mnist.py: -------------------------------------------------------------------------------- 1 | classnames = ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"] 2 | 3 | templates = [ 4 | lambda c: f'a photo of the number: "{c}".', 5 | ] 6 | -------------------------------------------------------------------------------- /fusion_bench/tasks/clip_classification/svhn.py: -------------------------------------------------------------------------------- 1 | classnames = ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"] 2 | 3 | templates = [ 4 | lambda c: f'a photo of the number: "{c}".', 5 | ] 6 | -------------------------------------------------------------------------------- /config/method/pruning/magnitude_diff_pruning.yaml: -------------------------------------------------------------------------------- 1 | _target_: fusion_bench.method.MagnitudeDiffPruningAlgorithm 2 | prune_ratio: 0.5 3 | rescale: false 4 | extract_names: null 5 | prune_type: minor 6 | -------------------------------------------------------------------------------- /config/model/flan-t5/flan-t5-base_glue-cola.yaml: -------------------------------------------------------------------------------- 1 | glue-cola: 2 | _target_: transformers.AutoModelForSeq2SeqLM.from_pretrained 3 | pretrained_model_name_or_path: tanganke/flan-t5-base_glue-cola 4 | -------------------------------------------------------------------------------- /config/model/flan-t5/flan-t5-base_glue-mnli.yaml: -------------------------------------------------------------------------------- 1 | glue-mnli: 2 | _target_: transformers.AutoModelForSeq2SeqLM.from_pretrained 3 | pretrained_model_name_or_path: tanganke/flan-t5-base_glue-mnli 4 | -------------------------------------------------------------------------------- /config/model/flan-t5/flan-t5-base_glue-mrpc.yaml: -------------------------------------------------------------------------------- 1 | glue-mrpc: 2 | _target_: transformers.AutoModelForSeq2SeqLM.from_pretrained 3 | pretrained_model_name_or_path: tanganke/flan-t5-base_glue-mrpc 4 | -------------------------------------------------------------------------------- /config/model/flan-t5/flan-t5-base_glue-qnli.yaml: -------------------------------------------------------------------------------- 1 | glue-qnli: 2 | _target_: transformers.AutoModelForSeq2SeqLM.from_pretrained 3 | pretrained_model_name_or_path: tanganke/flan-t5-base_glue-qnli 4 | -------------------------------------------------------------------------------- /config/model/flan-t5/flan-t5-base_glue-sst2.yaml: 
-------------------------------------------------------------------------------- 1 | glue-sst2: 2 | _target_: transformers.AutoModelForSeq2SeqLM.from_pretrained 3 | pretrained_model_name_or_path: tanganke/flan-t5-base_glue-sst2 4 | -------------------------------------------------------------------------------- /config/model/flan-t5/flan-t5-base_glue-stsb.yaml: -------------------------------------------------------------------------------- 1 | glue-stsb: 2 | _target_: transformers.AutoModelForSeq2SeqLM.from_pretrained 3 | pretrained_model_name_or_path: tanganke/flan-t5-base_glue-stsb 4 | -------------------------------------------------------------------------------- /config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_cifar100.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - CLIPVisionModelTaskPool@: _template 3 | - /dataset/image_classification/test@test_datasets: cifar100 4 | -------------------------------------------------------------------------------- /config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_resisc45.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - CLIPVisionModelTaskPool@: _template 3 | - /dataset/image_classification/test@test_datasets: resisc45 4 | -------------------------------------------------------------------------------- /docs/modelpool/images/clip-vit-base-patch16_full&lora&l-lora_average.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/modelpool/images/clip-vit-base-patch16_full&lora&l-lora_average.png -------------------------------------------------------------------------------- /fusion_bench/method/regmean_plusplus/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa F401 2 | from .clip_regmean_plusplus import RegMeanAlgorithmForCLIPPlusPlus 3 | from .regmean_plusplus import RegMeanAlgorithmPlusPlus 4 | -------------------------------------------------------------------------------- /fusion_bench/tasks/clip_classification/pcam.py: -------------------------------------------------------------------------------- 1 | classnames = ["lymph node", "lymph node containing metastatic tumor tissue"] 2 | 3 | templates = [ 4 | lambda c: f"this is a photo of {c}", 5 | ] 6 | -------------------------------------------------------------------------------- /config/dataset/image_classification/test/the_eight_tasks.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - sun397 3 | - stanford-cars 4 | - resisc45 5 | - eurosat 6 | - svhn 7 | - gtsrb 8 | - mnist 9 | - dtd 10 | -------------------------------------------------------------------------------- /config/dataset/image_classification/val/the_eight_tasks.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - sun397 3 | - stanford-cars 4 | - resisc45 5 | - eurosat 6 | - svhn 7 | - gtsrb 8 | - mnist 9 | - dtd 10 | -------------------------------------------------------------------------------- /config/method/analysis/task_vector_violin_plot.yaml: -------------------------------------------------------------------------------- 1 | _target_: fusion_bench.method.TaskVectorViolinPlot 2 | trainable_only: true 3 | max_points_per_model: 1000 4 | fig_kwargs: null 5 | output_path: null 6 | 
-------------------------------------------------------------------------------- /fusion_bench/method/dare/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa F401 2 | from .simple_average import DareSimpleAverage 3 | from .task_arithmetic import DareTaskArithmetic 4 | from .ties_merging import DareTiesMerging 5 | -------------------------------------------------------------------------------- /fusion_bench/method/lm_finetune/__init__.py: -------------------------------------------------------------------------------- 1 | from .bradley_terry_rm import BradleyTerryRewardModeling 2 | from .fullfinetune_sft import FullFinetuneSFT 3 | from .peftfinetune_sft import PeftFinetuneSFT 4 | -------------------------------------------------------------------------------- /fusion_bench/method/moe_pruner/hooks/__init__.py: -------------------------------------------------------------------------------- 1 | from .deepseek_v2 import ( 2 | MoEPrunerHookFnForDeepseekV2Gate, 3 | MoEPrunerHookFnForDeepseekV2Linear, 4 | ) 5 | from .hook import BaseHookFn 6 | -------------------------------------------------------------------------------- /config/dataset/image_classification/train/the_eight_tasks.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - sun397 3 | - stanford-cars 4 | - resisc45 5 | - eurosat 6 | - svhn 7 | - gtsrb 8 | - mnist 9 | - dtd 10 | -------------------------------------------------------------------------------- /config/dataset/question_answering/test/search_qa.yaml: -------------------------------------------------------------------------------- 1 | search_qa: 2 | _target_: datasets.load_dataset 3 | _args_: 4 | - search_qa 5 | - train_test_val 6 | trust_remote_code: true 7 | split: test 8 | -------------------------------------------------------------------------------- /config/dataset/text_generation/test/gsm-hard.yaml: -------------------------------------------------------------------------------- 1 | gsm-hard: 2 | _target_: datasets.load_dataset 3 | path: reasoning-machines/gsm-hard 4 | split: train # this dataset is used to evaluate math reasoning 5 | -------------------------------------------------------------------------------- /config/method/analysis/task_vector_cos_similarity.yaml: -------------------------------------------------------------------------------- 1 | _target_: fusion_bench.method.TaskVectorCosSimilarity 2 | plot_heatmap: true 3 | trainable_only: true 4 | max_points_per_model: null 5 | output_path: null 6 | -------------------------------------------------------------------------------- /config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_emnist_letters.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - CLIPVisionModelTaskPool@: _template 3 | - /dataset/image_classification/test@test_datasets: emnist_letters 4 | -------------------------------------------------------------------------------- /config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_fashion_mnist.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - CLIPVisionModelTaskPool@: _template 3 | - /dataset/image_classification/test@test_datasets: fashion_mnist 4 | -------------------------------------------------------------------------------- /config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_oxford-iiit-pet.yaml: 
-------------------------------------------------------------------------------- 1 | defaults: 2 | - CLIPVisionModelTaskPool@: _template 3 | - /dataset/image_classification/test@test_datasets: oxford-iiit-pet 4 | -------------------------------------------------------------------------------- /config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_rendered-sst2.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - CLIPVisionModelTaskPool@: _template 3 | - /dataset/image_classification/test@test_datasets: rendered-sst2 4 | -------------------------------------------------------------------------------- /config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_stanford-cars.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - CLIPVisionModelTaskPool@: _template 3 | - /dataset/image_classification/test@test_datasets: stanford-cars 4 | -------------------------------------------------------------------------------- /fusion_bench/dataset/arc_agi/__init__.py: -------------------------------------------------------------------------------- 1 | from .arc_agi import ( 2 | load_tokenized_arc_agi_dataset, 3 | load_tokenized_arc_agi_dataset_for_ttt, 4 | process_task, 5 | process_task_for_ttt, 6 | ) 7 | -------------------------------------------------------------------------------- /fusion_bench/method/regmean/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa F401 2 | from .clip_regmean import RegMeanAlgorithmForCLIP 3 | from .gpt2_regmean import RegMeanAlgorithmForGPT2 4 | from .regmean import RegMeanAlgorithm 5 | -------------------------------------------------------------------------------- /config/dataset/question_answering/train/search_qa.yaml: -------------------------------------------------------------------------------- 1 | search_qa: 2 | _target_: datasets.load_dataset 3 | _args_: 4 | - search_qa 5 | - train_test_val 6 | trust_remote_code: true 7 | split: train 8 | -------------------------------------------------------------------------------- /config/dataset/question_answering/val/search_qa.yaml: -------------------------------------------------------------------------------- 1 | search_qa: 2 | _target_: datasets.load_dataset 3 | _args_: 4 | - search_qa 5 | - train_test_val 6 | trust_remote_code: true 7 | split: validation 8 | -------------------------------------------------------------------------------- /config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_oxford_flowers102.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - CLIPVisionModelTaskPool@: _template 3 | - /dataset/image_classification/test@test_datasets: oxford_flowers102 4 | -------------------------------------------------------------------------------- /config/taskpool/nyuv2_taskpool.yaml: -------------------------------------------------------------------------------- 1 | type: NYUv2TaskPool 2 | data_dir: .cache 3 | tasks: 4 | - segmentation 5 | - depth 6 | - normal 7 | # per-GPU batch size 8 | batch_size: 16 9 | num_workers: 4 10 | -------------------------------------------------------------------------------- /config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_oxford_flowers102_val.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - CLIPVisionModelTaskPool@: _template 3 | - 
/dataset/image_classification/val@test_datasets: oxford_flowers102 4 | -------------------------------------------------------------------------------- /examples/ada_svd/clip_vision.sh: -------------------------------------------------------------------------------- 1 | fusion_bench \ 2 | method=ada_svd/clip_vision \ 3 | modelpool=CLIPVisionModelPool/clip-vit-base-patch32_TA8 \ 4 | taskpool=CLIPVisionModelTaskPool/clip-vit-base-patch32_TA8 5 | -------------------------------------------------------------------------------- /fusion_bench/method/smile_upscaling/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa F401 2 | from .singular_projection_merging import SingularProjectionMergingAlgorithm 3 | from .smile_upscaling import SmileMoELinear, SmileUpscalingAlgorithm 4 | -------------------------------------------------------------------------------- /examples/adamerging/gpt_2.sh: -------------------------------------------------------------------------------- 1 | # Layer-wise AdaMerging for GPT-2 2 | fusion_bench \ 3 | method=adamerging/layer_wise_gpt2 \ 4 | method.max_steps=400 \ 5 | modelpool=test/test.yaml \ 6 | taskpool=test/test.yaml 7 | 8 | -------------------------------------------------------------------------------- /config/modelpool/CausalLMPool/mistral-7b.yaml: -------------------------------------------------------------------------------- 1 | _target_: fusion_bench.modelpool.CausalLMPool 2 | models: 3 | _pretrained_: mistralai/Mistral-7B-v0.1 4 | tokenizer: ${.models._pretrained_} 5 | model_kwargs: 6 | torch_dtype: bfloat16 7 | -------------------------------------------------------------------------------- /config/nyuv2_config.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - fabric_model_fusion 3 | - override method: simple_average 4 | - override modelpool: nyuv2_modelpool 5 | - override taskpool: nyuv2_taskpool 6 | - _self_ 7 | trainer: 8 | devices: 1 9 | -------------------------------------------------------------------------------- /fusion_bench/method/moe_pruner/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Implementation of MoE-Pruner 3 | 4 | MoE-Pruner: Pruning Mixture-of-Experts Large Language Model using the Hints from Its Router 5 | """ 6 | 7 | from .moe_pruner import MoEPruner 8 | -------------------------------------------------------------------------------- /fusion_bench/tasks/clip_classification/emnist_mnist.py: -------------------------------------------------------------------------------- 1 | # https://huggingface.co/datasets/tanganke/emnist_mnist 2 | classnames = [str(i) for i in range(10)] 3 | templates = [ 4 | lambda c: f'a photo of the number: "{c}".', 5 | ] 6 | -------------------------------------------------------------------------------- /config/dataset/llm_sft/alpaca_cleaned.yaml: -------------------------------------------------------------------------------- 1 | alpaca-cleaned: 2 | _target_: fusion_bench.dataset.llama.alpaca.load_tokenized_alpaca_dataset 3 | tokenizer: ???
4 | path: "yahma/alpaca-cleaned" 5 | split: train 6 | cache_path: null 7 | -------------------------------------------------------------------------------- /fusion_bench/method/gossip/__init__.py: -------------------------------------------------------------------------------- 1 | from .clip_layer_wise_gossip import CLIPLayerWiseGossipAlgorithm 2 | from .clip_task_wise_gossip import CLIPTaskWiseGossipAlgorithm 3 | from .flan_t5_layer_wise_gossip import FlanT5LayerWiseGossipAlgorithm 4 | -------------------------------------------------------------------------------- /config/dataset/text_generation/test/gsm8k_question_label.yaml: -------------------------------------------------------------------------------- 1 | qsm8k: 2 | _target_: fusion_bench.dataset.gsm8k.load_gsm8k_question_label_dataset 3 | dataset_name: test # this option can be 'train', 'test', 'train_socratic', and 'test_socratic' 4 | -------------------------------------------------------------------------------- /config/dataset/text_generation/train/gsm8k_question_label.yaml: -------------------------------------------------------------------------------- 1 | qsm8k: 2 | _target_: fusion_bench.dataset.gsm8k.load_gsm8k_question_label_dataset 3 | dataset_name: train # this option can be 'train', 'test', 'train_socratic', and 'test_socratic' 4 | -------------------------------------------------------------------------------- /config/llama_full_finetune.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - fabric_model_fusion 3 | - override fabric: llama_fsdp 4 | - override method: lm_finetune/fullfinetune_sft.yaml 5 | - override modelpool: CausalLMPool/llama_alpaca_cleaned.yaml 6 | - _self_ 7 | -------------------------------------------------------------------------------- /config/method/ada_svd/clip_vision.yaml: -------------------------------------------------------------------------------- 1 | _target_: fusion_bench.method.AdaSVDMergingForCLIPVisionModel 2 | scaling_factor: null 3 | num_samples: 256 4 | gate_k: 16 5 | average_experts: false 6 | device: cuda 7 | upscaling_accelerator: null 8 | seed: 0 9 | -------------------------------------------------------------------------------- /config/model/flan-t5/flan-t5-base_glue-cola_lora-16.yaml: -------------------------------------------------------------------------------- 1 | glue-cola: 2 | _target_: fusion_bench.modelpool.seq2seq_lm.modelpool.load_lora_model 3 | base_model_path: google/flan-t5-base 4 | peft_model_path: tanganke/flan-t5-base_glue-cola_lora-16 5 | -------------------------------------------------------------------------------- /config/model/flan-t5/flan-t5-base_glue-mnli_lora-16.yaml: -------------------------------------------------------------------------------- 1 | glue-mnli: 2 | _target_: fusion_bench.modelpool.seq2seq_lm.modelpool.load_lora_model 3 | base_model_path: google/flan-t5-base 4 | peft_model_path: tanganke/flan-t5-base_glue-mnli_lora-16 5 | -------------------------------------------------------------------------------- /config/model/flan-t5/flan-t5-base_glue-mrpc_lora-16.yaml: -------------------------------------------------------------------------------- 1 | glue-mrpc: 2 | _target_: fusion_bench.modelpool.seq2seq_lm.modelpool.load_lora_model 3 | base_model_path: google/flan-t5-base 4 | peft_model_path: tanganke/flan-t5-base_glue-mrpc_lora-16 5 | -------------------------------------------------------------------------------- /config/model/flan-t5/flan-t5-base_glue-qnli_lora-16.yaml: 
-------------------------------------------------------------------------------- 1 | glue-qnli: 2 | _target_: fusion_bench.modelpool.seq2seq_lm.modelpool.load_lora_model 3 | base_model_path: google/flan-t5-base 4 | peft_model_path: tanganke/flan-t5-base_glue-qnli_lora-16 5 | -------------------------------------------------------------------------------- /config/model/flan-t5/flan-t5-base_glue-qqp_lora-16.yaml: -------------------------------------------------------------------------------- 1 | glue-qqp: 2 | _target_: fusion_bench.modelpool.seq2seq_lm.modelpool.load_lora_model 3 | base_model_path: google/flan-t5-base 4 | peft_model_path: tanganke/flan-t5-base_glue-qqp_lora-16 5 | -------------------------------------------------------------------------------- /config/model/flan-t5/flan-t5-base_glue-rte_lora-16.yaml: -------------------------------------------------------------------------------- 1 | glue-rte: 2 | _target_: fusion_bench.modelpool.seq2seq_lm.modelpool.load_lora_model 3 | base_model_path: google/flan-t5-base 4 | peft_model_path: tanganke/flan-t5-base_glue-rte_lora-16 5 | -------------------------------------------------------------------------------- /config/model/flan-t5/flan-t5-base_glue-sst2_lora-16.yaml: -------------------------------------------------------------------------------- 1 | glue-sst2: 2 | _target_: fusion_bench.modelpool.seq2seq_lm.modelpool.load_lora_model 3 | base_model_path: google/flan-t5-base 4 | peft_model_path: tanganke/flan-t5-base_glue-sst2_lora-16 5 | -------------------------------------------------------------------------------- /config/model/flan-t5/flan-t5-base_glue-stsb_lora-16.yaml: -------------------------------------------------------------------------------- 1 | glue-stsb: 2 | _target_: fusion_bench.modelpool.seq2seq_lm.modelpool.load_lora_model 3 | base_model_path: google/flan-t5-base 4 | peft_model_path: tanganke/flan-t5-base_glue-stsb_lora-16 5 | -------------------------------------------------------------------------------- /config/model/flan-t5/flan-t5-large_glue-qqp_lora-16.yaml: -------------------------------------------------------------------------------- 1 | glue-qqp: 2 | _target_: fusion_bench.modelpool.seq2seq_lm.modelpool.load_lora_model 3 | base_model_path: google/flan-t5-large 4 | peft_model_path: tanganke/flan-t5-large_glue-qqp_lora-16 5 | -------------------------------------------------------------------------------- /config/model/flan-t5/flan-t5-large_glue-rte_lora-16.yaml: -------------------------------------------------------------------------------- 1 | glue-rte: 2 | _target_: fusion_bench.modelpool.seq2seq_lm.modelpool.load_lora_model 3 | base_model_path: google/flan-t5-large 4 | peft_model_path: tanganke/flan-t5-large_glue-rte_lora-16 5 | -------------------------------------------------------------------------------- /config/fabric/loggers/wandb_logger.yaml: -------------------------------------------------------------------------------- 1 | # https://lightning.ai/docs/fabric/2.4.0/guide/loggers/wandb.html#weights-and-biases 2 | _target_: wandb.integration.lightning.fabric.WandbLogger 3 | project: ${hydra:job.config_name} 4 | save_dir: ${path.log_dir} 5 | -------------------------------------------------------------------------------- /config/model/flan-t5/flan-t5-large_glue-cola_lora-16.yaml: -------------------------------------------------------------------------------- 1 | glue-cola: 2 | _target_: fusion_bench.modelpool.seq2seq_lm.modelpool.load_lora_model 3 | base_model_path: google/flan-t5-large 4 | 
peft_model_path: tanganke/flan-t5-large_glue-cola_lora-16 5 | -------------------------------------------------------------------------------- /config/model/flan-t5/flan-t5-large_glue-mnli_lora-16.yaml: -------------------------------------------------------------------------------- 1 | glue-mnli: 2 | _target_: fusion_bench.modelpool.seq2seq_lm.modelpool.load_lora_model 3 | base_model_path: google/flan-t5-large 4 | peft_model_path: tanganke/flan-t5-large_glue-mnli_lora-16 5 | -------------------------------------------------------------------------------- /config/model/flan-t5/flan-t5-large_glue-mrpc_lora-16.yaml: -------------------------------------------------------------------------------- 1 | glue-mrpc: 2 | _target_: fusion_bench.modelpool.seq2seq_lm.modelpool.load_lora_model 3 | base_model_path: google/flan-t5-large 4 | peft_model_path: tanganke/flan-t5-large_glue-mrpc_lora-16 5 | -------------------------------------------------------------------------------- /config/model/flan-t5/flan-t5-large_glue-qnli_lora-16.yaml: -------------------------------------------------------------------------------- 1 | glue-qnli: 2 | _target_: fusion_bench.modelpool.seq2seq_lm.modelpool.load_lora_model 3 | base_model_path: google/flan-t5-large 4 | peft_model_path: tanganke/flan-t5-large_glue-qnli_lora-16 5 | -------------------------------------------------------------------------------- /config/model/flan-t5/flan-t5-large_glue-sst2_lora-16.yaml: -------------------------------------------------------------------------------- 1 | glue-sst2: 2 | _target_: fusion_bench.modelpool.seq2seq_lm.modelpool.load_lora_model 3 | base_model_path: google/flan-t5-large 4 | peft_model_path: tanganke/flan-t5-large_glue-sst2_lora-16 5 | -------------------------------------------------------------------------------- /config/model/flan-t5/flan-t5-large_glue-stsb_lora-16.yaml: -------------------------------------------------------------------------------- 1 | glue-stsb: 2 | _target_: fusion_bench.modelpool.seq2seq_lm.modelpool.load_lora_model 3 | base_model_path: google/flan-t5-large 4 | peft_model_path: tanganke/flan-t5-large_glue-stsb_lora-16 5 | -------------------------------------------------------------------------------- /config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_individual.yaml: -------------------------------------------------------------------------------- 1 | _target_: fusion_bench.modelpool.CLIPVisionModelPool 2 | _recursive_: False 3 | models: 4 | _pretrained_: openai/clip-vit-base-patch32 5 | processor: ${.models._pretrained_} 6 | -------------------------------------------------------------------------------- /examples/gossip/flan_t5.sh: -------------------------------------------------------------------------------- 1 | # Layer-wise gossip 2 | fusion_bench \ 3 | method=gossip/layer_wise_flan_t5 \ 4 | method.lr=1e-3 \ 5 | modelpool=Seq2SeqLMPool/flan-t5-base_glue_lora16_tta \ 6 | taskpool=flan-t5_glue_text_generation 7 | -------------------------------------------------------------------------------- /fusion_bench/method/opcm/__init__.py: -------------------------------------------------------------------------------- 1 | from .opcm import OPCMForCLIP 2 | from .task_arithmetic import ContinualTaskArithmeticForCLIP 3 | from .ties_merging import ContinualTiesMergingForCLIP 4 | from .weight_average import ContinualWeightAverageForCLIP 5 | -------------------------------------------------------------------------------- /fusion_bench/models/modeling_losparse_llama/__init__.py:
-------------------------------------------------------------------------------- 1 | # flake8: noqa F401 2 | from . import register 3 | from .configuration_losparse_llama import LoSparseLlamaConfig 4 | from .modeling_losparse_llama import LoSparseLlamaForCausalLM, LoSparseLlamaModel 5 | -------------------------------------------------------------------------------- /fusion_bench/models/open_clip/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | This module contains the support for the open_clip model. 3 | Modified from https://github.com/nik-dim/tall_masks/ 4 | """ 5 | 6 | from .modeling import ClassificationHead, ImageClassifier, ImageEncoder 7 | -------------------------------------------------------------------------------- /config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_mtl.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - CLIPVisionModelPool@: _template 3 | - /model/clip-vit@models: 4 | - clip-vit-base-patch32 5 | - /dataset/image_classification/train@train_datasets: the_eight_tasks 6 | -------------------------------------------------------------------------------- /examples/trust_region/READMD.md: -------------------------------------------------------------------------------- 1 | 2 | ```bash 3 | fusion_bench \ 4 | method=trust_region/clip_task_arithmetic \ 5 | modelpool=CLIPVisionModelPool/clip-vit-base-patch32_TA8 \ 6 | taskpool=CLIPVisionModelTaskPool/clip-vit-classification_TA8 7 | ``` 8 | -------------------------------------------------------------------------------- /config/method/classification/image_classification_finetune_test.yaml: -------------------------------------------------------------------------------- 1 | _target_: fusion_bench.method.classification.ImageClassificationFineTuning_Test 2 | checkpoint_path: null 3 | dataloader_kwargs: 4 | batch_size: 256 5 | num_workers: 4 6 | pin_memory: true 7 | -------------------------------------------------------------------------------- /config/modelpool/Seq2SeqLMPool/flan-t5-base_individual.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - Seq2SeqLMPool@: _template 3 | models: 4 | _pretrained_: 5 | _target_: transformers.AutoModelForSeq2SeqLM.from_pretrained 6 | pretrained_model_name_or_path: google/flan-t5-base 7 | -------------------------------------------------------------------------------- /examples/gossip/clip.sh: -------------------------------------------------------------------------------- 1 | # Layer-wise gossip 2 | fusion_bench \ 3 | method=gossip/layer_wise_clip \ 4 | method.lr=1e-3 \ 5 | modelpool=CLIPVisionModelPool/clip-vit-base-patch32_TA8 \ 6 | taskpool=CLIPVisionModelTaskPool/clip-vit-classification_TA8 7 | -------------------------------------------------------------------------------- /config/method/trust_region/clip_task_arithmetic.yaml: -------------------------------------------------------------------------------- 1 | _target_: fusion_bench.method.trust_region.clip_task_arithmetic.TaskArithmeticWithTrustRegionForCLIP 2 | scaling_factor: 0.3 3 | threshold_quantile: 0.99 4 | max_samples: 128 5 | batch_size: 128 6 | zero_shot: false 7 | -------------------------------------------------------------------------------- /examples/adamerging/flan_t5_base.sh: -------------------------------------------------------------------------------- 1 | # Layer-wise adamerging 2 | fusion_bench \ 3 | method=adamerging/layer_wise_flan_t5 \ 
4 | method.optimizer.lr=1e-3 \ 5 | modelpool=Seq2SeqLMPool/flan-t5-base_glue_lora16_tta \ 6 | taskpool=flan-t5_glue_text_generation 7 | -------------------------------------------------------------------------------- /fusion_bench/models/modeling_smile_llama/__init__.py: -------------------------------------------------------------------------------- 1 | from . import register 2 | from .configuration_smile_llama import SmileLlamaConfig 3 | from .modeling_smile_llama import ( 4 | SmileLlamaDecoderLayer, 5 | SmileLlamaForCausalLM, 6 | SmileLlamaModel, 7 | ) 8 | -------------------------------------------------------------------------------- /fusion_bench/method/mixture_of_experts/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa F401 2 | from .mixtral_merging import ( 3 | MixtralForCausalLMMergingAlgorithm, 4 | MixtralForCausalLMUpscalingAlgorithm, 5 | MixtralMoEMergingAlgorithm, 6 | MixtralUpscalingAlgorithm, 7 | ) 8 | -------------------------------------------------------------------------------- /fusion_bench/method/fisher_merging/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa F401 2 | from .clip_fisher_merging import FisherMergingForCLIPVisionModel 3 | from .fisher_merging import FisherMergingAlgorithm, get_param_names_to_merge 4 | from .gpt2_fisher_merging import FisherMergingAlgorithmForGPT2 5 | -------------------------------------------------------------------------------- /fusion_bench/models/modeling_smile_mistral/__init__.py: -------------------------------------------------------------------------------- 1 | from . import register 2 | from .configuration_smile_mistral import SmileMistralConfig 3 | from .modeling_smile_mistral import ( 4 | SmileMistralDecoderLayer, 5 | SmileMistralForCausalLM, 6 | SmileMistralModel, 7 | ) 8 | -------------------------------------------------------------------------------- /tests/import_profile.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | 3 | from pyinstrument import Profiler 4 | 5 | 6 | async def main(): 7 | p = Profiler(async_mode="disabled") 8 | 9 | with p: 10 | import fusion_bench 11 | 12 | p.print() 13 | 14 | 15 | asyncio.run(main()) 16 | -------------------------------------------------------------------------------- /docs/taskpool/dummy.md: -------------------------------------------------------------------------------- 1 | # Dummy TaskPool 2 | 3 | The `DummyTaskPool` is used for debugging purposes. 4 | It inherits from the base `TaskPool` class. 5 | 6 | ## Reference 7 | 8 | ::: fusion_bench.taskpool.dummy.DummyTaskPool 9 | options: 10 | members: [evaluate] 11 | -------------------------------------------------------------------------------- /config/method/expert_sparsity/README.md: -------------------------------------------------------------------------------- 1 | Original repo: https://github.com/Lucky-Lance/Expert_Sparsity 2 | 3 | Reference: 4 | Not All Experts are Equal: Efficient Expert Pruning and Skipping for Mixture-of-Experts Large Language Models. 5 | ACL 2024. 
6 | http://arxiv.org/abs/2402.14800 7 | -------------------------------------------------------------------------------- /config/method/task_singular_vector/TaskSingularVectorMerging.yaml: -------------------------------------------------------------------------------- 1 | _target_: fusion_bench.method.TaskSingularVectorMerging 2 | exclude_keys: null 3 | # alpha is a float or a list of floats 4 | # example: 5 | # alpha: 1 6 | # alpha: [1, 0.5, 0.25] 7 | alpha: 1 8 | return_single_task_models: false 9 | -------------------------------------------------------------------------------- /fusion_bench/method/lm_finetune/causal_lm_pretrain.py: -------------------------------------------------------------------------------- 1 | from fusion_bench import BaseAlgorithm 2 | from fusion_bench.modelpool import CausalLMPool 3 | 4 | 5 | class CausalLMPretrain(BaseAlgorithm): 6 | def run(self, modelpool: CausalLMPool): 7 | tokenizer = modelpool.load_tokenizer() 8 | -------------------------------------------------------------------------------- /config/modelpool/CausalLMPool/vicuna-7b-v1.5.yaml: -------------------------------------------------------------------------------- 1 | _target_: fusion_bench.modelpool.CausalLMPool 2 | _recursive_: false 3 | models: 4 | _pretrained_: meta-llama/Llama-2-7b-hf 5 | finetuned_model: lmsys/vicuna-7b-v1.5 6 | model_kwargs: 7 | torch_dtype: bfloat16 8 | tokenizer: ${.models.finetuned_model} 9 | -------------------------------------------------------------------------------- /config/modelpool/Seq2SeqLMPool/_template.yaml: -------------------------------------------------------------------------------- 1 | _target_: fusion_bench.modelpool.Seq2SeqLMPool 2 | _recursive_: false 3 | _version_: "0.2" 4 | models: ??? 5 | tokenizer: 6 | _target_: transformers.AutoTokenizer.from_pretrained 7 | pretrained_model_name_or_path: google/flan-t5-base 8 | model_kwargs: null 9 | -------------------------------------------------------------------------------- /config/fabric/loggers/swandb_logger.yaml: -------------------------------------------------------------------------------- 1 | # https://github.com/SwanHubX/SwanLab/blob/main/swanlab/integration/pytorch_lightning.py 2 | _target_: swanlab.integration.pytorch_lightning.SwanLabLogger 3 | project: ${hydra:job.config_name} 4 | description: "SwanLab logger with FusionBench" 5 | save_dir: ${path.log_dir} 6 | -------------------------------------------------------------------------------- /config/modelpool/OpenCLIPVisionModelPool/ViT-B-32_individual.yaml: -------------------------------------------------------------------------------- 1 | _target_: fusion_bench.modelpool.OpenCLIPVisionModelPool 2 | _recursive_: false 3 | model_dir: ./.cache/task_vectors_checkpoints/ 4 | models: 5 | _pretrained_: 6 | model_name: ViT-B-32 7 | pickle_path: ${...model_dir}/ViT-B-32/zeroshot.pt 8 | -------------------------------------------------------------------------------- /config/modelpool/CLIPVisionModelPool/_template.yaml: -------------------------------------------------------------------------------- 1 | _usage_: | 2 | defaults: 3 | - CLIPVisionModelPool@: _template 4 | _target_: fusion_bench.modelpool.CLIPVisionModelPool 5 | _recursive_: False 6 | models: ???
7 | train_datasets: null 8 | test_datasets: null 9 | processor: openai/clip-vit-base-patch32 10 | -------------------------------------------------------------------------------- /config/method/fw_merging/fw_hard.yaml: -------------------------------------------------------------------------------- 1 | _target_: fusion_bench.method.FrankWolfeHardAlgorithm 2 | merge_fn: task_arithmetic 3 | max_iters: 10 4 | step_size: 0.1 5 | dataset_size: 100 6 | tasks: [] 7 | init_weight: 8 | loss_fn: cross_entropy 9 | scaling_factor: 0.3 10 | max_num_models: 100 11 | granularity: task 12 | -------------------------------------------------------------------------------- /config/method/fw_merging/fw_soft.yaml: -------------------------------------------------------------------------------- 1 | _target_: fusion_bench.method.FrankWolfeSoftAlgorithm 2 | init_weight: 3 | max_iters: 10 4 | merge_fn: 'adamerging' 5 | tasks: 6 | ada_iters: 500 7 | dataset_size: 100 8 | ada_coeff: 1e-8 9 | step_size: 0.1 10 | max_num_models: 100 11 | granularity: task 12 | ada_loss: entropy_loss -------------------------------------------------------------------------------- /docs/api/fusion_bench.method/ensemble.md: -------------------------------------------------------------------------------- 1 | # Model Ensemble 2 | 3 | ::: fusion_bench.method 4 | options: 5 | show_root_heading: false 6 | heading_level: 2 7 | members: 8 | - SimpleEnsembleAlgorithm 9 | - WeightedEnsembleAlgorithm 10 | - MaxModelPredictorAlgorithm 11 | -------------------------------------------------------------------------------- /docs/api/fusion_bench.utils/logging.md: -------------------------------------------------------------------------------- 1 | # Logging Utilities 2 | 3 | ::: fusion_bench.utils.rich_utils 4 | options: 5 | show_root_full_path: true 6 | heading_level: 3 7 | 8 | ::: fusion_bench.utils.pylogger 9 | options: 10 | show_root_full_path: true 11 | heading_level: 3 12 | -------------------------------------------------------------------------------- /fusion_bench/models/modeling_smile_qwen2/__init__.py: -------------------------------------------------------------------------------- 1 | from . import register 2 | from .configuration_smile_qwen2 import SmileQwen2Config 3 | from .modeling_smile_qwen2 import ( 4 | SmileQwen2ForCausalLM, 5 | SmileQwen2ForQuestionAnswering, 6 | SmileQwen2ForSequenceClassification, 7 | SmileQwen2Model, 8 | ) 9 | -------------------------------------------------------------------------------- /docs/taskpool/LlamaTestGenerationTaskPool.md: -------------------------------------------------------------------------------- 1 | # LlamaTestGenerationTaskPool 2 | 3 | The `LlamaTestGenerationTaskPool` class is used to evaluate a language model on a set of prompts. It can also be used in an interactive mode for debugging purposes. 
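For orientation, a hypothetical taskpool configuration for this class might look like the sketch below. The `_target_` path is inferred from the module cited under References, and the field names (`test_prompts`, `max_new_tokens`) are illustrative assumptions rather than the documented schema:

```yaml
# Hypothetical sketch only; field names are assumptions, see the API reference below.
_target_: fusion_bench.taskpool.llama.test_generation.LlamaTestGenerationTaskPool
test_prompts:
  - "Explain model merging in one sentence."
max_new_tokens: 128
```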
4 | 5 | ## References 6 | 7 | ::: fusion_bench.taskpool.llama.test_generation 8 | -------------------------------------------------------------------------------- /config/dataset/image_classification/val/dtd.yaml: -------------------------------------------------------------------------------- 1 | dtd: 2 | _target_: fusion_bench.utils.data.train_validation_split 3 | dataset: 4 | _target_: datasets.load_dataset 5 | path: tanganke/dtd 6 | split: train 7 | validation_fraction: 0.1 8 | validation_size: null 9 | random_seed: 0 10 | return_split: val 11 | -------------------------------------------------------------------------------- /config/dataset/image_classification/val/mnist.yaml: -------------------------------------------------------------------------------- 1 | mnist: 2 | _target_: fusion_bench.utils.data.train_validation_split 3 | dataset: 4 | _target_: datasets.load_dataset 5 | path: mnist 6 | split: train 7 | validation_fraction: 0.1 8 | validation_size: null 9 | random_seed: 0 10 | return_split: val 11 | -------------------------------------------------------------------------------- /config/modelpool/CLIPVisionModelPool/clip-vit-base-patch16_TA8_model_only.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - CLIPVisionModelPool@: _template 3 | - /model/clip-vit@models: clip-vit-base-patch16_eight_tasks 4 | processor: 5 | _target_: transformers.CLIPProcessor.from_pretrained 6 | pretrained_model_name_or_path: openai/clip-vit-base-patch16 7 | -------------------------------------------------------------------------------- /config/modelpool/CausalLMPool/Qwen2.5-7B-math_and_coder.yaml: -------------------------------------------------------------------------------- 1 | _target_: fusion_bench.modelpool.CausalLMPool 2 | _recursive_: false 3 | models: 4 | _pretrained_: Qwen/Qwen2.5-7B 5 | math: Qwen/Qwen2.5-Math-7B 6 | code: Qwen/Qwen2.5-Coder-7B 7 | model_kwargs: 8 | torch_dtype: bfloat16 9 | tokenizer: Qwen/Qwen2.5-7B 10 | -------------------------------------------------------------------------------- /docs/api/fusion_bench.utils/package_management.md: -------------------------------------------------------------------------------- 1 | # Package Management 2 | 3 | ::: fusion_bench.utils.packages 4 | options: 5 | show_root_full_path: true 6 | heading_level: 3 7 | 8 | ::: fusion_bench.utils.lazy_imports 9 | options: 10 | show_root_full_path: true 11 | heading_level: 3 -------------------------------------------------------------------------------- /tests/README.md: -------------------------------------------------------------------------------- 1 | # tests Folder 2 | 3 | This folder contains all the unit tests for the project. The tests are designed to ensure the functionality and reliability of the codebase. 
To run the tests, use the following command: 4 | 5 | ```shell 6 | # Run all tests 7 | python -m unittest discover -v -s ./tests -p "test_*.py" 8 | ``` 9 | -------------------------------------------------------------------------------- /config/hydra/default.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - override help: fusion_bench_help 3 | - override job_logging: rich_logging 4 | run: 5 | dir: ${path.log_dir} 6 | sweep: 7 | dir: ${path.log_dir} 8 | subdir: ${hydra.job.num} 9 | job: 10 | env_set: 11 | HYDRA_FULL_ERROR: ${oc.env:HYDRA_FULL_ERROR,1} 12 | output_subdir: "" 13 | -------------------------------------------------------------------------------- /config/method/wudi/wudi.yaml: -------------------------------------------------------------------------------- 1 | # ============================================================================= 2 | # FusionBench Method Configuration: WUDI Merging 3 | # ============================================================================= 4 | _target_: fusion_bench.method.WUDIMerging 5 | 6 | iter_num: 400 7 | exclude_keys: null 8 | -------------------------------------------------------------------------------- /fusion_bench/taskpool/clip_vision/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa F401 2 | from .clip_rankone_moe_taskpool import RankoneMoECLIPVisionModelTaskPool 3 | from .clip_smile_taskpool import SmileCLIPVisionModelTaskPool 4 | from .clip_sparse_wemoe_taskpool import SparseWEMoECLIPVisionModelTaskPool 5 | from .taskpool import CLIPVisionModelTaskPool 6 | -------------------------------------------------------------------------------- /.vscode/init.sh: -------------------------------------------------------------------------------- 1 | #! 
/bin/bash 2 | SCRIPT_DIR=$(cd $(dirname $0); pwd) 3 | 4 | for file in launch.json settings.json 5 | do 6 | if [ -f ${SCRIPT_DIR}/${file} ]; then 7 | echo "File ${file} already exists, skipping" 8 | else 9 | cp -v ${SCRIPT_DIR}/${file}.template ${SCRIPT_DIR}/${file} 10 | fi 11 | done 12 | -------------------------------------------------------------------------------- /config/dataset/image_classification/val/gtsrb.yaml: -------------------------------------------------------------------------------- 1 | gtsrb: 2 | _target_: fusion_bench.utils.data.train_validation_split 3 | dataset: 4 | _target_: datasets.load_dataset 5 | path: tanganke/gtsrb 6 | split: train 7 | validation_fraction: 0.1 8 | validation_size: null 9 | random_seed: 0 10 | return_split: val 11 | -------------------------------------------------------------------------------- /config/dataset/image_classification/val/sun397.yaml: -------------------------------------------------------------------------------- 1 | sun397: 2 | _target_: fusion_bench.utils.data.train_validation_split 3 | dataset: 4 | _target_: datasets.load_dataset 5 | path: tanganke/sun397 6 | split: train 7 | validation_fraction: 0.1 8 | validation_size: null 9 | random_seed: 0 10 | return_split: val 11 | -------------------------------------------------------------------------------- /config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_single_finetuned.yaml: -------------------------------------------------------------------------------- 1 | _target_: fusion_bench.modelpool.CLIPVisionModelPool 2 | _recursive_: False 3 | processor: openai/clip-vit-base-patch32 4 | models: 5 | _pretrained_: openai/clip-vit-base-patch32 6 | finetuned: tanganke/clip-vit-base-patch32_stanford-cars 7 | platform: hf 8 | -------------------------------------------------------------------------------- /fusion_bench/dataset/imdb.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | from typing import Any, Dict, List, Optional 4 | 5 | from datasets import load_dataset, load_from_disk 6 | from transformers import PreTrainedTokenizer 7 | from trl import SFTConfig, SFTTrainer 8 | 9 | import fusion_bench 10 | 11 | log = logging.getLogger(__name__) 12 | -------------------------------------------------------------------------------- /config/dataset/image_classification/val/eurosat.yaml: -------------------------------------------------------------------------------- 1 | eurosat: 2 | _target_: fusion_bench.utils.data.train_validation_split 3 | dataset: 4 | _target_: datasets.load_dataset 5 | path: tanganke/eurosat 6 | split: train 7 | validation_fraction: 0.1 8 | validation_size: null 9 | random_seed: 0 10 | return_split: val 11 | -------------------------------------------------------------------------------- /config/dataset/image_classification/val/resisc45.yaml: -------------------------------------------------------------------------------- 1 | resisc45: 2 | _target_: fusion_bench.utils.data.train_validation_split 3 | dataset: 4 | _target_: datasets.load_dataset 5 | path: tanganke/resisc45 6 | split: train 7 | validation_fraction: 0.1 8 | validation_size: null 9 | random_seed: 0 10 | return_split: val 11 | -------------------------------------------------------------------------------- /examples/randes/clip-vit-base-patch32.sh: -------------------------------------------------------------------------------- 1 | fusion_bench \ 2 | fabric.loggers.name=randes_modelsoup/ViT-B-32_TA8 \ 3 | 
method=randes/superposed_model_soup \ 4 | method.mode=identity_matrix \ 5 | modelpool=CLIPVisionModelPool/clip-vit-base-patch32_TA8_model_only \ 6 | taskpool=CLIPVisionModelTaskPool/clip-vit-classification_TA8 7 | -------------------------------------------------------------------------------- /config/taskpool/CLIPVisionModelTaskPool/clip-vit-classification_TA8.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - CLIPVisionModelTaskPool@: _template 3 | - /dataset/image_classification/test@test_datasets: 4 | - sun397 5 | - stanford-cars 6 | - resisc45 7 | - eurosat 8 | - svhn 9 | - gtsrb 10 | - mnist 11 | - dtd 12 | -------------------------------------------------------------------------------- /config/dataset/image_classification/val/stanford-cars.yaml: -------------------------------------------------------------------------------- 1 | stanford-cars: 2 | _target_: fusion_bench.utils.data.train_validation_split 3 | dataset: 4 | _target_: datasets.load_dataset 5 | path: tanganke/stanford_cars 6 | split: train 7 | validation_fraction: 0.1 8 | validation_size: null 9 | random_seed: 0 10 | return_split: val 11 | -------------------------------------------------------------------------------- /config/modelpool/CausalLMPool/qwen2_math_1.5B_and_R1.yaml: -------------------------------------------------------------------------------- 1 | _target_: fusion_bench.modelpool.CausalLMPool 2 | _recursive_: false 3 | models: 4 | _pretrained_: Qwen/Qwen2.5-1.5B 5 | expert_1: Qwen/Qwen2.5-Math-1.5B 6 | expert_2: deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B 7 | model_kwargs: 8 | torch_dtype: bfloat16 9 | tokenizer: Qwen/Qwen2.5-1.5B 10 | -------------------------------------------------------------------------------- /docs/api/fusion_bench.utils/torch.md: -------------------------------------------------------------------------------- 1 | # PyTorch Utilities 2 | 3 | ## Device Management 4 | 5 | ::: fusion_bench.utils.devices 6 | options: 7 | show_root_full_path: true 8 | heading_level: 3 9 | 10 | ## Dtype 11 | 12 | ::: fusion_bench.utils.dtype 13 | options: 14 | show_root_full_path: true 15 | heading_level: 3 -------------------------------------------------------------------------------- /fusion_bench/tasks/clip_classification/stl10.py: -------------------------------------------------------------------------------- 1 | classnames = [ 2 | "airplane", 3 | "bird", 4 | "car", 5 | "cat", 6 | "deer", 7 | "dog", 8 | "horse", 9 | "monkey", 10 | "ship", 11 | "truck", 12 | ] 13 | 14 | templates = [ 15 | lambda c: f"a photo of a {c}.", 16 | lambda c: f"a photo of the {c}.", 17 | ] 18 | -------------------------------------------------------------------------------- /fusion_bench/method/expert_sparsity/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Original repo: https://github.com/Lucky-Lance/Expert_Sparsity 3 | 4 | Reference: 5 | Not All Experts are Equal: Efficient Expert Pruning and Skipping for Mixture-of-Experts Large Language Models. 6 | ACL 2024. 
7 | http://arxiv.org/abs/2402.14800 8 | """ 9 | 10 | from .mixtral import * 11 | -------------------------------------------------------------------------------- /config/dataset/image_classification/val/svhn.yaml: -------------------------------------------------------------------------------- 1 | svhn: 2 | _target_: fusion_bench.utils.data.train_validation_split 3 | dataset: 4 | _target_: datasets.load_dataset 5 | _args_: 6 | - svhn 7 | - cropped_digits 8 | split: train 9 | validation_fraction: 0.1 10 | validation_size: null 11 | random_seed: 0 12 | return_split: val 13 | -------------------------------------------------------------------------------- /config/fabric/strategy/llama_fsdp.yaml: -------------------------------------------------------------------------------- 1 | _target_: lightning.fabric.strategies.FSDPStrategy 2 | sharding_strategy: FULL_SHARD 3 | cpu_offload: false 4 | auto_wrap_policy: 5 | _target_: fusion_bench.mixins.lightning_fabric.get_policy 6 | _args_: 7 | - transformers.models.llama.modeling_llama.LlamaDecoderLayer 8 | activation_checkpointing_policy: ${.auto_wrap_policy} 9 | -------------------------------------------------------------------------------- /config/method/regmean/regmean.yaml: -------------------------------------------------------------------------------- 1 | # ============================================================================= 2 | # FusionBench Method Configuration: RegMean (Base) 3 | # ============================================================================= 4 | _target_: ??? 5 | num_regmean_examples: 256 6 | reduce_non_diagonal_ratio: 0.1 7 | exclude_param_names_regex: [] 8 | -------------------------------------------------------------------------------- /config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_svhn_and_mnist.yaml: -------------------------------------------------------------------------------- 1 | _target_: fusion_bench.modelpool.CLIPVisionModelPool 2 | _recursive_: False 3 | processor: openai/clip-vit-base-patch32 4 | models: 5 | _pretrained_: openai/clip-vit-base-patch32 6 | svhn: tanganke/clip-vit-base-patch32_svhn 7 | mnist: tanganke/clip-vit-base-patch32_mnist 8 | platform: hf 9 | -------------------------------------------------------------------------------- /config/modelpool/CausalLMPool/Qwen2.5-1.5B_math_and_code.yaml: -------------------------------------------------------------------------------- 1 | _target_: fusion_bench.modelpool.CausalLMPool 2 | _recursive_: false 3 | enable_lazy_loading: true 4 | models: 5 | _pretrained_: Qwen/Qwen2.5-1.5B 6 | math: Qwen/Qwen2.5-Math-1.5B 7 | code: Qwen/Qwen2.5-Coder-1.5B 8 | model_kwargs: 9 | torch_dtype: bfloat16 10 | tokenizer: Qwen/Qwen2.5-1.5B 11 | -------------------------------------------------------------------------------- /fusion_bench/method/pruning/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa F401 2 | from .llama_magnitude_prune import MagnitudePruningForLlama 3 | from .llama_random_prune import RandomPruningForLlama 4 | from .llama_sparsegpt_prune import SparseGPTPruningForLlama 5 | from .llama_wanda_prune import WandaPruningForLlama 6 | from .magnitude_diff_pruning import MagnitudeDiffPruningAlgorithm 7 | -------------------------------------------------------------------------------- /config/modelpool/ConvNextForImageClassification/convnext-base-224.yaml: -------------------------------------------------------------------------------- 1 | _target_: 
fusion_bench.modelpool.ConvNextForImageClassificationPool 2 | _recursive_: False 3 | models: 4 | _pretrained_: 5 | config_path: facebook/convnext-base-224 6 | pretrained: true 7 | dataset_name: null 8 | train_datasets: null 9 | val_datasets: null 10 | test_datasets: null 11 | -------------------------------------------------------------------------------- /config/modelpool/automodelpool.yaml: -------------------------------------------------------------------------------- 1 | type: AutoModelPool 2 | models: 3 | - name: _pretrained_ 4 | path: path_to_your_pretrained_model 5 | - name: model_1 6 | path: path_to_your_model_1 7 | - name: model_2 8 | path: path_to_your_model_2 9 | - name: model_3 10 | path: path_to_your_model_3 11 | - name: model_4 12 | path: path_to_your_model_4 13 | -------------------------------------------------------------------------------- /fusion_bench/models/modeling_smile_gemma2/__init__.py: -------------------------------------------------------------------------------- 1 | from . import register 2 | from .configuration_smile_gemma2 import SmileGemma2Config 3 | from .modeling_smile_gemma2 import ( 4 | SmileGemma2ForCausalLM, 5 | SmileGemma2ForSequenceClassification, 6 | SmileGemma2ForTokenClassification, 7 | SmileGemma2Model, 8 | SmileGemma2PreTrainedModel, 9 | ) 10 | -------------------------------------------------------------------------------- /config/dataset/image_classification/README.md: -------------------------------------------------------------------------------- 1 | # Image Classification Dataset Configurations 2 | 3 | This folder contains the dataset configurations for image classification tasks. 4 | 5 | - Each dataset should have 'image' and 'label' columns. 6 | - If a dataset has no test split, we will use the validation split as the test split and create the validation set from the training set. 7 | -------------------------------------------------------------------------------- /fusion_bench/mixins/openclip_classification.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from fusion_bench.mixins import LightningFabricMixin 4 | from fusion_bench.models.open_clip import ImageClassifier, ImageEncoder 5 | 6 | log = logging.getLogger(__name__) 7 | 8 | 9 | class OpenCLIPClassificationMixin(LightningFabricMixin): 10 | _train_processor = None 11 | _test_processor = None 12 | -------------------------------------------------------------------------------- /config/taskpool/CLIPVisionModelTaskPool/clip-vit-classification_TA8_val.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - CLIPVisionModelTaskPool@: _template 3 | # use validation set as test set 4 | - /dataset/image_classification/val@test_datasets: 5 | - sun397 6 | - stanford-cars 7 | - resisc45 8 | - eurosat 9 | - svhn 10 | - gtsrb 11 | - mnist 12 | - dtd 13 | -------------------------------------------------------------------------------- /fusion_bench/method/task_singular_vector/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | This module is modified from the original code of the paper: 3 | 4 | - Gargiulo, et al.
Task Singular Vectors: Reducing Task Interference in Model Merging 5 | - http://arxiv.org/abs/2412.00081 6 | - https://github.com/AntoAndGar/task_singular_vectors/ 7 | """ 8 | 9 | from .TSVM import TaskSingularVectorMerging 10 | -------------------------------------------------------------------------------- /config/modelpool/Dinov2ForImageClassification/dinov2-base-imagenet1k-1-layer.yaml: -------------------------------------------------------------------------------- 1 | _target_: fusion_bench.modelpool.Dinov2ForImageClassificationPool 2 | _recursive_: False 3 | models: 4 | _pretrained_: 5 | config_path: facebook/dinov2-base-imagenet1k-1-layer 6 | pretrained: true 7 | dataset_name: null 8 | train_datasets: null 9 | val_datasets: null 10 | test_datasets: null 11 | -------------------------------------------------------------------------------- /config/taskpool/CLIPVisionModelTaskPool/clip-vit-classification_TA8_with_control_task.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - CLIPVisionModelTaskPool@: _template 3 | - /dataset/image_classification/test@test_datasets: 4 | - tiny-imagenet 5 | - sun397 6 | - stanford-cars 7 | - resisc45 8 | - eurosat 9 | - svhn 10 | - gtsrb 11 | - mnist 12 | - dtd 13 | -------------------------------------------------------------------------------- /fusion_bench/method/pruning/wanda_utils/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | This module is modified from https://github.com/locuslab/wanda. 3 | 4 | It contains utility functions and classes for pruning neural network models using the Wanda method. 5 | The WANDA method is a weight pruning technique that aims to reduce the number of parameters in a neural network 6 | while maintaining its performance. 7 | """ 8 | -------------------------------------------------------------------------------- /config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_single_task_projection.yaml: -------------------------------------------------------------------------------- 1 | _target_: fusion_bench.modelpool.CLIPVisionModelPool 2 | _recursive_: false 3 | processor: openai/clip-vit-base-patch32 4 | models: 5 | _pretrained_: openai/clip-vit-base-patch32 6 | sun397: tanganke/clip-vit-base-patch32_sun397 7 | stanford-cars: tanganke/clip-vit-base-patch32_stanford-cars 8 | platform: hf 9 | -------------------------------------------------------------------------------- /config/taskpool/CLIPVisionModelTaskPool/clip-vit-classification_TA8_L14.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - CLIPVisionModelTaskPool@: _template 3 | - /dataset/image_classification/test@test_datasets: 4 | - sun397 5 | - stanford-cars 6 | - resisc45 7 | - eurosat 8 | - svhn 9 | - gtsrb 10 | - mnist 11 | - dtd 12 | base_model: openai/clip-vit-large-patch14 13 | -------------------------------------------------------------------------------- /docs/taskpool/gpt2_classification.md: -------------------------------------------------------------------------------- 1 | # GPT-2 Sequence Classification Tasks 2 | 3 | This task pool provides a set of sequence classification tasks from the GLUE benchmark for the GPT-2 model. 4 | Each task is associated with a dataset and the accuracy metric. The tasks are: 5 | CoLA, MNLI, MRPC, QNLI, QQP, RTE, and SST2. 
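As a usage sketch: the `adamerging/layer_wise_gpt2` method config appears verbatim in this repository's example scripts, but the modelpool and taskpool names below are placeholders, not confirmed config paths:

```bash
# Hypothetical: fuse GPT-2 models and evaluate on the GLUE classification tasks.
# Only the method config is confirmed by the examples in this repository;
# replace the placeholders with your actual modelpool/taskpool configs.
fusion_bench \
  method=adamerging/layer_wise_gpt2 \
  modelpool=<your_gpt2_modelpool> \
  taskpool=<your_gpt2_glue_classification_taskpool>
```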
6 | 7 | ## References 8 | 9 | ::: fusion_bench.taskpool.gpt2_text_classification -------------------------------------------------------------------------------- /fusion_bench/method/task_singular_vector/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from fusion_bench.method.ties_merging.ties_merging_utils import ( 2 | check_parameterNamesMatch, 3 | check_state_dicts_equal, 4 | ) 5 | from fusion_bench.utils import state_dict_to_vector, vector_to_state_dict 6 | 7 | from . import TSVC_utils, TSVM_utils 8 | from .task_singular_interference import compute_task_singular_interference 9 | -------------------------------------------------------------------------------- /config/modelpool/smile_mistral_exp_v1.yaml: -------------------------------------------------------------------------------- 1 | type: AutoModelForCausalLMPool 2 | # each model should have a name and a path, and the model is loaded from the path 3 | # this is equivalent to `AutoModelForCausalLM.from_pretrained(path)` 4 | models: 5 | - name: _pretrained_ 6 | path: mistralai/Mistral-7B-v0.1 7 | - name: expert_1 8 | path: meta-math/MetaMath-Mistral-7B 9 | dtype: float16 10 | -------------------------------------------------------------------------------- /config/method/pruning/llama_random_pruning.yaml: -------------------------------------------------------------------------------- 1 | _target_: fusion_bench.method.RandomPruningForLlama 2 | prune_type: unstructured 3 | # === options for unstructured pruning === 4 | # `sparsity_ratio` is the ratio of weights to be pruned, 1 means all weights are pruned 5 | sparsity_ratio: 0.5 6 | # === options for semistructured pruning === 7 | # 2:4 means 2 out of 4 weights are pruned 8 | n: 2 9 | m: 4 10 | -------------------------------------------------------------------------------- /config/model/clip-vit/clip-vit-base-patch16_eight_tasks.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - clip-vit-base-patch16 3 | - clip-vit-base-patch16_sun397 4 | - clip-vit-base-patch16_stanford-cars 5 | - clip-vit-base-patch16_resisc45 6 | - clip-vit-base-patch16_eurosat 7 | - clip-vit-base-patch16_svhn 8 | - clip-vit-base-patch16_gtsrb 9 | - clip-vit-base-patch16_mnist 10 | - clip-vit-base-patch16_dtd 11 | -------------------------------------------------------------------------------- /set_cache_dir.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | SCRIPT_DIR=$(dirname $(realpath $0)) 4 | 5 | if [ -d /mnt/huggingface_cache ]; then 6 | export HF_HOME=/mnt/huggingface_cache/ 7 | else 8 | export HF_HOME=$SCRIPT_DIR/.cache/huggingface 9 | fi 10 | 11 | echo "HF_HOME set to $HF_HOME" 12 | 13 | # if `HF_HOME` does not exist, create it 14 | if [ ! 
-d $HF_HOME ]; then 15 | mkdir -p $HF_HOME 16 | fi 17 | -------------------------------------------------------------------------------- /config/fabric/strategy/llama_peft_fsdp.yaml: -------------------------------------------------------------------------------- 1 | _target_: lightning.fabric.strategies.FSDPStrategy 2 | sharding_strategy: FULL_SHARD 3 | state_dict_type: full # Save a single, consolidated checkpoint file 4 | cpu_offload: false 5 | auto_wrap_policy: 6 | _target_: fusion_bench.mixins.lightning_fabric.get_size_based_auto_wrap_policy 7 | activation_checkpointing_policy: ${.auto_wrap_policy} 8 | # limit_all_gathers: true 9 | -------------------------------------------------------------------------------- /config/modelpool/CausalLMPool/Qwen2.5-1.5B_three_models.yaml: -------------------------------------------------------------------------------- 1 | _target_: fusion_bench.modelpool.CausalLMPool 2 | _recursive_: false 3 | enable_lazy_loading: true 4 | models: 5 | _pretrained_: Qwen/Qwen2.5-1.5B 6 | math: Qwen/Qwen2.5-Math-1.5B 7 | code: Qwen/Qwen2.5-Coder-1.5B 8 | instruction: Qwen/Qwen2.5-1.5B-Instruct 9 | model_kwargs: 10 | torch_dtype: bfloat16 11 | tokenizer: Qwen/Qwen2.5-1.5B 12 | -------------------------------------------------------------------------------- /config/modelpool/smile_mistral_exp_v3.yaml: -------------------------------------------------------------------------------- 1 | type: AutoModelForCausalLMPool 2 | # each model should have a name and a path, and the model is loaded from the path 3 | # this is equivalent to `AutoModelForCausalLM.from_pretrained(path)` 4 | models: 5 | - name: _pretrained_ 6 | path: mistralai/Mistral-7B-v0.1 7 | - name: expert_1 8 | path: uukuguy/speechless-code-mistral-7b-v1.0 9 | dtype: float16 10 | -------------------------------------------------------------------------------- /config/fabric/strategy/deepspeed.yaml: -------------------------------------------------------------------------------- 1 | # https://lightning.ai/docs/fabric/2.4.0/api/generated/lightning.fabric.strategies.DeepSpeedStrategy.html#deepspeedstrategy 2 | _target_: lightning.fabric.strategies.DeepSpeedStrategy 3 | accelerator: null 4 | zero_optimization: true 5 | stage: 2 6 | offload_optimizer: false 7 | offload_parameters: false 8 | offload_params_device: "cpu" 9 | offload_optimizer_device: "cpu" 10 | -------------------------------------------------------------------------------- /config/model/clip-vit/clip-vit-large-patch14_eight_tasks.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - clip-vit-large-patch14 3 | - clip-vit-large-patch14_sun397 4 | - clip-vit-large-patch14_stanford-cars 5 | - clip-vit-large-patch14_resisc45 6 | - clip-vit-large-patch14_eurosat 7 | - clip-vit-large-patch14_svhn 8 | - clip-vit-large-patch14_gtsrb 9 | - clip-vit-large-patch14_mnist 10 | - clip-vit-large-patch14_dtd 11 | -------------------------------------------------------------------------------- /config/modelpool/smile_mistral_exp_v2.yaml: -------------------------------------------------------------------------------- 1 | type: AutoModelForCausalLMPool 2 | # each model should have a name and a path, and the model is loaded from the path 3 | # this is equivalent to `AutoModelForCausalLM.from_pretrained(path)` 4 | models: 5 | - name: _pretrained_ 6 | path: mistralai/Mistral-7B-v0.1 7 | - name: expert_1 8 | path: cognitivecomputations/dolphin-2.1-mistral-7b 9 | dtype: float16 10 | 
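The `smile_mistral_exp_v*.yaml` pools above share one pattern: a pretrained base plus a single expert, each given as a name/path pair. As their inline comments note, loading a model is equivalent to `AutoModelForCausalLM.from_pretrained(path)`; a hedged sketch of that equivalence, with names and dtype taken from `smile_mistral_exp_v2.yaml` (the loop itself is illustrative, not the pool's actual loading code):

```python
# Illustrative only: what an AutoModelForCausalLMPool-style config resolves to.
import torch
from transformers import AutoModelForCausalLM

# name -> path pairs from smile_mistral_exp_v2.yaml; `dtype: float16` from the same file
models = {
    "_pretrained_": "mistralai/Mistral-7B-v0.1",
    "expert_1": "cognitivecomputations/dolphin-2.1-mistral-7b",
}

loaded = {
    name: AutoModelForCausalLM.from_pretrained(path, torch_dtype=torch.float16)
    for name, path in models.items()
}
```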
-------------------------------------------------------------------------------- /examples/mergebench/evaluate_all.sh: -------------------------------------------------------------------------------- 1 | for script in \ 2 | evaluate_gemma-2-9b.sh \ 3 | evaluate_gemma-2-9b-it.sh \ 4 | evaluate_Llama-3.1-8B.sh \ 5 | evaluate_Llama-3.1-8B-Instruct.sh \ 6 | evaluate_gemma-2-2b.sh \ 7 | evaluate_gemma-2-2b-it.sh \ 8 | evaluate_Llama-3.2-3B.sh \ 9 | evaluate_Llama-3.2-3B-Instruct.sh; do 10 | echo "Running $script" 11 | bash $script 12 | done 13 | -------------------------------------------------------------------------------- /fusion_bench/tasks/clip_classification/kmnist.py: -------------------------------------------------------------------------------- 1 | classnames_mapping = { 2 | "0": "お", 3 | "1": "き", 4 | "2": "す", 5 | "3": "つ", 6 | "4": "な", 7 | "5": "は", 8 | "6": "ま", 9 | "7": "や", 10 | "8": "れ", 11 | "9": "を", 12 | } 13 | classnames = [classnames_mapping[str(c)] for c in range(10)] 14 | 15 | templates = [ 16 | lambda c: f"a photo of the character {c}.", 17 | ] 18 | -------------------------------------------------------------------------------- /config/modelpool/CausalLMPool/llama-7b_3-models_v1.yaml: -------------------------------------------------------------------------------- 1 | _target_: fusion_bench.modelpool.CausalLMPool 2 | _recursive_: false 3 | enable_lazy_loading: true 4 | models: 5 | _pretrained_: meta-llama/Llama-2-7b-hf 6 | chat: meta-llama/Llama-2-7b-chat-hf 7 | math: WizardLMTeam/WizardMath-7B-V1.0 8 | code: codellama/CodeLlama-7b-hf 9 | model_kwargs: 10 | torch_dtype: bfloat16 11 | tokenizer: meta-llama/Llama-2-7b-hf 12 | -------------------------------------------------------------------------------- /config/modelpool/CausalLMPool/mixtral_moe_merging.yaml: -------------------------------------------------------------------------------- 1 | _target_: fusion_bench.modelpool.CausalLMPool 2 | models: 3 | _pretrained_: path_to_your_pretrained_model 4 | expert_1: path_to_your_expert_model_1 5 | expert_2: path_to_your_expert_model_2 6 | expert_3: path_to_your_expert_model_3 7 | expert_4: path_to_your_expert_model_4 8 | tokenizer: ${.models._pretrained_} 9 | model_kwargs: 10 | torch_dtype: bfloat16 11 | -------------------------------------------------------------------------------- /docs/javascripts/mathjax.js: -------------------------------------------------------------------------------- 1 | window.MathJax = { 2 | tex: { 3 | inlineMath: [["\\(", "\\)"]], 4 | displayMath: [["\\[", "\\]"]], 5 | processEscapes: true, 6 | processEnvironments: true 7 | }, 8 | options: { 9 | ignoreHtmlClass: ".*|", 10 | processHtmlClass: "arithmatex" 11 | } 12 | }; 13 | 14 | document$.subscribe(() => { 15 | MathJax.typesetPromise() 16 | }) -------------------------------------------------------------------------------- /examples/mergebench/evaluate_gemma-2-2b.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | SCRIPT_DIR=$(dirname $(realpath $0)) 4 | 5 | source $SCRIPT_DIR/evaluate.sh 6 | 7 | MODELS=( 8 | "google/gemma-2-2b" 9 | "MergeBench/gemma-2-2b_instruction" 10 | "MergeBench/gemma-2-2b_math" 11 | "MergeBench/gemma-2-2b_coding" 12 | "MergeBench/gemma-2-2b_multilingual" 13 | "MergeBench/gemma-2-2b_safety" 14 | ) 15 | 16 | evaluate_all_models 17 | -------------------------------------------------------------------------------- /examples/mergebench/evaluate_gemma-2-9b.sh: 
-------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | SCRIPT_DIR=$(dirname $(realpath $0)) 4 | 5 | source $SCRIPT_DIR/evaluate.sh 6 | 7 | MODELS=( 8 | "google/gemma-2-9b" 9 | "MergeBench/gemma-2-9b_instruction" 10 | "MergeBench/gemma-2-9b_math" 11 | "MergeBench/gemma-2-9b_coding" 12 | "MergeBench/gemma-2-9b_multilingual" 13 | "MergeBench/gemma-2-9b_safety" 14 | ) 15 | 16 | evaluate_all_models 17 | -------------------------------------------------------------------------------- /config/method/mixtral_moe_merging.yaml: -------------------------------------------------------------------------------- 1 | # ============================================================================= 2 | # FusionBench Method Configuration: Mixtral MoE Merging/Upscaling 3 | # ============================================================================= 4 | name: mixtral_moe_upscaling # or "mixtral_for_causal_lm_moe_upscaling" 5 | experts_per_token: 2 6 | # path to save the upscaled model 7 | save_checkpoint: null 8 | -------------------------------------------------------------------------------- /config/method/linear/task_arithmetic_for_causallm.yaml: -------------------------------------------------------------------------------- 1 | # ============================================================================= 2 | # FusionBench Method Configuration: Task Arithmetic (Causal LM) 3 | # ============================================================================= 4 | _target_: fusion_bench.method.TaskArithmeticForCausalLM 5 | scaling_factor: 0.3 6 | merge_backbone: false 7 | model_save_path: ${path.log_dir}/checkpoint 8 | -------------------------------------------------------------------------------- /config/modelpool/CausalLMPool/simle_mixtral_exp_v4.yaml: -------------------------------------------------------------------------------- 1 | _target_: fusion_bench.modelpool.CausalLMPool 2 | _recursive_: false 3 | models: 4 | _pretrained_: mistralai/Mistral-7B-v0.1 5 | expert_1: meta-math/MetaMath-Mistral-7B 6 | expert_2: cognitivecomputations/dolphin-2.1-mistral-7b 7 | expert_3: uukuguy/speechless-code-mistral-7b-v1.0 8 | model_kwargs: 9 | torch_dtype: bfloat16 10 | tokenizer: mistralai/Mistral-7B-v0.1 11 | -------------------------------------------------------------------------------- /config/modelpool/Seq2SeqLMPool/flan-t5-base_glue.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - Seq2SeqLMPool@: _template 3 | - /model/flan-t5@models: 4 | - flan-t5-base 5 | - flan-t5-base_glue-cola 6 | - flan-t5-base_glue-mnli 7 | - flan-t5-base_glue-mrpc 8 | - flan-t5-base_glue-qnli 9 | - flan-t5-base_glue-qqp 10 | - flan-t5-base_glue-rte 11 | - flan-t5-base_glue-sst2 12 | - flan-t5-base_glue-stsb 13 | -------------------------------------------------------------------------------- /docs/api/fusion_bench.program.md: -------------------------------------------------------------------------------- 1 | # fusion_bench.program 2 | 3 | ## Class Definitions 4 | 5 | - [fusion_bench.programs.BaseHydraProgram][]: Base class for Hydra-based programs in FusionBench. 6 | - [fusion_bench.programs.FabricModelFusionProgram][]: A program for fusing models using Lightning Fabric. 
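A sketch of how such a program is typically instantiated from a config. This illustrates the `_target_` convention used throughout this repository's configs rather than documented API; in particular, the `run()` entry point is an assumption:

```python
# Hedged sketch: instantiate the top-level program from a Hydra-style config.
from hydra.utils import instantiate
from omegaconf import OmegaConf

# config/_get_started/llm_slerp.yaml (shown elsewhere in this repository)
# declares `_target_: fusion_bench.programs.FabricModelFusionProgram`.
cfg = OmegaConf.load("config/_get_started/llm_slerp.yaml")

program = instantiate(cfg)  # sub-configs stay unresolved due to `_recursive_: false`
program.run()               # assumed entry point
```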
7 | 8 | ## References 9 | 10 | ::: fusion_bench.programs.BaseHydraProgram 11 | ::: fusion_bench.programs.FabricModelFusionProgram 12 | -------------------------------------------------------------------------------- /examples/mergebench/evaluate_Llama-3.1-8B.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | SCRIPT_DIR=$(dirname $(realpath $0)) 4 | 5 | source $SCRIPT_DIR/evaluate.sh 6 | 7 | MODELS=( 8 | "meta-llama/Llama-3.1-8B" 9 | "MergeBench/Llama-3.1-8B_instruction" 10 | "MergeBench/Llama-3.1-8B_math" 11 | "MergeBench/Llama-3.1-8B_coding" 12 | "MergeBench/Llama-3.1-8B_multilingual" 13 | "MergeBench/Llama-3.1-8B_safety" 14 | ) 15 | 16 | evaluate_all_models 17 | -------------------------------------------------------------------------------- /examples/mergebench/evaluate_Llama-3.2-3B.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | SCRIPT_DIR=$(dirname $(realpath $0)) 4 | 5 | source $SCRIPT_DIR/evaluate.sh 6 | 7 | MODELS=( 8 | "meta-llama/Llama-3.2-3B" 9 | "MergeBench/Llama-3.2-3B_instruction" 10 | "MergeBench/Llama-3.2-3B_math" 11 | "MergeBench/Llama-3.2-3B_coding" 12 | "MergeBench/Llama-3.2-3B_multilingual" 13 | "MergeBench/Llama-3.2-3B_safety" 14 | ) 15 | 16 | evaluate_all_models 17 | -------------------------------------------------------------------------------- /config/fabric/loggers/csv_logger.yaml: -------------------------------------------------------------------------------- 1 | _target_: lightning.fabric.loggers.CSVLogger 2 | # the logs directory would be `root_dir/name/version_X` 3 | # for example, `outputs/logs/lightning_logs/version_0` and `outputs/logs/lightning_logs/version_1` by default 4 | 5 | # root directory for all logging 6 | root_dir: ${path.log_dir} 7 | # the name of the experiment 8 | name: "" 9 | version: "" 10 | prefix: "" 11 | flush_logs_every_n_steps: 100 12 | -------------------------------------------------------------------------------- /fusion_bench/constants/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa F401 2 | import importlib.metadata 3 | 4 | from .paths import * 5 | from .runtime import RuntimeConstants 6 | 7 | # fusionbench version 8 | try: 9 | FUSION_BENCH_VERSION = importlib.metadata.version("fusion-bench") 10 | except importlib.metadata.PackageNotFoundError: 11 | # Fallback when package is not installed (e.g., during development) 12 | FUSION_BENCH_VERSION = "0.0.0.dev" 13 | -------------------------------------------------------------------------------- /fusion_bench/models/modeling_smile_llama/register.py: -------------------------------------------------------------------------------- 1 | from transformers import AutoConfig, AutoModel, AutoModelForCausalLM 2 | 3 | from .configuration_smile_llama import SmileLlamaConfig 4 | from .modeling_smile_llama import SmileLlamaForCausalLM, SmileLlamaModel 5 | 6 | AutoConfig.register("smile_llama", SmileLlamaConfig) 7 | AutoModel.register(SmileLlamaConfig, SmileLlamaModel) 8 | AutoModelForCausalLM.register(SmileLlamaConfig, SmileLlamaForCausalLM) 9 | -------------------------------------------------------------------------------- /fusion_bench/models/modeling_smile_qwen2/register.py: -------------------------------------------------------------------------------- 1 | from transformers import AutoConfig, AutoModel, AutoModelForCausalLM 2 | 3 | from .configuration_smile_qwen2 import SmileQwen2Config 4 | from 
.modeling_smile_qwen2 import SmileQwen2ForCausalLM, SmileQwen2Model 5 | 6 | AutoConfig.register("smile_qwen2", SmileQwen2Config) 7 | AutoModel.register(SmileQwen2Config, SmileQwen2Model) 8 | AutoModelForCausalLM.register(SmileQwen2Config, SmileQwen2ForCausalLM) 9 | -------------------------------------------------------------------------------- /config/model/clip-vit/clip-vit-base-patch32_eight_tasks.yaml: -------------------------------------------------------------------------------- 1 | # The 8 tasks used in the Task Arithmetic paper 2 | defaults: 3 | - clip-vit-base-patch32 4 | - clip-vit-base-patch32_sun397 5 | - clip-vit-base-patch32_stanford-cars 6 | - clip-vit-base-patch32_resisc45 7 | - clip-vit-base-patch32_eurosat 8 | - clip-vit-base-patch32_svhn 9 | - clip-vit-base-patch32_gtsrb 10 | - clip-vit-base-patch32_mnist 11 | - clip-vit-base-patch32_dtd 12 | -------------------------------------------------------------------------------- /docs/taskpool/flan-t5_generation.md: -------------------------------------------------------------------------------- 1 | # Flan-T5 Models for Text Generation Tasks 2 | 3 | This task pool provides a set of text generation tasks from the GLUE benchmark for the Flan-T5 model. 4 | Each task is associated with a dataset. 5 | We report the exact match accuracy metric for CoLA, MNLI, MRPC, QNLI, QQP, RTE, and SST2, and Spearman's rho for STSB. 6 | 7 | ## References 8 | 9 | ::: fusion_bench.compat.taskpool.flan_t5_glue_text_generation 10 | -------------------------------------------------------------------------------- /config/fabric/loggers/tensorboard_logger.yaml: -------------------------------------------------------------------------------- 1 | _target_: lightning.fabric.loggers.TensorBoardLogger 2 | # the logs directory would be `root_dir/name/version_X` 3 | # for example, `outputs/logs/lightning_logs/version_0` and `outputs/logs/lightning_logs/version_1` by default 4 | 5 | # root directory for all logging 6 | root_dir: ${path.log_dir} 7 | # the name of the experiment 8 | name: "" 9 | version: "" 10 | sub_dir: null 11 | default_hp_metric: false 12 | -------------------------------------------------------------------------------- /config/modelpool/CLIPVisionModelPool/clip-vit-base-patch16_TA8.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - CLIPVisionModelPool@: _template 3 | - /model/clip-vit@models: clip-vit-base-patch16_eight_tasks 4 | - /dataset/image_classification/train@train_datasets: the_eight_tasks 5 | - /dataset/image_classification/test@test_datasets: the_eight_tasks 6 | processor: 7 | _target_: transformers.CLIPProcessor.from_pretrained 8 | pretrained_model_name_or_path: openai/clip-vit-base-patch16 9 | -------------------------------------------------------------------------------- /config/method/dummy.yaml: -------------------------------------------------------------------------------- 1 | # ============================================================================= 2 | # FusionBench Method Configuration: Dummy 3 | # ============================================================================= 4 | # No-op method for testing pipelines and wiring. 5 | # Instantiates and exits without modifying models.
6 | # ============================================================================= 7 | _target_: fusion_bench.method.DummyAlgorithm 8 | -------------------------------------------------------------------------------- /config/method/linear/weighted_average.yaml: -------------------------------------------------------------------------------- 1 | # ============================================================================= 2 | # FusionBench Method Configuration: Weighted Average (Linear) 3 | # ============================================================================= 4 | _target_: fusion_bench.method.WeightedAverageAlgorithm 5 | normalize: true # if true, the weights will be normalized before merging 6 | weights: # List of weights for each model 7 | - 0.5 8 | - 0.5 9 | -------------------------------------------------------------------------------- /config/modelpool/CausalLMPool/mergebench/gemma-2-2b.yaml: -------------------------------------------------------------------------------- 1 | _target_: fusion_bench.modelpool.CausalLMPool 2 | models: 3 | _pretrained_: google/gemma-2-2b 4 | instruction: MergeBench/gemma-2-2b_instruction 5 | math: MergeBench/gemma-2-2b_math 6 | coding: MergeBench/gemma-2-2b_coding 7 | multilingual: MergeBench/gemma-2-2b_multilingual 8 | safety: MergeBench/gemma-2-2b_safety 9 | model_kwargs: 10 | torch_dtype: bfloat16 11 | tokenizer: google/gemma-2-2b 12 | -------------------------------------------------------------------------------- /config/modelpool/CausalLMPool/mergebench/gemma-2-9b.yaml: -------------------------------------------------------------------------------- 1 | _target_: fusion_bench.modelpool.CausalLMPool 2 | models: 3 | _pretrained_: google/gemma-2-9b 4 | instruction: MergeBench/gemma-2-9b_instruction 5 | math: MergeBench/gemma-2-9b_math 6 | coding: MergeBench/gemma-2-9b_coding 7 | multilingual: MergeBench/gemma-2-9b_multilingual 8 | safety: MergeBench/gemma-2-9b_safety 9 | model_kwargs: 10 | torch_dtype: bfloat16 11 | tokenizer: google/gemma-2-9b 12 | -------------------------------------------------------------------------------- /config/model/clip-vit/download_TALL20_models.sh: -------------------------------------------------------------------------------- 1 | #! 
/bin/bash 2 | for MODEL in clip-vit-base-patch32 clip-vit-base-patch16 clip-vit-large-patch14; do 3 | for TASK in sun397 stanford-cars resisc45 eurosat svhn gtsrb mnist dtd oxford_flowers102 pcam fer2013 oxford-iiit-pet stl10 cifar100 cifar10 food101 fashion_mnist emnist_letters kmnist rendered-sst2; do 4 | huggingface-cli download --local-dir tanganke/${MODEL}_${TASK} tanganke/${MODEL}_${TASK} 5 | done 6 | done 7 | -------------------------------------------------------------------------------- /examples/open_clip/evaluate_single_model.sh: -------------------------------------------------------------------------------- 1 | fusion_bench \ 2 | method=dummy \ 3 | modelpool=OpenCLIPVisionModelPool/ViT-B-32_individual \ 4 | taskpool=OpenCLIPVisionModelTaskPool/ViT-B-32_TA8 5 | 6 | fusion_bench \ 7 | method=dummy \ 8 | modelpool=OpenCLIPVisionModelPool/ViT-B-32_individual \ 9 | modelpool.models._pretrained_.pickle_path="$\{...model_dir\}/ViT-B-32/SUN397/finetuned.pt" \ 10 | taskpool=OpenCLIPVisionModelTaskPool/ViT-B-32_TA8 11 | -------------------------------------------------------------------------------- /fusion_bench/models/modeling_smile_mistral/register.py: -------------------------------------------------------------------------------- 1 | from transformers import AutoConfig, AutoModel, AutoModelForCausalLM 2 | 3 | from .configuration_smile_mistral import SmileMistralConfig 4 | from .modeling_smile_mistral import SmileMistralForCausalLM, SmileMistralModel 5 | 6 | AutoConfig.register("smile_mistral", SmileMistralConfig) 7 | AutoModel.register(SmileMistralConfig, SmileMistralModel) 8 | AutoModelForCausalLM.register(SmileMistralConfig, SmileMistralForCausalLM) 9 | -------------------------------------------------------------------------------- /config/_get_started/llm_slerp.yaml: -------------------------------------------------------------------------------- 1 | _target_: fusion_bench.programs.FabricModelFusionProgram 2 | _recursive_: false 3 | method: 4 | _target_: fusion_bench.method.SlerpForCausalLM 5 | t: 0.5 6 | modelpool: 7 | _target_: fusion_bench.modelpool.CausalLMPool 8 | models: 9 | model_1: ibivibiv/alpaca-dragon-72b-v1 10 | model_2: moreh/MoMo-72B-lora-1.8.7-DPO 11 | tokenizer: ibivibiv/alpaca-dragon-72b-v1 12 | enable_lazy_loading: true # load each model as a LazyStateDict 13 | -------------------------------------------------------------------------------- /docs/algorithms/slerp.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: SLERP 3 | --- 4 | # Spherical Linear Interpolation (SLERP) 5 | 6 | SLERP stands for Spherical Linear intERPolation[^1]. 7 | 8 | ## Implementation Details 9 | 10 | - [fusion_bench.method.SlerpMergeAlgorithm][]: Architecture-agnostic implementation. 11 | - [fusion_bench.method.SlerpForCausalLM][]: SLERP for large language models. 12 | 13 | [^1]: SLERP For Model Merging – A Primer https://www.coinfeeds.ai/ai-blog/slerp-model-merging-primer 14 | -------------------------------------------------------------------------------- /fusion_bench/metrics/text_to_image_generation/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | This module implements metrics for text-to-image generation tasks, 3 | including reward functions for alignment and Reinforcement Learning from Human Feedback (RLHF) training.
4 | """ 5 | 6 | # flake8: noqa F401 7 | from .aesthetic_scorer import aesthetic_scorer 8 | from .compressibility import jpeg_compressibility_scorer, jpeg_incompressibility_scorer 9 | from .pickscore_scorer import pickscore_scorer 10 | -------------------------------------------------------------------------------- /fusion_bench/models/modeling_losparse_llama/register.py: -------------------------------------------------------------------------------- 1 | from transformers import AutoConfig, AutoModel, AutoModelForCausalLM 2 | 3 | from .configuration_losparse_llama import LoSparseLlamaConfig 4 | from .modeling_losparse_llama import LoSparseLlamaForCausalLM, LoSparseLlamaModel 5 | 6 | AutoConfig.register("losparse_llama", LoSparseLlamaConfig) 7 | AutoModel.register(LoSparseLlamaConfig, LoSparseLlamaModel) 8 | AutoModelForCausalLM.register(LoSparseLlamaConfig, LoSparseLlamaForCausalLM) 9 | -------------------------------------------------------------------------------- /config/modelpool/ResNetForImageClassification/transformers/resnet18_dtd.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - /dataset/image_classification/train@train_datasets: 3 | - dtd 4 | - /dataset/image_classification/test@val_datasets: 5 | - dtd 6 | - _self_ 7 | _target_: fusion_bench.modelpool.ResNetForImageClassificationPool 8 | _recursive_: False 9 | type: transformers 10 | models: 11 | _pretrained_: 12 | config_path: microsoft/resnet-18 13 | pretrained: true 14 | dataset_name: dtd -------------------------------------------------------------------------------- /config/modelpool/ResNetForImageClassification/transformers/resnet50_dtd.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - /dataset/image_classification/train@train_datasets: 3 | - dtd 4 | - /dataset/image_classification/test@val_datasets: 5 | - dtd 6 | - _self_ 7 | _target_: fusion_bench.modelpool.ResNetForImageClassificationPool 8 | _recursive_: False 9 | type: transformers 10 | models: 11 | _pretrained_: 12 | config_path: microsoft/resnet-50 13 | pretrained: true 14 | dataset_name: dtd -------------------------------------------------------------------------------- /examples/mergebench/evaluate_gemma-2-2b-it.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | SCRIPT_DIR=$(dirname $(realpath $0)) 4 | 5 | source $SCRIPT_DIR/evaluate.sh 6 | 7 | MODELS=( 8 | "google/gemma-2-2b-it" 9 | "MergeBench/gemma-2-2b-it_instruction" 10 | "MergeBench/gemma-2-2b-it_math" 11 | "MergeBench/gemma-2-2b-it_coding" 12 | "MergeBench/gemma-2-2b-it_multilingual" 13 | "MergeBench/gemma-2-2b-it_safety" 14 | ) 15 | 16 | LM_EVAL_ARGS="--apply_chat_template" 17 | 18 | evaluate_all_models 19 | -------------------------------------------------------------------------------- /examples/mergebench/evaluate_gemma-2-9b-it.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | SCRIPT_DIR=$(dirname $(realpath $0)) 4 | 5 | source $SCRIPT_DIR/evaluate.sh 6 | 7 | MODELS=( 8 | "google/gemma-2-9b-it" 9 | "MergeBench/gemma-2-9b-it_instruction" 10 | "MergeBench/gemma-2-9b-it_math" 11 | "MergeBench/gemma-2-9b-it_coding" 12 | "MergeBench/gemma-2-9b-it_multilingual" 13 | "MergeBench/gemma-2-9b-it_safety" 14 | ) 15 | 16 | LM_EVAL_ARGS="--apply_chat_template" 17 | 18 | evaluate_all_models 19 | -------------------------------------------------------------------------------- 
/fusion_bench/dataset/fer2013.py: -------------------------------------------------------------------------------- 1 | from datasets import load_dataset 2 | 3 | 4 | def load_fer2013(path: str = "clip-benchmark/wds_fer2013", split: str = "train"): 5 | dataset = load_dataset(path, split=split) 6 | dataset = dataset.remove_columns(["__key__", "__url__"]) 7 | dataset = dataset.rename_columns({"jpg": "image", "cls": "label"}) 8 | return dataset 9 | 10 | 11 | if __name__ == "__main__": 12 | dataset = load_fer2013(split="test") 13 | print(dataset) 14 | -------------------------------------------------------------------------------- /config/modelpool/CausalLMPool/mergebench/Llama-3.1-8B.yaml: -------------------------------------------------------------------------------- 1 | _target_: fusion_bench.modelpool.CausalLMPool 2 | models: 3 | _pretrained_: meta-llama/Llama-3.1-8B 4 | instruction: MergeBench/Llama-3.1-8B_instruction 5 | math: MergeBench/Llama-3.1-8B_math 6 | coding: MergeBench/Llama-3.1-8B_coding 7 | multilingual: MergeBench/Llama-3.1-8B_multilingual 8 | safety: MergeBench/Llama-3.1-8B_safety 9 | model_kwargs: 10 | torch_dtype: bfloat16 11 | tokenizer: meta-llama/Llama-3.1-8B 12 | -------------------------------------------------------------------------------- /config/modelpool/CausalLMPool/mergebench/Llama-3.2-3B.yaml: -------------------------------------------------------------------------------- 1 | _target_: fusion_bench.modelpool.CausalLMPool 2 | models: 3 | _pretrained_: meta-llama/Llama-3.2-3B 4 | instruction: MergeBench/Llama-3.2-3B_instruction 5 | math: MergeBench/Llama-3.2-3B_math 6 | coding: MergeBench/Llama-3.2-3B_coding 7 | multilingual: MergeBench/Llama-3.2-3B_multilingual 8 | safety: MergeBench/Llama-3.2-3B_safety 9 | model_kwargs: 10 | torch_dtype: bfloat16 11 | tokenizer: meta-llama/Llama-3.2-3B 12 | -------------------------------------------------------------------------------- /config/modelpool/CausalLMPool/mergebench/gemma-2-2b-it.yaml: -------------------------------------------------------------------------------- 1 | _target_: fusion_bench.modelpool.CausalLMPool 2 | models: 3 | _pretrained_: google/gemma-2-2b-it 4 | instruction: MergeBench/gemma-2-2b-it_instruction 5 | math: MergeBench/gemma-2-2b-it_math 6 | coding: MergeBench/gemma-2-2b-it_coding 7 | multilingual: MergeBench/gemma-2-2b-it_multilingual 8 | safety: MergeBench/gemma-2-2b-it_safety 9 | model_kwargs: 10 | torch_dtype: bfloat16 11 | tokenizer: google/gemma-2-2b-it 12 | -------------------------------------------------------------------------------- /config/modelpool/CausalLMPool/mergebench/gemma-2-9b-it.yaml: -------------------------------------------------------------------------------- 1 | _target_: fusion_bench.modelpool.CausalLMPool 2 | models: 3 | _pretrained_: google/gemma-2-9b-it 4 | instruction: MergeBench/gemma-2-9b-it_instruction 5 | math: MergeBench/gemma-2-9b-it_math 6 | coding: MergeBench/gemma-2-9b-it_coding 7 | multilingual: MergeBench/gemma-2-9b-it_multilingual 8 | safety: MergeBench/gemma-2-9b-it_safety 9 | model_kwargs: 10 | torch_dtype: bfloat16 11 | tokenizer: google/gemma-2-9b-it 12 | -------------------------------------------------------------------------------- /config/modelpool/ResNetForImageClassification/transformers/resnet152_dtd.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - /dataset/image_classification/train@train_datasets: 3 | - dtd 4 | - /dataset/image_classification/test@val_datasets: 5 | - dtd 6 | - _self_ 
7 | _target_: fusion_bench.modelpool.ResNetForImageClassificationPool 8 | _recursive_: False 9 | type: transformers 10 | models: 11 | _pretrained_: 12 | config_path: microsoft/resnet-152 13 | pretrained: true 14 | dataset_name: dtd -------------------------------------------------------------------------------- /config/modelpool/ResNetForImageClassification/transformers/resnet18_pcam.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - /dataset/image_classification/train@train_datasets: 3 | - pcam 4 | - /dataset/image_classification/test@val_datasets: 5 | - pcam 6 | - _self_ 7 | _target_: fusion_bench.modelpool.ResNetForImageClassificationPool 8 | _recursive_: False 9 | type: transformers 10 | models: 11 | _pretrained_: 12 | config_path: microsoft/resnet-18 13 | pretrained: true 14 | dataset_name: pcam -------------------------------------------------------------------------------- /config/modelpool/ResNetForImageClassification/transformers/resnet18_svhn.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - /dataset/image_classification/train@train_datasets: 3 | - svhn 4 | - /dataset/image_classification/test@val_datasets: 5 | - svhn 6 | - _self_ 7 | _target_: fusion_bench.modelpool.ResNetForImageClassificationPool 8 | _recursive_: False 9 | type: transformers 10 | models: 11 | _pretrained_: 12 | config_path: microsoft/resnet-18 13 | pretrained: true 14 | dataset_name: svhn -------------------------------------------------------------------------------- /config/modelpool/ResNetForImageClassification/transformers/resnet50_pcam.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - /dataset/image_classification/train@train_datasets: 3 | - pcam 4 | - /dataset/image_classification/test@val_datasets: 5 | - pcam 6 | - _self_ 7 | _target_: fusion_bench.modelpool.ResNetForImageClassificationPool 8 | _recursive_: False 9 | type: transformers 10 | models: 11 | _pretrained_: 12 | config_path: microsoft/resnet-50 13 | pretrained: true 14 | dataset_name: pcam -------------------------------------------------------------------------------- /config/modelpool/ResNetForImageClassification/transformers/resnet50_svhn.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - /dataset/image_classification/train@train_datasets: 3 | - svhn 4 | - /dataset/image_classification/test@val_datasets: 5 | - svhn 6 | - _self_ 7 | _target_: fusion_bench.modelpool.ResNetForImageClassificationPool 8 | _recursive_: False 9 | type: transformers 10 | models: 11 | _pretrained_: 12 | config_path: microsoft/resnet-50 13 | pretrained: true 14 | dataset_name: svhn -------------------------------------------------------------------------------- /fusion_bench/method/linear/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa F401 2 | from .expo import ExPOAlgorithm 3 | from .linear_interpolation import LinearInterpolationAlgorithm 4 | from .llama_expo import ExPOAlgorithmForLlama 5 | from .simple_average_for_causallm import SimpleAverageForCausalLM, SimpleAverageForLlama 6 | from .task_arithmetic_for_causallm import ( 7 | TaskArithmeticForCausalLM, 8 | TaskArithmeticForLlama, 9 | ) 10 | from .ties_merging_for_causallm import TiesMergingForCausalLM 11 | -------------------------------------------------------------------------------- 
/config/modelpool/CLIPVisionModelPool/clip-vit-base-patch16_TALL14_model_only.yaml: -------------------------------------------------------------------------------- 1 | # The 14 task used in the paper: 2 | # Wang et al. Localizing Task Information for Improved Model Merging and Compression 3 | # http://arxiv.org/abs/2405.07813 4 | defaults: 5 | - CLIPVisionModelPool@: _template 6 | - /model/clip-vit@models: clip-vit-base-patch16_TALL14 7 | processor: 8 | _target_: transformers.CLIPProcessor.from_pretrained 9 | pretrained_model_name_or_path: openai/clip-vit-base-patch16 10 | -------------------------------------------------------------------------------- /config/modelpool/ResNetForImageClassification/transformers/resnet152_gtsrb.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - /dataset/image_classification/train@train_datasets: 3 | - gtsrb 4 | - /dataset/image_classification/test@val_datasets: 5 | - gtsrb 6 | - _self_ 7 | _target_: fusion_bench.modelpool.ResNetForImageClassificationPool 8 | _recursive_: False 9 | type: transformers 10 | models: 11 | _pretrained_: 12 | config_path: microsoft/resnet-152 13 | pretrained: true 14 | dataset_name: gtsrb -------------------------------------------------------------------------------- /config/modelpool/ResNetForImageClassification/transformers/resnet152_mnist.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - /dataset/image_classification/train@train_datasets: 3 | - mnist 4 | - /dataset/image_classification/test@val_datasets: 5 | - mnist 6 | - _self_ 7 | _target_: fusion_bench.modelpool.ResNetForImageClassificationPool 8 | _recursive_: False 9 | type: transformers 10 | models: 11 | _pretrained_: 12 | config_path: microsoft/resnet-152 13 | pretrained: true 14 | dataset_name: mnist -------------------------------------------------------------------------------- /config/modelpool/ResNetForImageClassification/transformers/resnet152_pcam.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - /dataset/image_classification/train@train_datasets: 3 | - pcam 4 | - /dataset/image_classification/test@val_datasets: 5 | - pcam 6 | - _self_ 7 | _target_: fusion_bench.modelpool.ResNetForImageClassificationPool 8 | _recursive_: False 9 | type: transformers 10 | models: 11 | _pretrained_: 12 | config_path: microsoft/resnet-152 13 | pretrained: true 14 | dataset_name: pcam -------------------------------------------------------------------------------- /config/modelpool/ResNetForImageClassification/transformers/resnet152_stl10.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - /dataset/image_classification/train@train_datasets: 3 | - stl10 4 | - /dataset/image_classification/test@val_datasets: 5 | - stl10 6 | - _self_ 7 | _target_: fusion_bench.modelpool.ResNetForImageClassificationPool 8 | _recursive_: False 9 | type: transformers 10 | models: 11 | _pretrained_: 12 | config_path: microsoft/resnet-152 13 | pretrained: true 14 | dataset_name: stl10 -------------------------------------------------------------------------------- /config/modelpool/ResNetForImageClassification/transformers/resnet152_svhn.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - /dataset/image_classification/train@train_datasets: 3 | - svhn 4 | - /dataset/image_classification/test@val_datasets: 5 | - svhn 6 | - _self_ 7 | 
_target_: fusion_bench.modelpool.ResNetForImageClassificationPool 8 | _recursive_: False 9 | type: transformers 10 | models: 11 | _pretrained_: 12 | config_path: microsoft/resnet-152 13 | pretrained: true 14 | dataset_name: svhn -------------------------------------------------------------------------------- /config/modelpool/ResNetForImageClassification/transformers/resnet18_gtsrb.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - /dataset/image_classification/train@train_datasets: 3 | - gtsrb 4 | - /dataset/image_classification/test@val_datasets: 5 | - gtsrb 6 | - _self_ 7 | _target_: fusion_bench.modelpool.ResNetForImageClassificationPool 8 | _recursive_: False 9 | type: transformers 10 | models: 11 | _pretrained_: 12 | config_path: microsoft/resnet-18 13 | pretrained: true 14 | dataset_name: gtsrb -------------------------------------------------------------------------------- /config/modelpool/ResNetForImageClassification/transformers/resnet18_mnist.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - /dataset/image_classification/train@train_datasets: 3 | - mnist 4 | - /dataset/image_classification/test@val_datasets: 5 | - mnist 6 | - _self_ 7 | _target_: fusion_bench.modelpool.ResNetForImageClassificationPool 8 | _recursive_: False 9 | type: transformers 10 | models: 11 | _pretrained_: 12 | config_path: microsoft/resnet-18 13 | pretrained: true 14 | dataset_name: mnist -------------------------------------------------------------------------------- /config/modelpool/ResNetForImageClassification/transformers/resnet18_stl10.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - /dataset/image_classification/train@train_datasets: 3 | - stl10 4 | - /dataset/image_classification/test@val_datasets: 5 | - stl10 6 | - _self_ 7 | _target_: fusion_bench.modelpool.ResNetForImageClassificationPool 8 | _recursive_: False 9 | type: transformers 10 | models: 11 | _pretrained_: 12 | config_path: microsoft/resnet-18 13 | pretrained: true 14 | dataset_name: stl10 -------------------------------------------------------------------------------- /config/modelpool/ResNetForImageClassification/transformers/resnet50_gtsrb.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - /dataset/image_classification/train@train_datasets: 3 | - gtsrb 4 | - /dataset/image_classification/test@val_datasets: 5 | - gtsrb 6 | - _self_ 7 | _target_: fusion_bench.modelpool.ResNetForImageClassificationPool 8 | _recursive_: False 9 | type: transformers 10 | models: 11 | _pretrained_: 12 | config_path: microsoft/resnet-50 13 | pretrained: true 14 | dataset_name: gtsrb -------------------------------------------------------------------------------- /config/modelpool/ResNetForImageClassification/transformers/resnet50_mnist.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - /dataset/image_classification/train@train_datasets: 3 | - mnist 4 | - /dataset/image_classification/test@val_datasets: 5 | - mnist 6 | - _self_ 7 | _target_: fusion_bench.modelpool.ResNetForImageClassificationPool 8 | _recursive_: False 9 | type: transformers 10 | models: 11 | _pretrained_: 12 | config_path: microsoft/resnet-50 13 | pretrained: true 14 | dataset_name: mnist -------------------------------------------------------------------------------- 
/config/modelpool/ResNetForImageClassification/transformers/resnet50_stl10.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - /dataset/image_classification/train@train_datasets: 3 | - stl10 4 | - /dataset/image_classification/test@val_datasets: 5 | - stl10 6 | - _self_ 7 | _target_: fusion_bench.modelpool.ResNetForImageClassificationPool 8 | _recursive_: False 9 | type: transformers 10 | models: 11 | _pretrained_: 12 | config_path: microsoft/resnet-50 13 | pretrained: true 14 | dataset_name: stl10 -------------------------------------------------------------------------------- /config/method/dare/ties_merging.yaml: -------------------------------------------------------------------------------- 1 | _target_: fusion_bench.method.dare.DareTiesMerging 2 | # === DARE parameters === 3 | sparsity_ratio: 0.5 4 | only_on_linear_weights: false 5 | rescale: true 6 | # === Ties merging parameters === 7 | # Scaling factor $\lambda$ 8 | scaling_factor: 0.5 9 | threshold: 20 10 | # List of keys to remove from the state dict, default is empty 11 | remove_keys: [] 12 | # Function to merge the models, default is sum. Options are 'sum', 'mean', and 'max' 13 | merge_func: sum 14 | -------------------------------------------------------------------------------- /config/modelpool/CLIPVisionModelPool/clip-vit-large-patch14_TALL14_model_only.yaml: -------------------------------------------------------------------------------- 1 | # The 14 task used in the paper: 2 | # Wang et al. Localizing Task Information for Improved Model Merging and Compression 3 | # http://arxiv.org/abs/2405.07813 4 | defaults: 5 | - CLIPVisionModelPool@: _template 6 | - /model/clip-vit@models: clip-vit-large-patch14_TALL14 7 | processor: 8 | _target_: transformers.CLIPProcessor.from_pretrained 9 | pretrained_model_name_or_path: openai/clip-vit-large-patch14 10 | -------------------------------------------------------------------------------- /config/modelpool/ResNetForImageClassification/transformers/resnet152_kmnist.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - /dataset/image_classification/train@train_datasets: 3 | - kmnist 4 | - /dataset/image_classification/test@val_datasets: 5 | - kmnist 6 | - _self_ 7 | _target_: fusion_bench.modelpool.ResNetForImageClassificationPool 8 | _recursive_: False 9 | type: transformers 10 | models: 11 | _pretrained_: 12 | config_path: microsoft/resnet-152 13 | pretrained: true 14 | dataset_name: kmnist -------------------------------------------------------------------------------- /config/modelpool/ResNetForImageClassification/transformers/resnet152_sun397.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - /dataset/image_classification/train@train_datasets: 3 | - sun397 4 | - /dataset/image_classification/test@val_datasets: 5 | - sun397 6 | - _self_ 7 | _target_: fusion_bench.modelpool.ResNetForImageClassificationPool 8 | _recursive_: False 9 | type: transformers 10 | models: 11 | _pretrained_: 12 | config_path: microsoft/resnet-152 13 | pretrained: true 14 | dataset_name: sun397 -------------------------------------------------------------------------------- /config/modelpool/ResNetForImageClassification/transformers/resnet18_kmnist.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - /dataset/image_classification/train@train_datasets: 3 | - kmnist 4 | - 
/dataset/image_classification/test@val_datasets: 5 | - kmnist 6 | - _self_ 7 | _target_: fusion_bench.modelpool.ResNetForImageClassificationPool 8 | _recursive_: False 9 | type: transformers 10 | models: 11 | _pretrained_: 12 | config_path: microsoft/resnet-18 13 | pretrained: true 14 | dataset_name: kmnist -------------------------------------------------------------------------------- /config/modelpool/ResNetForImageClassification/transformers/resnet18_sun397.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - /dataset/image_classification/train@train_datasets: 3 | - sun397 4 | - /dataset/image_classification/test@val_datasets: 5 | - sun397 6 | - _self_ 7 | _target_: fusion_bench.modelpool.ResNetForImageClassificationPool 8 | _recursive_: False 9 | type: transformers 10 | models: 11 | _pretrained_: 12 | config_path: microsoft/resnet-18 13 | pretrained: true 14 | dataset_name: sun397 -------------------------------------------------------------------------------- /config/modelpool/ResNetForImageClassification/transformers/resnet50_kmnist.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - /dataset/image_classification/train@train_datasets: 3 | - kmnist 4 | - /dataset/image_classification/test@val_datasets: 5 | - kmnist 6 | - _self_ 7 | _target_: fusion_bench.modelpool.ResNetForImageClassificationPool 8 | _recursive_: False 9 | type: transformers 10 | models: 11 | _pretrained_: 12 | config_path: microsoft/resnet-50 13 | pretrained: true 14 | dataset_name: kmnist -------------------------------------------------------------------------------- /config/modelpool/ResNetForImageClassification/transformers/resnet50_sun397.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - /dataset/image_classification/train@train_datasets: 3 | - sun397 4 | - /dataset/image_classification/test@val_datasets: 5 | - sun397 6 | - _self_ 7 | _target_: fusion_bench.modelpool.ResNetForImageClassificationPool 8 | _recursive_: False 9 | type: transformers 10 | models: 11 | _pretrained_: 12 | config_path: microsoft/resnet-50 13 | pretrained: true 14 | dataset_name: sun397 -------------------------------------------------------------------------------- /config/modelpool/ResNetForImageClassification/transformers/resnet152_cifar10.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - /dataset/image_classification/train@train_datasets: 3 | - cifar10 4 | - /dataset/image_classification/test@val_datasets: 5 | - cifar10 6 | - _self_ 7 | _target_: fusion_bench.modelpool.ResNetForImageClassificationPool 8 | _recursive_: False 9 | type: transformers 10 | models: 11 | _pretrained_: 12 | config_path: microsoft/resnet-152 13 | pretrained: true 14 | dataset_name: cifar10 -------------------------------------------------------------------------------- /config/modelpool/ResNetForImageClassification/transformers/resnet152_eurosat.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - /dataset/image_classification/train@train_datasets: 3 | - eurosat 4 | - /dataset/image_classification/test@val_datasets: 5 | - eurosat 6 | - _self_ 7 | _target_: fusion_bench.modelpool.ResNetForImageClassificationPool 8 | _recursive_: False 9 | type: transformers 10 | models: 11 | _pretrained_: 12 | config_path: microsoft/resnet-152 13 | pretrained: true 14 | dataset_name: eurosat 
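The ResNet modelpool configs in this group all follow the same template: a Hydra `defaults` list that binds one dataset to `train_datasets`/`val_datasets`, plus a `_pretrained_` entry naming a Hugging Face checkpoint. A hedged sketch of what the `_pretrained_` entry plausibly resolves to for the EuroSAT config above (the `num_labels` handling is an assumption about how the pool adapts the classification head; EuroSAT has 10 classes):

```python
# Illustrative load for `config_path: microsoft/resnet-50` with `pretrained: true`.
from transformers import AutoImageProcessor, ResNetForImageClassification

processor = AutoImageProcessor.from_pretrained("microsoft/resnet-50")
model = ResNetForImageClassification.from_pretrained(
    "microsoft/resnet-50",
    num_labels=10,                 # EuroSAT has 10 land-use classes
    ignore_mismatched_sizes=True,  # replace the 1000-class ImageNet head
)
```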
-------------------------------------------------------------------------------- /config/modelpool/ResNetForImageClassification/transformers/resnet152_fer2013.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - /dataset/image_classification/train@train_datasets: 3 | - fer2013 4 | - /dataset/image_classification/test@val_datasets: 5 | - fer2013 6 | - _self_ 7 | _target_: fusion_bench.modelpool.ResNetForImageClassificationPool 8 | _recursive_: False 9 | type: transformers 10 | models: 11 | _pretrained_: 12 | config_path: microsoft/resnet-152 13 | pretrained: true 14 | dataset_name: fer2013 -------------------------------------------------------------------------------- /config/modelpool/ResNetForImageClassification/transformers/resnet152_food101.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - /dataset/image_classification/train@train_datasets: 3 | - food101 4 | - /dataset/image_classification/test@val_datasets: 5 | - food101 6 | - _self_ 7 | _target_: fusion_bench.modelpool.ResNetForImageClassificationPool 8 | _recursive_: False 9 | type: transformers 10 | models: 11 | _pretrained_: 12 | config_path: microsoft/resnet-152 13 | pretrained: true 14 | dataset_name: food101 -------------------------------------------------------------------------------- /config/modelpool/ResNetForImageClassification/transformers/resnet18_cifar10.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - /dataset/image_classification/train@train_datasets: 3 | - cifar10 4 | - /dataset/image_classification/test@val_datasets: 5 | - cifar10 6 | - _self_ 7 | _target_: fusion_bench.modelpool.ResNetForImageClassificationPool 8 | _recursive_: False 9 | type: transformers 10 | models: 11 | _pretrained_: 12 | config_path: microsoft/resnet-18 13 | pretrained: true 14 | dataset_name: cifar10 -------------------------------------------------------------------------------- /config/modelpool/ResNetForImageClassification/transformers/resnet18_cifar100.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - /dataset/image_classification/train@train_datasets: 3 | - cifar100 4 | - /dataset/image_classification/test@val_datasets: 5 | - cifar100 6 | - _self_ 7 | _target_: fusion_bench.modelpool.ResNetForImageClassificationPool 8 | _recursive_: False 9 | type: transformers 10 | models: 11 | _pretrained_: 12 | config_path: microsoft/resnet-18 13 | pretrained: true 14 | dataset_name: cifar100 -------------------------------------------------------------------------------- /config/modelpool/ResNetForImageClassification/transformers/resnet18_eurosat.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - /dataset/image_classification/train@train_datasets: 3 | - eurosat 4 | - /dataset/image_classification/test@val_datasets: 5 | - eurosat 6 | - _self_ 7 | _target_: fusion_bench.modelpool.ResNetForImageClassificationPool 8 | _recursive_: False 9 | type: transformers 10 | models: 11 | _pretrained_: 12 | config_path: microsoft/resnet-18 13 | pretrained: true 14 | dataset_name: eurosat -------------------------------------------------------------------------------- /config/modelpool/ResNetForImageClassification/transformers/resnet18_fer2013.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - 
/dataset/image_classification/train@train_datasets: 3 | - fer2013 4 | - /dataset/image_classification/test@val_datasets: 5 | - fer2013 6 | - _self_ 7 | _target_: fusion_bench.modelpool.ResNetForImageClassificationPool 8 | _recursive_: False 9 | type: transformers 10 | models: 11 | _pretrained_: 12 | config_path: microsoft/resnet-18 13 | pretrained: true 14 | dataset_name: fer2013 -------------------------------------------------------------------------------- /config/modelpool/ResNetForImageClassification/transformers/resnet18_food101.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - /dataset/image_classification/train@train_datasets: 3 | - food101 4 | - /dataset/image_classification/test@val_datasets: 5 | - food101 6 | - _self_ 7 | _target_: fusion_bench.modelpool.ResNetForImageClassificationPool 8 | _recursive_: False 9 | type: transformers 10 | models: 11 | _pretrained_: 12 | config_path: microsoft/resnet-18 13 | pretrained: true 14 | dataset_name: food101 -------------------------------------------------------------------------------- /config/modelpool/ResNetForImageClassification/transformers/resnet18_resisc45.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - /dataset/image_classification/train@train_datasets: 3 | - resisc45 4 | - /dataset/image_classification/test@val_datasets: 5 | - resisc45 6 | - _self_ 7 | _target_: fusion_bench.modelpool.ResNetForImageClassificationPool 8 | _recursive_: False 9 | type: transformers 10 | models: 11 | _pretrained_: 12 | config_path: microsoft/resnet-18 13 | pretrained: true 14 | dataset_name: resisc45 -------------------------------------------------------------------------------- /config/modelpool/ResNetForImageClassification/transformers/resnet50_cifar10.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - /dataset/image_classification/train@train_datasets: 3 | - cifar10 4 | - /dataset/image_classification/test@val_datasets: 5 | - cifar10 6 | - _self_ 7 | _target_: fusion_bench.modelpool.ResNetForImageClassificationPool 8 | _recursive_: False 9 | type: transformers 10 | models: 11 | _pretrained_: 12 | config_path: microsoft/resnet-50 13 | pretrained: true 14 | dataset_name: cifar10 -------------------------------------------------------------------------------- /config/modelpool/ResNetForImageClassification/transformers/resnet50_cifar100.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - /dataset/image_classification/train@train_datasets: 3 | - cifar100 4 | - /dataset/image_classification/test@val_datasets: 5 | - cifar100 6 | - _self_ 7 | _target_: fusion_bench.modelpool.ResNetForImageClassificationPool 8 | _recursive_: False 9 | type: transformers 10 | models: 11 | _pretrained_: 12 | config_path: microsoft/resnet-50 13 | pretrained: true 14 | dataset_name: cifar100 -------------------------------------------------------------------------------- /config/modelpool/ResNetForImageClassification/transformers/resnet50_eurosat.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - /dataset/image_classification/train@train_datasets: 3 | - eurosat 4 | - /dataset/image_classification/test@val_datasets: 5 | - eurosat 6 | - _self_ 7 | _target_: fusion_bench.modelpool.ResNetForImageClassificationPool 8 | _recursive_: False 9 | type: transformers 10 | models: 11 | _pretrained_: 12 | 
config_path: microsoft/resnet-50 13 | pretrained: true 14 | dataset_name: eurosat -------------------------------------------------------------------------------- /config/modelpool/ResNetForImageClassification/transformers/resnet50_fer2013.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - /dataset/image_classification/train@train_datasets: 3 | - fer2013 4 | - /dataset/image_classification/test@val_datasets: 5 | - fer2013 6 | - _self_ 7 | _target_: fusion_bench.modelpool.ResNetForImageClassificationPool 8 | _recursive_: False 9 | type: transformers 10 | models: 11 | _pretrained_: 12 | config_path: microsoft/resnet-50 13 | pretrained: true 14 | dataset_name: fer2013 -------------------------------------------------------------------------------- /config/modelpool/ResNetForImageClassification/transformers/resnet50_food101.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - /dataset/image_classification/train@train_datasets: 3 | - food101 4 | - /dataset/image_classification/test@val_datasets: 5 | - food101 6 | - _self_ 7 | _target_: fusion_bench.modelpool.ResNetForImageClassificationPool 8 | _recursive_: False 9 | type: transformers 10 | models: 11 | _pretrained_: 12 | config_path: microsoft/resnet-50 13 | pretrained: true 14 | dataset_name: food101 -------------------------------------------------------------------------------- /config/modelpool/ResNetForImageClassification/transformers/resnet50_resisc45.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - /dataset/image_classification/train@train_datasets: 3 | - resisc45 4 | - /dataset/image_classification/test@val_datasets: 5 | - resisc45 6 | - _self_ 7 | _target_: fusion_bench.modelpool.ResNetForImageClassificationPool 8 | _recursive_: False 9 | type: transformers 10 | models: 11 | _pretrained_: 12 | config_path: microsoft/resnet-50 13 | pretrained: true 14 | dataset_name: resisc45 -------------------------------------------------------------------------------- /config/taskpool/LMEvalHarnessTaskPool/lm_eval.yaml: -------------------------------------------------------------------------------- 1 | _target_: fusion_bench.taskpool.LMEvalHarnessTaskPool 2 | tasks: 3 | - truthfulqa 4 | batch_size: 1 5 | verbosity: null 6 | include_path: null 7 | apply_chat_template: false 8 | # if `output_path` is not given, the results will be saved to `log_dir/lm_eval_results`, where `log_dir` is the directory controlled by lightning Fabric. 9 | output_path: null 10 | # if `log_samples` is true, the samples will be saved to `output_path`. 
--------------------------------------------------------------------------------
/examples/mergebench/evaluate_Llama-3.2-3B-Instruct.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | SCRIPT_DIR=$(dirname "$(realpath "$0")")
4 |
5 | source "$SCRIPT_DIR/evaluate.sh"
6 |
7 | MODELS=(
8 |   "MergeBench/Llama-3.2-3B-Instruct_instruction"
9 |   "MergeBench/Llama-3.2-3B-Instruct_math"
10 |   "MergeBench/Llama-3.2-3B-Instruct_coding"
11 |   "MergeBench/Llama-3.2-3B-Instruct_multilingual"
12 |   "MergeBench/Llama-3.2-3B-Instruct_safety"
13 | )
14 |
15 | LM_EVAL_ARGS="--apply_chat_template"
16 |
17 | evaluate_all_models
18 |
--------------------------------------------------------------------------------
/fusion_bench/tasks/clip_classification/fashion_mnist.py:
--------------------------------------------------------------------------------
1 | classname_mapping = {
2 |     "0": "T - shirt / top",
3 |     "1": "Trouser",
4 |     "2": "Pullover",
5 |     "3": "Dress",
6 |     "4": "Coat",
7 |     "5": "Sandal",
8 |     "6": "Shirt",
9 |     "7": "Sneaker",
10 |     "8": "Bag",
11 |     "9": "Ankle boot",
12 | }
13 | classnames = [classname_mapping[str(i)] for i in range(10)]
14 |
15 | templates = [
16 |     lambda c: f"a photo of a {c}.",
17 |     lambda c: f"a photo of the {c}.",
18 | ]
19 |
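Classname modules like the one above drive zero-shot CLIP classification: every classname is expanded through every template, and the text embeddings of the resulting prompts form the classifier head. A minimal self-contained sketch of the expansion step (the classname subset is illustrative; CLIP encoding is elided):

```python
# Expand classnames through prompt templates, as the CLIP classification
# tasks do when building zero-shot classifier heads.
classnames = ["Trouser", "Dress", "Coat"]
templates = [
    lambda c: f"a photo of a {c}.",
    lambda c: f"a photo of the {c}.",
]

# One prompt list per class; the CLIP text embeddings of these prompts are
# typically averaged to form that class's classifier weight.
prompts = {c: [t(c) for t in templates] for c in classnames}
for classname, texts in prompts.items():
    print(classname, texts)
```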
--------------------------------------------------------------------------------
/config/modelpool/ResNetForImageClassification/transformers/resnet152_cifar100.yaml:
--------------------------------------------------------------------------------
1 | defaults:
2 |   - /dataset/image_classification/train@train_datasets:
3 |       - cifar100
4 |   - /dataset/image_classification/test@val_datasets:
5 |       - cifar100
6 |   - _self_
7 | _target_: fusion_bench.modelpool.ResNetForImageClassificationPool
8 | _recursive_: False
9 | type: transformers
10 | models:
11 |   _pretrained_:
12 |     config_path: microsoft/resnet-152
13 |     pretrained: true
14 | dataset_name: cifar100
--------------------------------------------------------------------------------
/config/modelpool/ResNetForImageClassification/transformers/resnet152_resisc45.yaml:
--------------------------------------------------------------------------------
1 | defaults:
2 |   - /dataset/image_classification/train@train_datasets:
3 |       - resisc45
4 |   - /dataset/image_classification/test@val_datasets:
5 |       - resisc45
6 |   - _self_
7 | _target_: fusion_bench.modelpool.ResNetForImageClassificationPool
8 | _recursive_: False
9 | type: transformers
10 | models:
11 |   _pretrained_:
12 |     config_path: microsoft/resnet-152
13 |     pretrained: true
14 | dataset_name: resisc45
--------------------------------------------------------------------------------
/config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_two_tasks_control_task.yaml:
--------------------------------------------------------------------------------
1 | defaults:
2 |   - /dataset/image_classification/train@train_datasets:
3 |       - tiny-imagenet
4 |   - _self_
5 | _target_: fusion_bench.modelpool.CLIPVisionModelPool
6 | _recursive_: False
7 | models:
8 |   _pretrained_: openai/clip-vit-base-patch32
9 |   model_1: tanganke/clip-vit-base-patch32_sun397
10 |   model_2: tanganke/clip-vit-base-patch32_stanford-cars
11 | processor: openai/clip-vit-base-patch32
12 | platform: hf
13 |
--------------------------------------------------------------------------------
/fusion_bench/method/we_moe/utils.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 |
4 | def get_memory_usage(desc):
5 |     """
6 |     Obtain the current GPU memory usage.
7 |
8 |     Returns:
9 |         str: A string containing the allocated and cached memory in MB.
10 |     """
11 |     allocated = torch.cuda.memory_allocated() / 1024**2  # convert to MB
12 |     cached = torch.cuda.memory_reserved() / 1024**2  # convert to MB
13 |     return (
14 |         f"{desc}\nAllocated Memory: {allocated:.2f} MB\nCached Memory: {cached:.2f} MB"
15 |     )
16 |
--------------------------------------------------------------------------------
/config/modelpool/CLIPVisionModelPool/clip-vit-large-patch14_individual.yaml:
--------------------------------------------------------------------------------
1 | # This is useful for evaluating the performance of a single CLIP vision model.
2 | #
3 | # fusion_bench \
4 | #   modelpool=CLIPVisionModelPool/clip-vit-large-patch14_individual \
5 | #   modelpool.models._pretrained_=${MODEL_PATH} \
6 | #   ...
7 | _target_: fusion_bench.modelpool.CLIPVisionModelPool
8 | _recursive_: False
9 | models:
10 |   _pretrained_: openai/clip-vit-large-patch14
11 | processor: openai/clip-vit-large-patch14
12 | platform: hf
13 |
--------------------------------------------------------------------------------
/fusion_bench/tasks/base_task.py:
--------------------------------------------------------------------------------
1 | from abc import ABC, abstractmethod
2 |
3 | from omegaconf import DictConfig
4 |
5 |
6 | class BaseTask(ABC):
7 |     _taskpool = None
8 |
9 |     def __init__(self, task_config: DictConfig):
10 |         self.config = task_config
11 |
12 |     @abstractmethod
13 |     def evaluate(self, model):
14 |         """
15 |         Evaluate the model on the task.
16 |         Returns a dictionary containing the evaluation metrics.
17 |         """
18 |         raise NotImplementedError
19 |
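`BaseTask` fixes only the constructor signature and the `evaluate` contract, so a concrete task just returns a metrics dictionary. A hypothetical subclass for illustration (the class, input shapes, and metric are made up, not a task from the repo):

```python
import torch
from omegaconf import DictConfig

from fusion_bench.tasks.base_task import BaseTask


class RandomBatchAccuracyTask(BaseTask):
    """Hypothetical task: accuracy of a classifier on one random batch."""

    def evaluate(self, model):
        inputs = torch.randn(8, 3, 224, 224)   # illustrative input shape
        labels = torch.randint(0, 10, (8,))
        with torch.no_grad():
            logits = model(inputs)
        accuracy = (logits.argmax(dim=-1) == labels).float().mean().item()
        return {"accuracy": accuracy}          # the contract: a metrics dict


# Tasks receive their config at construction time.
task = RandomBatchAccuracyTask(DictConfig({"name": "random_batch_accuracy"}))
```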
--------------------------------------------------------------------------------
/config/clip-vit-base-patch32_robustness_corrupted.yaml:
--------------------------------------------------------------------------------
1 | defaults:
2 |   - fabric_model_fusion
3 |   - override modelpool: CLIPVisionModelPool/clip-vit-base-patch32_robustness_corrupted
4 |   - override method: dummy # change this to the method you want to use
5 |   - override taskpool: CLIPVisionModelTaskPool/clip-vit-base-patch32_robustness_corrupted
6 |   - _self_
7 | # `corruption` can be one of:
8 | # contrast, gaussian_noise, impulse_noise, jpeg_compression, motion_blur, pixelate, spatter
9 | corruption: gaussian_noise
10 |
--------------------------------------------------------------------------------
/config/modelpool/ResNetForImageClassification/transformers/resnet18_fashion_mnist.yaml:
--------------------------------------------------------------------------------
1 | defaults:
2 |   - /dataset/image_classification/train@train_datasets:
3 |       - fashion_mnist
4 |   - /dataset/image_classification/test@val_datasets:
5 |       - fashion_mnist
6 |   - _self_
7 | _target_: fusion_bench.modelpool.ResNetForImageClassificationPool
8 | _recursive_: False
9 | type: transformers
10 | models:
11 |   _pretrained_:
12 |     config_path: microsoft/resnet-18
13 |     pretrained: true
14 | dataset_name: fashion_mnist
--------------------------------------------------------------------------------
/config/modelpool/ResNetForImageClassification/transformers/resnet18_rendered-sst2.yaml:
--------------------------------------------------------------------------------
1 | defaults:
2 |   - /dataset/image_classification/train@train_datasets:
3 |       - rendered-sst2
4 |   - /dataset/image_classification/test@val_datasets:
5 |       - rendered-sst2
6 |   - _self_
7 | _target_: fusion_bench.modelpool.ResNetForImageClassificationPool
8 | _recursive_: False
9 | type: transformers
10 | models:
11 |   _pretrained_:
12 |     config_path: microsoft/resnet-18
13 |     pretrained: true
14 | dataset_name: rendered-sst2
--------------------------------------------------------------------------------
/config/modelpool/ResNetForImageClassification/transformers/resnet18_stanford-cars.yaml:
--------------------------------------------------------------------------------
1 | defaults:
2 |   - /dataset/image_classification/train@train_datasets:
3 |       - stanford-cars
4 |   - /dataset/image_classification/test@val_datasets:
5 |       - stanford-cars
6 |   - _self_
7 | _target_: fusion_bench.modelpool.ResNetForImageClassificationPool
8 | _recursive_: False
9 | type: transformers
10 | models:
11 |   _pretrained_:
12 |     config_path: microsoft/resnet-18
13 |     pretrained: true
14 | dataset_name: stanford-cars
--------------------------------------------------------------------------------
/config/modelpool/ResNetForImageClassification/transformers/resnet50_fashion_mnist.yaml:
--------------------------------------------------------------------------------
1 | defaults:
2 |   - /dataset/image_classification/train@train_datasets:
3 |       - fashion_mnist
4 |   - /dataset/image_classification/test@val_datasets:
5 |       - fashion_mnist
6 |   - _self_
7 | _target_: fusion_bench.modelpool.ResNetForImageClassificationPool
8 | _recursive_: False
9 | type: transformers
10 | models:
11 |   _pretrained_:
12 |     config_path: microsoft/resnet-50
13 |     pretrained: true
14 | dataset_name: fashion_mnist
--------------------------------------------------------------------------------
/config/modelpool/ResNetForImageClassification/transformers/resnet50_rendered-sst2.yaml:
--------------------------------------------------------------------------------
1 | defaults:
2 |   - /dataset/image_classification/train@train_datasets:
3 |       - rendered-sst2
4 |   - /dataset/image_classification/test@val_datasets:
5 |       - rendered-sst2
6 |   - _self_
7 | _target_: fusion_bench.modelpool.ResNetForImageClassificationPool
8 | _recursive_: False
9 | type: transformers
10 | models:
11 |   _pretrained_:
12 |     config_path: microsoft/resnet-50
13 |     pretrained: true
14 | dataset_name: rendered-sst2
--------------------------------------------------------------------------------
/config/modelpool/ResNetForImageClassification/transformers/resnet50_stanford-cars.yaml:
--------------------------------------------------------------------------------
1 | defaults:
2 |   - /dataset/image_classification/train@train_datasets:
3 |       - stanford-cars
4 |   - /dataset/image_classification/test@val_datasets:
5 |       - stanford-cars
6 |   - _self_
7 | _target_: fusion_bench.modelpool.ResNetForImageClassificationPool
8 | _recursive_: False
9 | type: transformers
10 | models:
11 |   _pretrained_:
12 |     config_path: microsoft/resnet-50
13 |     pretrained: true
14 | dataset_name: stanford-cars
--------------------------------------------------------------------------------
/fusion_bench/method/task_singular_vector/TSVC.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch import Tensor, nn
3 |
4 | from fusion_bench import BaseAlgorithm
5 |
6 | from .utils import TSVC_utils, check_parameterNamesMatch
7 |
8 |
9 | class TaskSingularVectorCompression(BaseAlgorithm):
10 |     def __init__(self, **kwargs):
11 |         super().__init__(**kwargs)
12 |
13 |     def run(self, modelpool):
14 |         raise NotImplementedError(
15 |             "Task Singular Vector Compression is not implemented yet."
16 |         )
17 |
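`TaskSingularVectorCompression.run` is still a stub. The idea behind task-singular-vector methods is to treat each task vector (fine-tuned weights minus pretrained weights) as a matrix per layer and keep only its leading singular directions. A rough sketch of that compression step for a single 2-D weight, not the repo's eventual implementation:

```python
import torch

def compress_task_vector(w_finetuned: torch.Tensor, w_pretrained: torch.Tensor, k: int):
    """Low-rank compression of a 2-D task vector via truncated SVD (a sketch)."""
    delta = w_finetuned - w_pretrained           # the task vector
    u, s, vh = torch.linalg.svd(delta, full_matrices=False)
    # Keep the k largest singular triplets.
    return u[:, :k], s[:k], vh[:k, :]

def reconstruct(u_k, s_k, vh_k, w_pretrained):
    """Apply the compressed task vector back onto the pretrained weight."""
    return w_pretrained + u_k @ torch.diag(s_k) @ vh_k
```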
--------------------------------------------------------------------------------
/config/method/bitdelta/bitdelta.yaml:
--------------------------------------------------------------------------------
1 | # =============================================================================
2 | # FusionBench Method Configuration: BitDelta
3 | # =============================================================================
4 | _target_: fusion_bench.method.bitdelta.BitDeltaAlgorithm
5 | save_dir: null
6 | save_full_model: false
7 | # training arguments
8 | lr: 1e-4
9 | batch_size: 4
10 | num_steps: 100
11 | # dataset arguments
12 | dataset_name: c4
13 | subset: en
14 | split: train
15 | max_length: 128
16 |
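BitDelta compresses the difference between a fine-tuned and a base weight matrix to one bit per parameter plus a single scale, which the `lr` / `num_steps` / C4 settings above then calibrate by distillation. A minimal sketch of the encoding step, with the scale initialized to the mean absolute delta and the distillation loop omitted:

```python
import torch

def bitdelta_compress(w_finetuned: torch.Tensor, w_base: torch.Tensor):
    """1-bit delta compression: delta is approximated by scale * sign(delta).

    Sketch of the encoding step only; in BitDelta the scalar scale is
    further trained by distillation on calibration data (e.g. C4).
    """
    delta = w_finetuned - w_base
    sign = torch.sign(delta)        # one bit per parameter
    scale = delta.abs().mean()      # initial per-matrix scale
    return sign, scale

def bitdelta_decompress(w_base: torch.Tensor, sign: torch.Tensor, scale: torch.Tensor):
    return w_base + scale * sign
```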
--------------------------------------------------------------------------------
/config/method/smile_upscaling/singular_projection_merging.yaml:
--------------------------------------------------------------------------------
1 | # =============================================================================
2 | # FusionBench Method Configuration: SMILE Singular Projection Merging
3 | # =============================================================================
4 | name: singular_projection_merging
5 | # performing the merge on a CUDA device accelerates the SVD computation
6 | device: cuda
7 | k: 128
8 | rank: low # or high
9 | full_matrices: false
10 | # path to save/load the model
11 | model_path: null
12 |
--------------------------------------------------------------------------------
/config/modelpool/ResNetForImageClassification/transformers/resnet152_fashion_mnist.yaml:
--------------------------------------------------------------------------------
1 | defaults:
2 |   - /dataset/image_classification/train@train_datasets:
3 |       - fashion_mnist
4 |   - /dataset/image_classification/test@val_datasets:
5 |       - fashion_mnist
6 |   - _self_
7 | _target_: fusion_bench.modelpool.ResNetForImageClassificationPool
8 | _recursive_: False
9 | type: transformers
10 | models:
11 |   _pretrained_:
12 |     config_path: microsoft/resnet-152
13 |     pretrained: true
14 | dataset_name: fashion_mnist
--------------------------------------------------------------------------------
/config/modelpool/ResNetForImageClassification/transformers/resnet152_rendered-sst2.yaml:
--------------------------------------------------------------------------------
1 | defaults:
2 |   - /dataset/image_classification/train@train_datasets:
3 |       - rendered-sst2
4 |   - /dataset/image_classification/test@val_datasets:
5 |       - rendered-sst2
6 |   - _self_
7 | _target_: fusion_bench.modelpool.ResNetForImageClassificationPool
8 | _recursive_: False
9 | type: transformers
10 | models:
11 |   _pretrained_:
12 |     config_path: microsoft/resnet-152
13 |     pretrained: true
14 | dataset_name: rendered-sst2
--------------------------------------------------------------------------------
/config/modelpool/ResNetForImageClassification/transformers/resnet152_stanford-cars.yaml:
--------------------------------------------------------------------------------
1 | defaults:
2 |   - /dataset/image_classification/train@train_datasets:
3 |       - stanford-cars
4 |   - /dataset/image_classification/test@val_datasets:
5 |       - stanford-cars
6 |   - _self_
7 | _target_: fusion_bench.modelpool.ResNetForImageClassificationPool
8 | _recursive_: False
9 | type: transformers
10 | models:
11 |   _pretrained_:
12 |     config_path: microsoft/resnet-152
13 |     pretrained: true
14 | dataset_name: stanford-cars
--------------------------------------------------------------------------------
/config/modelpool/ResNetForImageClassification/transformers/resnet18_emnist_letters.yaml:
--------------------------------------------------------------------------------
1 | defaults:
2 |   - /dataset/image_classification/train@train_datasets:
3 |       - emnist_letters
4 |   - /dataset/image_classification/test@val_datasets:
5 |       - emnist_letters
6 |   - _self_
7 | _target_: fusion_bench.modelpool.ResNetForImageClassificationPool
8 | _recursive_: False
9 | type: transformers
10 | models:
11 |   _pretrained_:
12 |     config_path: microsoft/resnet-18
13 |     pretrained: true
14 | dataset_name: emnist_letters
--------------------------------------------------------------------------------
/config/modelpool/ResNetForImageClassification/transformers/resnet50_emnist_letters.yaml:
--------------------------------------------------------------------------------
1 | defaults:
2 |   - /dataset/image_classification/train@train_datasets:
3 |       - emnist_letters
4 |   - /dataset/image_classification/test@val_datasets:
5 |       - emnist_letters
6 |   - _self_
7 | _target_: fusion_bench.modelpool.ResNetForImageClassificationPool
8 | _recursive_: False
9 | type: transformers
10 | models:
11 |   _pretrained_:
12 |     config_path: microsoft/resnet-50
13 |     pretrained: true
14 | dataset_name: emnist_letters
--------------------------------------------------------------------------------
/config/method/ensemble/max_model_predictor.yaml:
--------------------------------------------------------------------------------
1 | # =============================================================================
2 | # FusionBench Method Configuration: Max Model Predictor
3 | # =============================================================================
4 | # Selects the model with maximum confidence or performance per example/task.
5 | # No additional hyperparameters are required.
6 | # =============================================================================
7 | _target_: fusion_bench.method.MaxModelPredictorAlgorithm
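The max-model predictor is an ensemble with no hyperparameters. One plausible reading, sketched below for illustration rather than as `MaxModelPredictorAlgorithm`'s actual semantics, takes the element-wise maximum of the member models' outputs so the most confident model wins per class:

```python
import torch

def max_model_predict(models, x):
    """Element-wise maximum over member model predictions (a sketch)."""
    with torch.no_grad():
        # Shape: (n_models, batch, n_classes)
        outputs = torch.stack([m(x) for m in models])
    # Per-example, per-class maximum across the ensemble members.
    return outputs.max(dim=0).values
```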
--------------------------------------------------------------------------------
/config/modelpool/ResNetForImageClassification/transformers/resnet152_emnist_letters.yaml:
--------------------------------------------------------------------------------
1 | defaults:
2 |   - /dataset/image_classification/train@train_datasets:
3 |       - emnist_letters
4 |   - /dataset/image_classification/test@val_datasets:
5 |       - emnist_letters
6 |   - _self_
7 | _target_: fusion_bench.modelpool.ResNetForImageClassificationPool
8 | _recursive_: False
9 | type: transformers
10 | models:
11 |   _pretrained_:
12 |     config_path: microsoft/resnet-152
13 |     pretrained: true
14 | dataset_name: emnist_letters
--------------------------------------------------------------------------------
/config/modelpool/ResNetForImageClassification/transformers/resnet152_oxford-iiit-pet.yaml:
--------------------------------------------------------------------------------
1 | defaults:
2 |   - /dataset/image_classification/train@train_datasets:
3 |       - oxford-iiit-pet
4 |   - /dataset/image_classification/test@val_datasets:
5 |       - oxford-iiit-pet
6 |   - _self_
7 | _target_: fusion_bench.modelpool.ResNetForImageClassificationPool
8 | _recursive_: False
9 | type: transformers
10 | models:
11 |   _pretrained_:
12 |     config_path: microsoft/resnet-152
13 |     pretrained: true
14 | dataset_name: oxford-iiit-pet
--------------------------------------------------------------------------------
/config/modelpool/ResNetForImageClassification/transformers/resnet18_oxford-iiit-pet.yaml:
--------------------------------------------------------------------------------
1 | defaults:
2 |   - /dataset/image_classification/train@train_datasets:
3 |       - oxford-iiit-pet
4 |   - /dataset/image_classification/test@val_datasets:
5 |       - oxford-iiit-pet
6 |   - _self_
7 | _target_: fusion_bench.modelpool.ResNetForImageClassificationPool
8 | _recursive_: False
9 | type: transformers
10 | models:
11 |   _pretrained_:
12 |     config_path: microsoft/resnet-18
13 |     pretrained: true
14 | dataset_name: oxford-iiit-pet
--------------------------------------------------------------------------------
/config/modelpool/ResNetForImageClassification/transformers/resnet50_oxford-iiit-pet.yaml:
--------------------------------------------------------------------------------
1 | defaults:
2 |   - /dataset/image_classification/train@train_datasets:
3 |       - oxford-iiit-pet
4 |   - /dataset/image_classification/test@val_datasets:
5 |       - oxford-iiit-pet
6 |   - _self_
7 | _target_: fusion_bench.modelpool.ResNetForImageClassificationPool
8 | _recursive_: False
9 | type: transformers
10 | models:
11 |   _pretrained_:
12 |     config_path: microsoft/resnet-50
13 |     pretrained: true
14 | dataset_name: oxford-iiit-pet
--------------------------------------------------------------------------------
/config/taskpool/reward_model_evaluation.yaml:
--------------------------------------------------------------------------------
1 | _target_: fusion_bench.taskpool.llama.reward_model.RewardModelEvaluationTaskPool
2 | test_datasets:
3 |   preference_700k:
4 |     _target_: fusion_bench.dataset.llama.preference_700k.load_tokenized_preference_700k_for_rlhf
5 |     tokenizer: ${...tokenizer}
6 |     path: hendrydong/preference_700K
7 |     split: train
8 |     cache_path: null
9 | dataloader_kwargs:
10 |   shuffle: False
11 |   batch_size: 16
12 | tokenizer: ${..modelpool.tokenizer}
13 | max_num_samples: 1000
14 | seed: 42
15 |
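The `${...tokenizer}` and `${..modelpool.tokenizer}` references are OmegaConf relative interpolations: the first dot anchors at the current node and every further dot climbs one level up the config tree. A self-contained demonstration with made-up keys:

```python
from omegaconf import OmegaConf

# Toy config mirroring the pattern above: each dot after the first in
# `${...x}` climbs one level up from the node holding the reference.
cfg = OmegaConf.create(
    {
        "tokenizer": "meta-llama/Llama-2-7b-hf",
        "test_datasets": {
            "preference_700k": {
                # Three dots: up from `preference_700k` to `test_datasets`
                # to the root, then read `tokenizer`.
                "tokenizer": "${...tokenizer}",
            }
        },
    }
)
assert cfg.test_datasets.preference_700k.tokenizer == "meta-llama/Llama-2-7b-hf"
```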
--------------------------------------------------------------------------------
/docs/api/fusion_bench.utils/data.md:
--------------------------------------------------------------------------------
1 | # Data Utilities
2 |
3 | ## Dataset Manipulation
4 |
5 | ::: fusion_bench.utils.data
6 |     options:
7 |       show_root_full_path: true
8 |       heading_level: 3
9 |
10 | ## JSON Import/Export
11 |
12 | ::: fusion_bench.utils.json
13 |     options:
14 |       show_root_full_path: true
15 |       heading_level: 3
16 |
17 | ## TensorBoard Data Import
18 |
19 | ::: fusion_bench.utils.tensorboard
20 |     options:
21 |       show_root_full_path: true
22 |       heading_level: 3
23 |
--------------------------------------------------------------------------------
/fusion_bench/tasks/clip_classification/fer2013.py:
--------------------------------------------------------------------------------
1 | classnames = [
2 |     "angry",
3 |     "disgusted",
4 |     "fearful",
5 |     "happy",
6 |     "neutral",
7 |     "sad",
8 |     "surprised",
9 | ]
10 |
11 | templates = [
12 |     lambda c: f"a photo of a {c} looking face.",
13 |     lambda c: f"a photo of a face showing the emotion: {c}.",
14 |     lambda c: f"a photo of a face looking {c}.",
15 |     lambda c: f"a face that looks {c}.",
16 |     lambda c: f"they look {c}.",
17 |     lambda c: f"look at how {c} they are.",
18 | ]
19 |
--------------------------------------------------------------------------------
/config/method/moe_pruner/moe_pruner.yaml:
--------------------------------------------------------------------------------
1 | _target_: fusion_bench.method.moe_pruner.MoEPruner
2 |
3 | nsamples: 100
4 | seed: 42
5 | device: cuda
6 | max_seqlen: 2048
7 | # `prune_type` can be either `unstructured` or `semistructured`
8 | prune_type: unstructured
9 | # === options for unstructured pruning ===
10 | # `sparsity_ratio` is the fraction of weights to be pruned (1.0 means all weights are pruned)
11 | sparsity_ratio: 0.5
12 | # === options for semistructured pruning ===
13 | # 2:4 means 2 out of 4 weights are pruned
14 | n: 2
15 | m: 4
16 |
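`n: 2, m: 4` configures n:m semi-structured sparsity: in every contiguous group of `m` weights, `n` are zeroed, and the 2:4 pattern can be accelerated on recent NVIDIA GPUs. A magnitude-based sketch of mask construction; the actual pruner scores weights with calibration data rather than raw magnitude:

```python
import torch

def nm_prune_mask(weight: torch.Tensor, n: int = 2, m: int = 4) -> torch.Tensor:
    """Build an n:m semi-structured mask: prune `n` of every `m` weights.

    Magnitude-based sketch; assumes weight.numel() is divisible by m.
    """
    flat = weight.reshape(-1, m).abs()
    # Indices of the n smallest-magnitude weights within each group of m.
    prune_idx = flat.topk(n, dim=1, largest=False).indices
    mask = torch.ones_like(flat, dtype=torch.bool)
    mask.scatter_(1, prune_idx, False)
    return mask.reshape(weight.shape)

w = torch.randn(8, 8)
pruned = w * nm_prune_mask(w)   # 2:4 pattern: half the weights survive
```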
--------------------------------------------------------------------------------
/config/method/slerp/slerp_lm.yaml:
--------------------------------------------------------------------------------
1 | # =============================================================================
2 | # FusionBench Method Configuration: SLERP for Causal LM
3 | # =============================================================================
4 | # Spherical linear interpolation between two causal language models.
5 | # =============================================================================
6 | _target_: fusion_bench.method.SlerpForCausalLM
7 | t: 0.5
8 | model_save_path: ${path.log_dir}/checkpoint
9 | show_pbar: True
10 |
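Given the factor `t`, SLERP follows the great-circle arc between the two weight vectors rather than the straight line, preserving the interpolant's norm. A minimal sketch of the formula for two flattened tensors, with a fallback to linear interpolation when the vectors are nearly parallel; this illustrates the math, not necessarily `SlerpForCausalLM`'s exact implementation:

```python
import torch

def slerp(a: torch.Tensor, b: torch.Tensor, t: float, eps: float = 1e-8):
    """Spherical linear interpolation between two flattened weight vectors.

    slerp(a, b; t) = sin((1-t)*omega)/sin(omega) * a + sin(t*omega)/sin(omega) * b,
    where omega is the angle between a and b.
    """
    a_n = a / (a.norm() + eps)
    b_n = b / (b.norm() + eps)
    cos_omega = torch.clamp(a_n.dot(b_n), -1.0, 1.0)
    omega = torch.acos(cos_omega)
    if omega.abs() < 1e-4:  # nearly parallel: fall back to linear interpolation
        return (1 - t) * a + t * b
    so = torch.sin(omega)
    return (torch.sin((1 - t) * omega) / so) * a + (torch.sin(t * omega) / so) * b
```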
--------------------------------------------------------------------------------
/examples/clip_finetune/config/modelpool/clip-finetune_TALL14.yaml:
--------------------------------------------------------------------------------
1 | defaults:
2 |   - /dataset/image_classification/train@train_datasets: TALL14
3 |   - _self_
4 | _target_: fusion_bench.modelpool.CLIPVisionModelPool
5 | base_model: openai/clip-vit-base-patch32
6 | models:
7 |   _pretrained_:
8 |     _target_: transformers.CLIPVisionModel.from_pretrained
9 |     pretrained_model_name_or_path: ${...base_model}
10 | processor:
11 |   _target_: transformers.CLIPProcessor.from_pretrained
12 |   pretrained_model_name_or_path: ${..base_model}
13 |
--------------------------------------------------------------------------------
/examples/clip_finetune/config/modelpool/clip-finetune_TALL20.yaml:
--------------------------------------------------------------------------------
1 | defaults:
2 |   - /dataset/image_classification/train@train_datasets: TALL20
3 |   - _self_
4 | _target_: fusion_bench.modelpool.CLIPVisionModelPool
5 | base_model: openai/clip-vit-base-patch32
6 | models:
7 |   _pretrained_:
8 |     _target_: transformers.CLIPVisionModel.from_pretrained
9 |     pretrained_model_name_or_path: ${...base_model}
10 | processor:
11 |   _target_: transformers.CLIPProcessor.from_pretrained
12 |   pretrained_model_name_or_path: ${..base_model}
13 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/config.yml:
--------------------------------------------------------------------------------
1 | blank_issues_enabled: true
2 | contact_links:
3 |   - name: 📚 Documentation
4 |     url: https://tanganke.github.io/fusion_bench/
5 |     about: Read the comprehensive FusionBench documentation
6 |   - name: 💬 GitHub Discussions
7 |     url: https://github.com/tanganke/fusion_bench/discussions
8 |     about: Ask questions and discuss ideas with the community
9 |   - name: 📖 Examples
10 |     url: https://github.com/tanganke/fusion_bench/tree/main/examples
11 |     about: Browse example scripts and notebooks
12 |
--------------------------------------------------------------------------------
/config/modelpool/ResNetForImageClassification/transformers/resnet152_oxford_flowers102.yaml:
--------------------------------------------------------------------------------
1 | defaults:
2 |   - /dataset/image_classification/train@train_datasets:
3 |       - oxford_flowers102
4 |   - /dataset/image_classification/test@val_datasets:
5 |       - oxford_flowers102
6 |   - _self_
7 | _target_: fusion_bench.modelpool.ResNetForImageClassificationPool
8 | _recursive_: False
9 | type: transformers
10 | models:
11 |   _pretrained_:
12 |     config_path: microsoft/resnet-152
13 |     pretrained: true
14 | dataset_name: oxford_flowers102
--------------------------------------------------------------------------------
/config/modelpool/ResNetForImageClassification/transformers/resnet18_oxford_flowers102.yaml:
--------------------------------------------------------------------------------
1 | defaults:
2 |   - /dataset/image_classification/train@train_datasets:
3 |       - oxford_flowers102
4 |   - /dataset/image_classification/test@val_datasets:
5 |       - oxford_flowers102
6 |   - _self_
7 | _target_: fusion_bench.modelpool.ResNetForImageClassificationPool
8 | _recursive_: False
9 | type: transformers
10 | models:
11 |   _pretrained_:
12 |     config_path: microsoft/resnet-18
13 |     pretrained: true
14 | dataset_name: oxford_flowers102
--------------------------------------------------------------------------------
/config/modelpool/ResNetForImageClassification/transformers/resnet50_oxford_flowers102.yaml:
--------------------------------------------------------------------------------
1 | defaults:
2 |   - /dataset/image_classification/train@train_datasets:
3 |       - oxford_flowers102
4 |   - /dataset/image_classification/test@val_datasets:
5 |       - oxford_flowers102
6 |   - _self_
7 | _target_: fusion_bench.modelpool.ResNetForImageClassificationPool
8 | _recursive_: False
9 | type: transformers
10 | models:
11 |   _pretrained_:
12 |     config_path: microsoft/resnet-50
13 |     pretrained: true
14 | dataset_name: oxford_flowers102
--------------------------------------------------------------------------------