├── tests
│   ├── __init__.py
│   ├── test_utils
│   │   └── __init__.py
│   ├── test_method
│   │   └── __init__.py
│   ├── test_mixins
│   │   └── __init__.py
│   ├── test_modelpool
│   │   └── __init__.py
│   ├── test_models
│   │   └── __init__.py
│   ├── import_profile.py
│   └── README.md
├── docs
│   ├── config
│   ├── .gitignore
│   ├── algorithms
│   │   ├── model_stitching.md
│   │   ├── specification_ensemble.md
│   │   ├── layer_recombination.md
│   │   ├── images
│   │   │   ├── ewemoe.png
│   │   │   ├── wemoe.png
│   │   │   ├── bitdelta.png
│   │   │   ├── ewemoe_1.png
│   │   │   ├── ewemoe_2.png
│   │   │   ├── pwe_moe.png
│   │   │   ├── sigmoid.png
│   │   │   ├── adamerging.png
│   │   │   ├── iso_merging.png
│   │   │   ├── solar10.7B.png
│   │   │   ├── ties_merging.jpg
│   │   │   ├── Task Arithmetic.png
│   │   │   ├── smile_upscaling.png
│   │   │   ├── sparse_upcycling.png
│   │   │   ├── wemoe_lr_tuning.png
│   │   │   ├── max-model_predictor.png
│   │   │   ├── wemoe_loss_landscape.png
│   │   │   ├── concrete_subspace_learning.png
│   │   │   ├── fedmr_model_recombination.jpg
│   │   │   ├── regmean_vs_regmean_plusplus.png
│   │   │   ├── adamerging_layerwise_coefficients.png
│   │   │   ├── concrete_adamerging_vs_adamerging.png
│   │   │   ├── ties_merging_hyperparameter_tuning.png
│   │   │   └── adamerging_model_merging_coefficients.png
│   │   ├── pruning
│   │   │   └── images
│   │   │       └── llama_2_4_semistructued_first_layer.png
│   │   └── slerp.md
│   ├── api
│   │   ├── fusion_bench.utils
│   │   │   ├── misc.md
│   │   │   ├── profiling.md
│   │   │   ├── caching.md
│   │   │   ├── filesystem.md
│   │   │   ├── modelscope.md
│   │   │   ├── logging.md
│   │   │   ├── package_management.md
│   │   │   ├── torch.md
│   │   │   └── data.md
│   │   ├── fusion_bench.optim.md
│   │   ├── fusion_bench.method
│   │   │   └── ensemble.md
│   │   └── fusion_bench.program.md
│   ├── css
│   │   └── material_extra.css
│   ├── images
│   │   ├── llm.png
│   │   ├── model_mixing.png
│   │   ├── model_ensemble.png
│   │   ├── model_merging.png
│   │   ├── fusion_bench_flow.png
│   │   ├── model_upscaling.png
│   │   ├── learning_paradiagm.png
│   │   ├── multi-task_core_steps.png
│   │   ├── multi-task_model_fusion.png
│   │   ├── accelerate model training.png
│   │   └── framework_of_model_fusion.png
│   ├── cli
│   │   └── images
│   │       ├── vscode_debug.png
│   │       ├── pycharm_debug_1.png
│   │       ├── pycharm_debug_2.png
│   │       ├── pycharm_debug_3.png
│   │       ├── tab_completion.png
│   │       └── fusion_bench_webui.png
│   ├── modelpool
│   │   ├── clip-vit-cos.png
│   │   └── images
│   │       ├── convnext_block.png
│   │       ├── clip_eight_corruption.png
│   │       ├── NYUv2-0000003446-63769b25.jpg
│   │       ├── clip-vit-base-patch16_full&lora&l-lora.png
│   │       └── clip-vit-base-patch16_full&lora&l-lora_average.png
│   ├── readinglist
│   │   └── images
│   │       ├── watt.png
│   │       ├── fusellm.png
│   │       ├── lorahub.png
│   │       ├── pwe_moe.png
│   │       ├── forkmerge.png
│   │       ├── fs-merge.png
│   │       ├── fusechat.png
│   │       ├── lora_lego.png
│   │       ├── pituning.png
│   │       ├── adapter_soup.png
│   │       ├── twin_merging.png
│   │       ├── depth_upscaling.png
│   │       ├── scaling_smart.png
│   │       ├── smile_upscaling.png
│   │       ├── Chronopoulou2023.png
│   │       ├── enneng2024survey.png
│   │       ├── sparse-modelsoups.png
│   │       ├── sparse_upcycling.png
│   │       ├── branch_and_merging.png
│   │       └── branch_and_merging_alg.png
│   ├── guides
│   │   └── fusion_bench
│   │       └── mixins
│   │           └── lightning_fabric.md
│   ├── taskpool
│   │   ├── dummy.md
│   │   ├── LlamaTestGenerationTaskPool.md
│   │   ├── gpt2_classification.md
│   │   └── flan-t5_generation.md
│   └── javascripts
│       └── mathjax.js
├── .vscode
│   ├── .gitignore
│   └── init.sh
├── fusion_bench_config
├── config
│   ├── .gitignore
│   ├── model
│   │   ├── clip-vit
│   │   │   ├── clip-vit-base-patch16.yaml
│   │   │   ├── clip-vit-base-patch16_dtd.yaml
│   │   │   ├── clip-vit-base-patch32.yaml
│   │   │   ├── clip-vit-base-patch32_dtd.yaml
│   │   │   ├── clip-vit-large-patch14.yaml
│   │   │   ├── clip-vit-large-patch14_dtd.yaml
│   │   │   ├── clip-vit-base-patch16_gtsrb.yaml
│   │   │   ├── clip-vit-base-patch16_mnist.yaml
│   │   │   ├── clip-vit-base-patch16_pcam.yaml
│   │   │   ├── clip-vit-base-patch16_stl10.yaml
│   │   │   ├── clip-vit-base-patch16_svhn.yaml
│   │   │   ├── clip-vit-base-patch32_gtsrb.yaml
│   │   │   ├── clip-vit-base-patch32_mnist.yaml
│   │   │   ├── clip-vit-base-patch32_pcam.yaml
│   │   │   ├── clip-vit-base-patch32_stl10.yaml
│   │   │   ├── clip-vit-base-patch32_svhn.yaml
│   │   │   ├── clip-vit-large-patch14_pcam.yaml
│   │   │   ├── clip-vit-large-patch14_svhn.yaml
│   │   │   ├── clip-vit-base-patch16_cifar10.yaml
│   │   │   ├── clip-vit-base-patch16_eurosat.yaml
│   │   │   ├── clip-vit-base-patch16_fer2013.yaml
│   │   │   ├── clip-vit-base-patch16_food101.yaml
│   │   │   ├── clip-vit-base-patch16_kmnist.yaml
│   │   │   ├── clip-vit-base-patch16_sun397.yaml
│   │   │   ├── clip-vit-base-patch32_cifar10.yaml
│   │   │   ├── clip-vit-base-patch32_eurosat.yaml
│   │   │   ├── clip-vit-base-patch32_fer2013.yaml
│   │   │   ├── clip-vit-base-patch32_food101.yaml
│   │   │   ├── clip-vit-base-patch32_kmnist.yaml
│   │   │   ├── clip-vit-base-patch32_sun397.yaml
│   │   │   ├── clip-vit-large-patch14_gtsrb.yaml
│   │   │   ├── clip-vit-large-patch14_kmnist.yaml
│   │   │   ├── clip-vit-large-patch14_mnist.yaml
│   │   │   ├── clip-vit-large-patch14_stl10.yaml
│   │   │   ├── clip-vit-large-patch14_sun397.yaml
│   │   │   ├── clip-vit-base-patch16_cifar100.yaml
│   │   │   ├── clip-vit-base-patch16_resisc45.yaml
│   │   │   ├── clip-vit-base-patch32_cifar100.yaml
│   │   │   ├── clip-vit-base-patch32_resisc45.yaml
│   │   │   ├── clip-vit-large-patch14_cifar10.yaml
│   │   │   ├── clip-vit-large-patch14_cifar100.yaml
│   │   │   ├── clip-vit-large-patch14_eurosat.yaml
│   │   │   ├── clip-vit-large-patch14_fer2013.yaml
│   │   │   ├── clip-vit-large-patch14_food101.yaml
│   │   │   ├── clip-vit-large-patch14_resisc45.yaml
│   │   │   ├── clip-vit-base-patch16_fashion_mnist.yaml
│   │   │   ├── clip-vit-base-patch16_rendered-sst2.yaml
│   │   │   ├── clip-vit-base-patch16_stanford-cars.yaml
│   │   │   ├── clip-vit-base-patch32_fashion_mnist.yaml
│   │   │   ├── clip-vit-base-patch32_rendered-sst2.yaml
│   │   │   ├── clip-vit-base-patch32_stanford-cars.yaml
│   │   │   ├── clip-vit-large-patch14_fashion_mnist.yaml
│   │   │   ├── clip-vit-large-patch14_rendered-sst2.yaml
│   │   │   ├── clip-vit-large-patch14_stanford-cars.yaml
│   │   │   ├── clip-vit-base-patch16_emnist_letters.yaml
│   │   │   ├── clip-vit-base-patch16_oxford-iiit-pet.yaml
│   │   │   ├── clip-vit-base-patch32_emnist_letters.yaml
│   │   │   ├── clip-vit-base-patch32_oxford-iiit-pet.yaml
│   │   │   ├── clip-vit-large-patch14_emnist_letters.yaml
│   │   │   ├── clip-vit-base-patch16_oxford_flowers102.yaml
│   │   │   ├── clip-vit-base-patch32_oxford_flowers102.yaml
│   │   │   ├── clip-vit-large-patch14_oxford-iiit-pet.yaml
│   │   │   ├── clip-vit-large-patch14_oxford_flowers102.yaml
│   │   │   ├── clip-vit-base-patch16_eight_tasks.yaml
│   │   │   ├── clip-vit-large-patch14_eight_tasks.yaml
│   │   │   ├── clip-vit-base-patch32_eight_tasks.yaml
│   │   │   └── download_TALL20_models.sh
│   │   └── flan-t5
│   │       ├── flan-t5-base.yaml
│   │       ├── flan-t5-large.yaml
│   │       ├── flan-t5-base_glue-qqp.yaml
│   │       ├── flan-t5-base_glue-rte.yaml
│   │       ├── flan-t5-base_glue-cola.yaml
│   │       ├── flan-t5-base_glue-mnli.yaml
│   │       ├── flan-t5-base_glue-mrpc.yaml
│   │       ├── flan-t5-base_glue-qnli.yaml
│   │       ├── flan-t5-base_glue-sst2.yaml
│   │       ├── flan-t5-base_glue-stsb.yaml
│   │       ├── flan-t5-base_glue-cola_lora-16.yaml
│   │       ├── flan-t5-base_glue-mnli_lora-16.yaml
│   │       ├── flan-t5-base_glue-mrpc_lora-16.yaml
│   │       ├── flan-t5-base_glue-qnli_lora-16.yaml
│   │       ├── flan-t5-base_glue-qqp_lora-16.yaml
│   │       ├── flan-t5-base_glue-rte_lora-16.yaml
│   │       ├── flan-t5-base_glue-sst2_lora-16.yaml
│   │       ├── flan-t5-base_glue-stsb_lora-16.yaml
│   │       ├── flan-t5-large_glue-qqp_lora-16.yaml
│   │       ├── flan-t5-large_glue-rte_lora-16.yaml
│   │       ├── flan-t5-large_glue-cola_lora-16.yaml
│   │       ├── flan-t5-large_glue-mnli_lora-16.yaml
│   │       ├── flan-t5-large_glue-mrpc_lora-16.yaml
│   │       ├── flan-t5-large_glue-qnli_lora-16.yaml
│   │       ├── flan-t5-large_glue-sst2_lora-16.yaml
│   │       └── flan-t5-large_glue-stsb_lora-16.yaml
│   ├── taskpool
│   │   ├── dummy.yaml
│   │   ├── CLIPVisionModelTaskPool
│   │   │   ├── clip-vit-single-task_dtd.yaml
│   │   │   ├── clip-vit-single-task_gtsrb.yaml
│   │   │   ├── clip-vit-single-task_mnist.yaml
│   │   │   ├── clip-vit-single-task_pcam.yaml
│   │   │   ├── clip-vit-single-task_stl10.yaml
│   │   │   ├── clip-vit-single-task_svhn.yaml
│   │   │   ├── clip-vit-single-task_cifar10.yaml
│   │   │   ├── clip-vit-single-task_eurosat.yaml
│   │   │   ├── clip-vit-single-task_fer2013.yaml
│   │   │   ├── clip-vit-single-task_food101.yaml
│   │   │   ├── clip-vit-single-task_kmnist.yaml
│   │   │   ├── clip-vit-single-task_sun397.yaml
│   │   │   ├── clip-vit-single-task_cifar100.yaml
│   │   │   ├── clip-vit-single-task_resisc45.yaml
│   │   │   ├── clip-vit-single-task_emnist_letters.yaml
│   │   │   ├── clip-vit-single-task_fashion_mnist.yaml
│   │   │   ├── clip-vit-single-task_oxford-iiit-pet.yaml
│   │   │   ├── clip-vit-single-task_rendered-sst2.yaml
│   │   │   ├── clip-vit-single-task_stanford-cars.yaml
│   │   │   ├── clip-vit-single-task_oxford_flowers102.yaml
│   │   │   ├── clip-vit-single-task_oxford_flowers102_val.yaml
│   │   │   ├── clip-vit-classification_TA8.yaml
│   │   │   ├── clip-vit-classification_TA8_val.yaml
│   │   │   ├── clip-vit-classification_TA8_with_control_task.yaml
│   │   │   └── clip-vit-classification_TA8_L14.yaml
│   │   ├── nyuv2_taskpool.yaml
│   │   ├── LMEvalHarnessTaskPool
│   │   │   └── lm_eval.yaml
│   │   └── reward_model_evaluation.yaml
│   ├── dataset
│   │   ├── summarization
│   │   │   ├── xsum.yaml
│   │   │   ├── test
│   │   │   │   └── xsum.yaml
│   │   │   ├── train
│   │   │   │   └── xsum.yaml
│   │   │   └── val
│   │   │       └── xsum.yaml
│   │   ├── image_classification
│   │   │   ├── test
│   │   │   │   ├── mnist.yaml
│   │   │   │   ├── dtd.yaml
│   │   │   │   ├── fer2013.yaml
│   │   │   │   ├── gtsrb.yaml
│   │   │   │   ├── kmnist.yaml
│   │   │   │   ├── stl10.yaml
│   │   │   │   ├── sun397.yaml
│   │   │   │   ├── cifar10.yaml
│   │   │   │   ├── eurosat.yaml
│   │   │   │   ├── food101.yaml
│   │   │   │   ├── pcam.yaml
│   │   │   │   ├── cifar100.yaml
│   │   │   │   ├── resisc45.yaml
│   │   │   │   ├── cub-200-2011.yaml
│   │   │   │   ├── emnist_mnist.yaml
│   │   │   │   ├── rendered-sst2.yaml
│   │   │   │   ├── emnist_letters.yaml
│   │   │   │   ├── oxford-iiit-pet.yaml
│   │   │   │   ├── stanford-cars.yaml
│   │   │   │   ├── svhn.yaml
│   │   │   │   ├── tiny-imagenet.yaml
│   │   │   │   ├── fashion_mnist.yaml
│   │   │   │   ├── mango-leaf-disease.yaml
│   │   │   │   ├── oxford_flowers102.yaml
│   │   │   │   └── the_eight_tasks.yaml
│   │   │   ├── train
│   │   │   │   ├── mnist.yaml
│   │   │   │   ├── dtd.yaml
│   │   │   │   ├── fer2013.yaml
│   │   │   │   ├── gtsrb.yaml
│   │   │   │   ├── stl10.yaml
│   │   │   │   ├── cifar10.yaml
│   │   │   │   ├── eurosat.yaml
│   │   │   │   ├── food101.yaml
│   │   │   │   ├── kmnist.yaml
│   │   │   │   ├── pcam.yaml
│   │   │   │   ├── sun397.yaml
│   │   │   │   ├── cifar100.yaml
│   │   │   │   ├── resisc45.yaml
│   │   │   │   ├── emnist_mnist.yaml
│   │   │   │   ├── cub-200-2011.yaml
│   │   │   │   ├── oxford-iiit-pet.yaml
│   │   │   │   ├── rendered-sst2.yaml
│   │   │   │   ├── stanford-cars.yaml
│   │   │   │   ├── svhn.yaml
│   │   │   │   ├── tiny-imagenet.yaml
│   │   │   │   ├── emnist_letters.yaml
│   │   │   │   ├── fashion_mnist.yaml
│   │   │   │   ├── mango-leaf-disease.yaml
│   │   │   │   ├── oxford_flowers102.yaml
│   │   │   │   └── the_eight_tasks.yaml
│   │   │   ├── val
│   │   │   │   ├── the_eight_tasks.yaml
│   │   │   │   ├── dtd.yaml
│   │   │   │   ├── mnist.yaml
│   │   │   │   ├── gtsrb.yaml
│   │   │   │   ├── sun397.yaml
│   │   │   │   ├── eurosat.yaml
│   │   │   │   ├── resisc45.yaml
│   │   │   │   ├── stanford-cars.yaml
│   │   │   │   └── svhn.yaml
│   │   │   └── README.md
│   │   ├── text_generation
│   │   │   ├── test
│   │   │   │   ├── gsm8k.yaml
│   │   │   │   ├── gsm-hard.yaml
│   │   │   │   └── gsm8k_question_label.yaml
│   │   │   └── train
│   │   │       ├── gsm8k.yaml
│   │   │       ├── CodeAlpaca-20k.yaml
│   │   │       └── gsm8k_question_label.yaml
│   │   ├── llm_sft
│   │   │   ├── ultrachat_200k.yaml
│   │   │   └── alpaca_cleaned.yaml
│   │   └── question_answering
│   │       ├── train
│   │       │   ├── MetaMathQA.yaml
│   │       │   └── search_qa.yaml
│   │       ├── search_qa.yaml
│   │       ├── test
│   │       │   └── search_qa.yaml
│   │       └── val
│   │           └── search_qa.yaml
│   ├── fabric
│   │   ├── loggers
│   │   │   ├── mlflow_logger.yaml
│   │   │   ├── wandb_logger.yaml
│   │   │   ├── swandb_logger.yaml
│   │   │   ├── csv_logger.yaml
│   │   │   └── tensorboard_logger.yaml
│   │   └── strategy
│   │       ├── llama_fsdp.yaml
│   │       ├── llama_peft_fsdp.yaml
│   │       └── deepspeed.yaml
│   ├── method
│   │   ├── doge_ta
│   │   │   └── doge_ta.yaml
│   │   ├── isotropic_merging
│   │   │   ├── iso_c.yaml
│   │   │   └── iso_cts.yaml
│   │   ├── dare
│   │   │   ├── simple_average.yaml
│   │   │   ├── task_arithmetic.yaml
│   │   │   └── ties_merging.yaml
│   │   ├── tall_mask
│   │   │   └── task_arithmetic.yaml
│   │   ├── pruning
│   │   │   ├── magnitude_diff_pruning.yaml
│   │   │   └── llama_random_pruning.yaml
│   │   ├── analysis
│   │   │   ├── task_vector_violin_plot.yaml
│   │   │   └── task_vector_cos_similarity.yaml
│   │   ├── ada_svd
│   │   │   └── clip_vision.yaml
│   │   ├── classification
│   │   │   └── image_classification_finetune_test.yaml
│   │   ├── trust_region
│   │   │   └── clip_task_arithmetic.yaml
│   │   ├── expert_sparsity
│   │   │   └── README.md
│   │   ├── task_singular_vector
│   │   │   └── TaskSingularVectorMerging.yaml
│   │   ├── fw_merging
│   │   │   ├── fw_hard.yaml
│   │   │   └── fw_soft.yaml
│   │   ├── wudi
│   │   │   └── wudi.yaml
│   │   ├── regmean
│   │   │   └── regmean.yaml
│   │   ├── mixtral_moe_merging.yaml
│   │   ├── linear
│   │   │   ├── task_arithmetic_for_causallm.yaml
│   │   │   └── weighted_average.yaml
│   │   ├── dummy.yaml
│   │   ├── bitdelta
│   │   │   └── bitdelta.yaml
│   │   ├── smile_upscaling
│   │   │   └── singular_projection_merging.yaml
│   │   ├── ensemble
│   │   │   └── max_model_predictor.yaml
│   │   ├── moe_pruner
│   │   │   └── moe_pruner.yaml
│   │   └── slerp
│   │       └── slerp_lm.yaml
│   ├── _get_started
│   │   ├── greeting_program.yaml
│   │   └── llm_slerp.yaml
│   ├── modelpool
│   │   ├── CausalLMPool
│   │   │   ├── mistral-7b.yaml
│   │   │   ├── vicuna-7b-v1.5.yaml
│   │   │   ├── Qwen2.5-7B-math_and_coder.yaml
│   │   │   ├── qwen2_math_1.5B_and_R1.yaml
│   │   │   ├── Qwen2.5-1.5B_math_and_code.yaml
│   │   │   ├── Qwen2.5-1.5B_three_models.yaml
│   │   │   ├── llama-7b_3-models_v1.yaml
│   │   │   ├── mixtral_moe_merging.yaml
│   │   │   ├── simle_mixtral_exp_v4.yaml
│   │   │   └── mergebench
│   │   │       ├── gemma-2-2b.yaml
│   │   │       ├── gemma-2-9b.yaml
│   │   │       ├── Llama-3.1-8B.yaml
│   │   │       ├── Llama-3.2-3B.yaml
│   │   │       ├── gemma-2-2b-it.yaml
│   │   │       └── gemma-2-9b-it.yaml
│   │   ├── CLIPVisionModelPool
│   │   │   ├── clip-vit-base-patch32_individual.yaml
│   │   │   ├── clip-vit-base-patch32_mtl.yaml
│   │   │   ├── _template.yaml
│   │   │   ├── clip-vit-base-patch16_TA8_model_only.yaml
│   │   │   ├── clip-vit-base-patch32_single_finetuned.yaml
│   │   │   ├── clip-vit-base-patch32_svhn_and_mnist.yaml
│   │   │   ├── clip-vit-base-patch32_single_task_projection.yaml
│   │   │   ├── clip-vit-base-patch16_TA8.yaml
│   │   │   ├── clip-vit-base-patch16_TALL14_model_only.yaml
│   │   │   ├── clip-vit-large-patch14_TALL14_model_only.yaml
│   │   │   ├── clip-vit-base-patch32_two_tasks_control_task.yaml
│   │   │   └── clip-vit-large-patch14_individual.yaml
│   │   ├── Seq2SeqLMPool
│   │   │   ├── flan-t5-base_individual.yaml
│   │   │   ├── _template.yaml
│   │   │   └── flan-t5-base_glue.yaml
│   │   ├── OpenCLIPVisionModelPool
│   │   │   └── ViT-B-32_individual.yaml
│   │   ├── ConvNextForImageClassification
│   │   │   └── convnext-base-224.yaml
│   │   ├── automodelpool.yaml
│   │   ├── Dinov2ForImageClassification
│   │   │   └── dinov2-base-imagenet1k-1-layer.yaml
│   │   ├── smile_mistral_exp_v1.yaml
│   │   ├── smile_mistral_exp_v3.yaml
│   │   ├── smile_mistral_exp_v2.yaml
│   │   └── ResNetForImageClassification
│   │       └── transformers
│   │           ├── resnet18_dtd.yaml
│   │           ├── resnet50_dtd.yaml
│   │           ├── resnet152_dtd.yaml
│   │           ├── resnet18_pcam.yaml
│   │           ├── resnet18_svhn.yaml
│   │           ├── resnet50_pcam.yaml
│   │           ├── resnet50_svhn.yaml
│   │           ├── resnet152_gtsrb.yaml
│   │           ├── resnet152_mnist.yaml
│   │           ├── resnet152_pcam.yaml
│   │           ├── resnet152_stl10.yaml
│   │           ├── resnet152_svhn.yaml
│   │           ├── resnet18_gtsrb.yaml
│   │           ├── resnet18_mnist.yaml
│   │           ├── resnet18_stl10.yaml
│   │           ├── resnet50_gtsrb.yaml
│   │           ├── resnet50_mnist.yaml
│   │           ├── resnet50_stl10.yaml
│   │           ├── resnet152_kmnist.yaml
│   │           ├── resnet152_sun397.yaml
│   │           ├── resnet18_kmnist.yaml
│   │           ├── resnet18_sun397.yaml
│   │           ├── resnet50_kmnist.yaml
│   │           ├── resnet50_sun397.yaml
│   │           ├── resnet152_cifar10.yaml
│   │           ├── resnet152_eurosat.yaml
│   │           ├── resnet152_fer2013.yaml
│   │           ├── resnet152_food101.yaml
│   │           ├── resnet18_cifar10.yaml
│   │           ├── resnet18_cifar100.yaml
│   │           ├── resnet18_eurosat.yaml
│   │           ├── resnet18_fer2013.yaml
│   │           ├── resnet18_food101.yaml
│   │           ├── resnet18_resisc45.yaml
│   │           ├── resnet50_cifar10.yaml
│   │           ├── resnet50_cifar100.yaml
│   │           ├── resnet50_eurosat.yaml
│   │           ├── resnet50_fer2013.yaml
│   │           ├── resnet50_food101.yaml
│   │           ├── resnet50_resisc45.yaml
│   │           ├── resnet152_cifar100.yaml
│   │           ├── resnet152_resisc45.yaml
│   │           ├── resnet18_fashion_mnist.yaml
│   │           ├── resnet18_rendered-sst2.yaml
│   │           ├── resnet18_stanford-cars.yaml
│   │           ├── resnet50_fashion_mnist.yaml
│   │           ├── resnet50_rendered-sst2.yaml
│   │           ├── resnet50_stanford-cars.yaml
│   │           ├── resnet152_fashion_mnist.yaml
│   │           ├── resnet152_rendered-sst2.yaml
│   │           ├── resnet152_stanford-cars.yaml
│   │           ├── resnet18_emnist_letters.yaml
│   │           ├── resnet50_emnist_letters.yaml
│   │           ├── resnet152_emnist_letters.yaml
│   │           ├── resnet152_oxford-iiit-pet.yaml
│   │           ├── resnet18_oxford-iiit-pet.yaml
│   │           ├── resnet50_oxford-iiit-pet.yaml
│   │           ├── resnet152_oxford_flowers102.yaml
│   │           ├── resnet18_oxford_flowers102.yaml
│   │           └── resnet50_oxford_flowers102.yaml
│   ├── nyuv2_config.yaml
│   ├── llama_full_finetune.yaml
│   ├── hydra
│   │   └── default.yaml
│   └── clip-vit-base-patch32_robustness_corrupted.yaml
├── fusion_bench
│   ├── compat
│   │   └── __init__.py
│   ├── metrics
│   │   ├── __init__.py
│   │   ├── continual_learning
│   │   │   └── __init__.py
│   │   ├── model_kinship
│   │   │   └── __init__.py
│   │   └── text_to_image_generation
│   │       └── __init__.py
│   ├── scripts
│   │   ├── __init__.py
│   │   └── clip
│   │       └── __init__.py
│   ├── method
│   │   ├── knots
│   │   │   └── __init__.py
│   │   ├── bitdelta
│   │   │   ├── bitdelta_utils
│   │   │   │   └── __init__.py
│   │   │   └── __init__.py
│   │   ├── dop
│   │   │   └── __init__.py
│   │   ├── model_stock
│   │   │   └── __init__.py
│   │   ├── wudi
│   │   │   └── __init__.py
│   │   ├── pruning
│   │   │   ├── sparsegpt_utils
│   │   │   │   └── __init__.py
│   │   │   ├── __init__.py
│   │   │   └── wanda_utils
│   │   │       └── __init__.py
│   │   ├── moe_pruner
│   │   │   ├── utils
│   │   │   │   └── __init__.py
│   │   │   ├── hooks
│   │   │   │   └── __init__.py
│   │   │   └── __init__.py
│   │   ├── doge_ta
│   │   │   └── __init__.py
│   │   ├── tall_mask
│   │   │   └── __init__.py
│   │   ├── slerp
│   │   │   └── __init__.py
│   │   ├── ties_merging
│   │   │   └── __init__.py
│   │   ├── ada_svd
│   │   │   └── __init__.py
│   │   ├── dawe
│   │   │   └── __init__.py
│   │   ├── pwe_moe
│   │   │   ├── phn
│   │   │   │   └── __init__.py
│   │   │   └── __init__.py
│   │   ├── surgery
│   │   │   └── __init__.py
│   │   ├── fw_merging
│   │   │   └── __init__.py
│   │   ├── trust_region
│   │   │   └── __init__.py
│   │   ├── task_arithmetic
│   │   │   └── __init__.py
│   │   ├── sparselo
│   │   │   └── __init__.py
│   │   ├── analysis
│   │   │   └── __init__.py
│   │   ├── rankone_moe
│   │   │   └── __init__.py
│   │   ├── weighted_average
│   │   │   └── __init__.py
│   │   ├── sparse_we_moe
│   │   │   └── __init__.py
│   │   ├── we_moe
│   │   │   ├── __init__.py
│   │   │   └── utils.py
│   │   ├── depth_upscaling
│   │   │   └── __init__.py
│   │   ├── regmean_plusplus
│   │   │   └── __init__.py
│   │   ├── dare
│   │   │   └── __init__.py
│   │   ├── lm_finetune
│   │   │   ├── __init__.py
│   │   │   └── causal_lm_pretrain.py
│   │   ├── regmean
│   │   │   └── __init__.py
│   │   ├── smile_upscaling
│   │   │   └── __init__.py
│   │   ├── gossip
│   │   │   └── __init__.py
│   │   ├── opcm
│   │   │   └── __init__.py
│   │   ├── mixture_of_experts
│   │   │   └── __init__.py
│   │   ├── fisher_merging
│   │   │   └── __init__.py
│   │   ├── expert_sparsity
│   │   │   └── __init__.py
│   │   ├── task_singular_vector
│   │   │   ├── __init__.py
│   │   │   ├── utils
│   │   │   │   └── __init__.py
│   │   │   └── TSVC.py
│   │   └── linear
│   │       └── __init__.py
│   ├── mixins
│   │   ├── optim
│   │   │   └── __init__.py
│   │   └── openclip_classification.py
│   ├── models
│   │   ├── nyuv2
│   │   │   └── __init__.py
│   │   ├── linearized
│   │   │   └── __init__.py
│   │   ├── smile_moe
│   │   │   └── __init__.py
│   │   ├── wrappers
│   │   │   └── __init__.py
│   │   ├── expert_sparsity
│   │   │   └── __init__.py
│   │   ├── llama
│   │   │   └── model_utils
│   │   │       └── __init__.py
│   │   ├── surgery
│   │   │   └── __init__.py
│   │   ├── masks
│   │   │   └── __init__.py
│   │   ├── chat_templates
│   │   │   └── __init__.py
│   │   ├── modeling_losparse_llama
│   │   │   ├── __init__.py
│   │   │   └── register.py
│   │   ├── open_clip
│   │   │   └── __init__.py
│   │   ├── modeling_smile_llama
│   │   │   ├── __init__.py
│   │   │   └── register.py
│   │   ├── modeling_smile_mistral
│   │   │   ├── __init__.py
│   │   │   └── register.py
│   │   ├── modeling_smile_qwen2
│   │   │   ├── __init__.py
│   │   │   └── register.py
│   │   └── modeling_smile_gemma2
│   │       └── __init__.py
│   ├── utils
│   │   ├── plot
│   │   │   └── __init__.py
│   │   ├── strenum
│   │   │   └── README.md
│   │   └── set.py
│   ├── dataset
│   │   ├── llama
│   │   │   ├── utils
│   │   │   │   └── __init__.py
│   │   │   └── __init__.py
│   │   ├── image_corruption
│   │   │   └── __init__.py
│   │   ├── arc_agi
│   │   │   └── __init__.py
│   │   ├── imdb.py
│   │   └── fer2013.py
│   ├── taskpool
│   │   ├── clip_vision
│   │   │   ├── utils
│   │   │   │   └── __init__.py
│   │   │   └── __init__.py
│   │   ├── llama
│   │   │   └── __init__.py
│   │   ├── openclip_vision
│   │   │   └── __init__.py
│   │   └── lm_eval_harness
│   │       └── __init__.py
│   ├── tasks
│   │   ├── flan_t5_text_generation
│   │   │   └── __init__.py
│   │   ├── __init__.py
│   │   ├── clip_classification
│   │   │   ├── clip_dataset.py
│   │   │   ├── rendered_sst2.py
│   │   │   ├── mnist.py
│   │   │   ├── svhn.py
│   │   │   ├── pcam.py
│   │   │   ├── emnist_mnist.py
│   │   │   ├── stl10.py
│   │   │   ├── kmnist.py
│   │   │   ├── fashion_mnist.py
│   │   │   └── fer2013.py
│   │   └── base_task.py
│   ├── optim
│   │   └── lr_scheduler
│   │       └── utils
│   │           └── __init__.py
│   ├── _get_started
│   │   └── __init__.py
│   ├── modelpool
│   │   ├── clip_vision
│   │   │   └── __init__.py
│   │   ├── openclip_vision
│   │   │   └── __init__.py
│   │   ├── seq2seq_lm
│   │   │   └── __init__.py
│   │   ├── causal_lm
│   │   │   └── __init__.py
│   │   └── seq_classification_lm
│   │       └── __init__.py
│   ├── __main__.py
│   └── constants
│       └── __init__.py
├── examples
│   ├── opcm
│   │   └── .gitignore
│   ├── open_clip
│   │   ├── src
│   │   │   └── __init__.py
│   │   ├── requirements.txt
│   │   ├── .gitignore
│   │   └── evaluate_single_model.sh
│   ├── clip_finetune
│   │   ├── .gitignore
│   │   └── config
│   │       ├── .gitignore
│   │       └── modelpool
│   │           ├── clip-finetune_TALL14.yaml
│   │           └── clip-finetune_TALL20.yaml
│   ├── hyperparam_search
│   │   └── .gitignore
│   ├── mergebench
│   │   ├── .gitignore
│   │   ├── requirements.txt
│   │   ├── evaluate_all.sh
│   │   ├── evaluate_gemma-2-2b.sh
│   │   ├── evaluate_gemma-2-9b.sh
│   │   ├── evaluate_Llama-3.1-8B.sh
│   │   ├── evaluate_Llama-3.2-3B.sh
│   │   ├── evaluate_gemma-2-2b-it.sh
│   │   ├── evaluate_gemma-2-9b-it.sh
│   │   └── evaluate_Llama-3.2-3B-Instruct.sh
│   ├── iterative_sparselo_pruning
│   │   └── .gitignore
│   ├── smile_upscaling
│   │   ├── .gitignore
│   │   └── SMILE.png
│   ├── README.md
│   ├── ada_svd
│   │   └── clip_vision.sh
│   ├── adamerging
│   │   ├── gpt_2.sh
│   │   └── flan_t5_base.sh
│   ├── gossip
│   │   ├── flan_t5.sh
│   │   └── clip.sh
│   ├── trust_region
│   │   └── READMD.md
│   └── randes
│       └── clip-vit-base-patch32.sh
├── .flake8
├── requirements.txt
├── set_cache_dir.sh
└── .github
    └── ISSUE_TEMPLATE
        └── config.yml

/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/docs/config:
--------------------------------------------------------------------------------
1 | ../config/
--------------------------------------------------------------------------------
/.vscode/.gitignore:
--------------------------------------------------------------------------------
1 | *.json
--------------------------------------------------------------------------------
/fusion_bench_config:
--------------------------------------------------------------------------------
1 | config
--------------------------------------------------------------------------------
/tests/test_utils/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/config/.gitignore:
--------------------------------------------------------------------------------
1 | *.local.yaml
--------------------------------------------------------------------------------
/docs/.gitignore:
--------------------------------------------------------------------------------
1 | /_figure_sources/
--------------------------------------------------------------------------------
/fusion_bench/compat/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/fusion_bench/metrics/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/fusion_bench/scripts/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/tests/test_method/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/tests/test_mixins/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/tests/test_modelpool/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/tests/test_models/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/docs/algorithms/model_stitching.md:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/docs/api/fusion_bench.utils/misc.md:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/examples/opcm/.gitignore:
--------------------------------------------------------------------------------
1 | images/
2 | 
--------------------------------------------------------------------------------
/examples/open_clip/src/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/fusion_bench/method/knots/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/fusion_bench/mixins/optim/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/fusion_bench/models/nyuv2/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/fusion_bench/scripts/clip/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/fusion_bench/utils/plot/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/docs/algorithms/specification_ensemble.md:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/examples/clip_finetune/.gitignore:
--------------------------------------------------------------------------------
1 | /tanganke/
--------------------------------------------------------------------------------
/fusion_bench/models/linearized/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/fusion_bench/models/smile_moe/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/fusion_bench/models/wrappers/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/examples/hyperparam_search/.gitignore:
--------------------------------------------------------------------------------
1 | *.db
2 | 
--------------------------------------------------------------------------------
/examples/mergebench/.gitignore:
--------------------------------------------------------------------------------
1 | /results/
2 | 
--------------------------------------------------------------------------------
/fusion_bench/dataset/llama/utils/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/fusion_bench/models/expert_sparsity/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/.flake8:
--------------------------------------------------------------------------------
1 | [flake8]
2 | ignore = E501, W503, E203
--------------------------------------------------------------------------------
/examples/clip_finetune/config/.gitignore:
--------------------------------------------------------------------------------
1 | *.local.yaml
--------------------------------------------------------------------------------
/examples/iterative_sparselo_pruning/.gitignore:
--------------------------------------------------------------------------------
1 | *.pdf
--------------------------------------------------------------------------------
/fusion_bench/dataset/image_corruption/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/fusion_bench/models/llama/model_utils/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/fusion_bench/taskpool/clip_vision/utils/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/fusion_bench/method/bitdelta/bitdelta_utils/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/fusion_bench/tasks/flan_t5_text_generation/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/examples/open_clip/requirements.txt:
--------------------------------------------------------------------------------
1 | open-clip-torch==2.0.2
2 | 
--------------------------------------------------------------------------------
/docs/css/material_extra.css:
--------------------------------------------------------------------------------
1 | .md-grid {
2 |   max-width: 100%;
3 | }
--------------------------------------------------------------------------------
/fusion_bench/dataset/llama/__init__.py:
--------------------------------------------------------------------------------
1 | from . import collate
2 | 
--------------------------------------------------------------------------------
/docs/algorithms/layer_recombination.md:
--------------------------------------------------------------------------------
1 | # Layer Recombination
2 | 
3 | 
--------------------------------------------------------------------------------
/examples/mergebench/requirements.txt:
--------------------------------------------------------------------------------
1 | immutabledict
2 | langdetect
--------------------------------------------------------------------------------
/fusion_bench/method/dop/__init__.py:
--------------------------------------------------------------------------------
1 | from .dop import ContinualDOPForCLIP
2 | 
--------------------------------------------------------------------------------
/fusion_bench/method/model_stock/__init__.py:
--------------------------------------------------------------------------------
1 | from .model_stock import ModelStock
2 | 
--------------------------------------------------------------------------------
/fusion_bench/optim/lr_scheduler/utils/__init__.py:
--------------------------------------------------------------------------------
1 | from .visualization import *
2 | 
--------------------------------------------------------------------------------
/fusion_bench/method/wudi/__init__.py:
--------------------------------------------------------------------------------
1 | from .wudi import WUDIMerging, wudi_merging
2 | 
--------------------------------------------------------------------------------
/fusion_bench/_get_started/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Tutorial module for FusionBench
3 | """
4 | 
--------------------------------------------------------------------------------
/fusion_bench/method/pruning/sparsegpt_utils/__init__.py:
--------------------------------------------------------------------------------
1 | from .sparsegpt import SparseGPT
2 | 
--------------------------------------------------------------------------------
/fusion_bench/modelpool/clip_vision/__init__.py:
--------------------------------------------------------------------------------
1 | from .modelpool import CLIPVisionModelPool
2 | 
--------------------------------------------------------------------------------
/fusion_bench/tasks/__init__.py:
--------------------------------------------------------------------------------
1 | # flake8: noqa F401
2 | from .base_task import BaseTask
3 | 
--------------------------------------------------------------------------------
/config/model/clip-vit/clip-vit-base-patch16.yaml:
--------------------------------------------------------------------------------
1 | _pretrained_: openai/clip-vit-base-patch16
2 | 
--------------------------------------------------------------------------------
/config/model/clip-vit/clip-vit-base-patch16_dtd.yaml:
--------------------------------------------------------------------------------
1 | dtd: tanganke/clip-vit-base-patch16_dtd
2 | 
--------------------------------------------------------------------------------
/config/model/clip-vit/clip-vit-base-patch32.yaml:
--------------------------------------------------------------------------------
1 | _pretrained_: openai/clip-vit-base-patch32
2 | 
--------------------------------------------------------------------------------
/config/model/clip-vit/clip-vit-base-patch32_dtd.yaml:
--------------------------------------------------------------------------------
1 | dtd: tanganke/clip-vit-base-patch32_dtd
2 | 
--------------------------------------------------------------------------------
/config/model/clip-vit/clip-vit-large-patch14.yaml:
--------------------------------------------------------------------------------
1 | _pretrained_: openai/clip-vit-large-patch14
2 | 
--------------------------------------------------------------------------------
/config/model/clip-vit/clip-vit-large-patch14_dtd.yaml:
--------------------------------------------------------------------------------
1 | dtd: tanganke/clip-vit-large-patch14_dtd
2 | 
--------------------------------------------------------------------------------
/docs/images/llm.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/images/llm.png
--------------------------------------------------------------------------------
/examples/open_clip/.gitignore:
--------------------------------------------------------------------------------
1 | ./outputs/
2 | ./.cache/
3 | ./checkpoints/
4 | ./tall_masks/
--------------------------------------------------------------------------------
/fusion_bench/method/moe_pruner/utils/__init__.py:
--------------------------------------------------------------------------------
1 | from .score import layer_load_balance_score
2 | 
--------------------------------------------------------------------------------
/fusion_bench/models/surgery/__init__.py:
--------------------------------------------------------------------------------
1 | from .surgerymodelwrapper import SurgeryModelWrapper
2 | 
--------------------------------------------------------------------------------
/config/model/clip-vit/clip-vit-base-patch16_gtsrb.yaml:
--------------------------------------------------------------------------------
1 | gtsrb: tanganke/clip-vit-base-patch16_gtsrb
2 | 
--------------------------------------------------------------------------------
/config/model/clip-vit/clip-vit-base-patch16_mnist.yaml:
--------------------------------------------------------------------------------
1 | mnist: tanganke/clip-vit-base-patch16_mnist
2 | 
--------------------------------------------------------------------------------
/config/model/clip-vit/clip-vit-base-patch16_pcam.yaml:
--------------------------------------------------------------------------------
1 | pcam: tanganke/clip-vit-base-patch16_pcam
2 | 
--------------------------------------------------------------------------------
/config/model/clip-vit/clip-vit-base-patch16_stl10.yaml:
--------------------------------------------------------------------------------
1 | stl10: tanganke/clip-vit-base-patch16_stl10
2 | 
--------------------------------------------------------------------------------
/config/model/clip-vit/clip-vit-base-patch16_svhn.yaml:
--------------------------------------------------------------------------------
1 | svhn: tanganke/clip-vit-base-patch16_svhn
2 | 
--------------------------------------------------------------------------------
/config/model/clip-vit/clip-vit-base-patch32_gtsrb.yaml:
--------------------------------------------------------------------------------
1 | gtsrb: tanganke/clip-vit-base-patch32_gtsrb
2 | 
--------------------------------------------------------------------------------
/config/model/clip-vit/clip-vit-base-patch32_mnist.yaml:
--------------------------------------------------------------------------------
1 | mnist: tanganke/clip-vit-base-patch32_mnist
2 | 
--------------------------------------------------------------------------------
/config/model/clip-vit/clip-vit-base-patch32_pcam.yaml:
--------------------------------------------------------------------------------
1 | pcam: tanganke/clip-vit-base-patch32_pcam
2 | 
--------------------------------------------------------------------------------
/config/model/clip-vit/clip-vit-base-patch32_stl10.yaml:
--------------------------------------------------------------------------------
1 | stl10: tanganke/clip-vit-base-patch32_stl10
2 | 
--------------------------------------------------------------------------------
/config/model/clip-vit/clip-vit-base-patch32_svhn.yaml:
--------------------------------------------------------------------------------
1 | svhn: tanganke/clip-vit-base-patch32_svhn
2 | 
--------------------------------------------------------------------------------
/config/model/clip-vit/clip-vit-large-patch14_pcam.yaml:
--------------------------------------------------------------------------------
1 | pcam: tanganke/clip-vit-large-patch14_pcam
2 | 
--------------------------------------------------------------------------------
/config/model/clip-vit/clip-vit-large-patch14_svhn.yaml:
--------------------------------------------------------------------------------
1 | svhn: tanganke/clip-vit-large-patch14_svhn
2 | 
--------------------------------------------------------------------------------
/config/taskpool/dummy.yaml:
--------------------------------------------------------------------------------
1 | _target_: fusion_bench.taskpool.DummyTaskPool
2 | model_save_path: null
3 | 
--------------------------------------------------------------------------------
/fusion_bench/modelpool/openclip_vision/__init__.py:
--------------------------------------------------------------------------------
1 | from .modelpool import OpenCLIPVisionModelPool
2 | 
--------------------------------------------------------------------------------
/fusion_bench/taskpool/llama/__init__.py:
--------------------------------------------------------------------------------
1 | from .test_generation import LlamaTestGenerationTaskPool
2 | 
--------------------------------------------------------------------------------
/config/model/clip-vit/clip-vit-base-patch16_cifar10.yaml:
--------------------------------------------------------------------------------
1 | cifar10: tanganke/clip-vit-base-patch16_cifar10
2 | 
--------------------------------------------------------------------------------
/config/model/clip-vit/clip-vit-base-patch16_eurosat.yaml:
--------------------------------------------------------------------------------
1 | eurosat: tanganke/clip-vit-base-patch16_eurosat
2 | 
--------------------------------------------------------------------------------
/config/model/clip-vit/clip-vit-base-patch16_fer2013.yaml:
--------------------------------------------------------------------------------
1 | fer2013: tanganke/clip-vit-base-patch16_fer2013
2 | 
--------------------------------------------------------------------------------
/config/model/clip-vit/clip-vit-base-patch16_food101.yaml:
--------------------------------------------------------------------------------
1 | food101: tanganke/clip-vit-base-patch16_food101
2 | 
--------------------------------------------------------------------------------
/config/model/clip-vit/clip-vit-base-patch16_kmnist.yaml:
--------------------------------------------------------------------------------
1 | kmnist: tanganke/clip-vit-base-patch16_kmnist
2 | 
--------------------------------------------------------------------------------
/config/model/clip-vit/clip-vit-base-patch16_sun397.yaml:
--------------------------------------------------------------------------------
1 | sun397: tanganke/clip-vit-base-patch16_sun397
2 | 
--------------------------------------------------------------------------------
/config/model/clip-vit/clip-vit-base-patch32_cifar10.yaml:
--------------------------------------------------------------------------------
1 | cifar10: tanganke/clip-vit-base-patch32_cifar10
2 | 
--------------------------------------------------------------------------------
/config/model/clip-vit/clip-vit-base-patch32_eurosat.yaml:
--------------------------------------------------------------------------------
1 | eurosat: tanganke/clip-vit-base-patch32_eurosat
2 | 
--------------------------------------------------------------------------------
/config/model/clip-vit/clip-vit-base-patch32_fer2013.yaml:
--------------------------------------------------------------------------------
1 | fer2013: tanganke/clip-vit-base-patch32_fer2013
2 | 
--------------------------------------------------------------------------------
/config/model/clip-vit/clip-vit-base-patch32_food101.yaml:
--------------------------------------------------------------------------------
1 | food101: tanganke/clip-vit-base-patch32_food101
2 | 
--------------------------------------------------------------------------------
/config/model/clip-vit/clip-vit-base-patch32_kmnist.yaml:
--------------------------------------------------------------------------------
1 | kmnist: tanganke/clip-vit-base-patch32_kmnist
2 | 
--------------------------------------------------------------------------------
/config/model/clip-vit/clip-vit-base-patch32_sun397.yaml:
--------------------------------------------------------------------------------
1 | sun397: tanganke/clip-vit-base-patch32_sun397
2 | 
--------------------------------------------------------------------------------
/config/model/clip-vit/clip-vit-large-patch14_gtsrb.yaml:
--------------------------------------------------------------------------------
1 | gtsrb: tanganke/clip-vit-large-patch14_gtsrb
2 | 
--------------------------------------------------------------------------------
/config/model/clip-vit/clip-vit-large-patch14_kmnist.yaml:
--------------------------------------------------------------------------------
1 | kmnist: tanganke/clip-vit-large-patch14_kmnist
2 | 
--------------------------------------------------------------------------------
/config/model/clip-vit/clip-vit-large-patch14_mnist.yaml:
--------------------------------------------------------------------------------
1 | mnist: tanganke/clip-vit-large-patch14_mnist
2 | 
--------------------------------------------------------------------------------
/config/model/clip-vit/clip-vit-large-patch14_stl10.yaml:
--------------------------------------------------------------------------------
1 | stl10: tanganke/clip-vit-large-patch14_stl10
2 | 
--------------------------------------------------------------------------------
/config/model/clip-vit/clip-vit-large-patch14_sun397.yaml:
--------------------------------------------------------------------------------
1 | sun397: tanganke/clip-vit-large-patch14_sun397
2 | 
--------------------------------------------------------------------------------
/fusion_bench/method/doge_ta/__init__.py:
--------------------------------------------------------------------------------
1 | # flake8: noqa F401
2 | from .doge_ta import DOGE_TA_Algorithm
3 | 
--------------------------------------------------------------------------------
/fusion_bench/method/tall_mask/__init__.py:
--------------------------------------------------------------------------------
1 | from .task_arithmetic import TallMaskTaskArithmeticAlgorithm
2 | 
--------------------------------------------------------------------------------
/config/model/clip-vit/clip-vit-base-patch16_cifar100.yaml:
--------------------------------------------------------------------------------
1 | cifar100: tanganke/clip-vit-base-patch16_cifar100
2 | 
--------------------------------------------------------------------------------
/config/model/clip-vit/clip-vit-base-patch16_resisc45.yaml:
--------------------------------------------------------------------------------
1 | resisc45: tanganke/clip-vit-base-patch16_resisc45
2 | 
--------------------------------------------------------------------------------
/config/model/clip-vit/clip-vit-base-patch32_cifar100.yaml:
--------------------------------------------------------------------------------
1 | cifar100: tanganke/clip-vit-base-patch32_cifar100
2 | 
--------------------------------------------------------------------------------
/config/model/clip-vit/clip-vit-base-patch32_resisc45.yaml:
--------------------------------------------------------------------------------
1 | resisc45: tanganke/clip-vit-base-patch32_resisc45
2 | 
--------------------------------------------------------------------------------
/config/model/clip-vit/clip-vit-large-patch14_cifar10.yaml:
--------------------------------------------------------------------------------
1 | cifar10: tanganke/clip-vit-large-patch14_cifar10
2 | 
--------------------------------------------------------------------------------
/config/model/clip-vit/clip-vit-large-patch14_cifar100.yaml:
--------------------------------------------------------------------------------
1 | cifar100: tanganke/clip-vit-large-patch14_cifar100
2 | 
--------------------------------------------------------------------------------
/config/model/clip-vit/clip-vit-large-patch14_eurosat.yaml:
--------------------------------------------------------------------------------
1 | eurosat: tanganke/clip-vit-large-patch14_eurosat
2 | 
--------------------------------------------------------------------------------
/config/model/clip-vit/clip-vit-large-patch14_fer2013.yaml:
--------------------------------------------------------------------------------
1 | fer2013: tanganke/clip-vit-large-patch14_fer2013
2 | 
--------------------------------------------------------------------------------
/config/model/clip-vit/clip-vit-large-patch14_food101.yaml:
--------------------------------------------------------------------------------
1 | food101: tanganke/clip-vit-large-patch14_food101
2 | 
--------------------------------------------------------------------------------
/config/model/clip-vit/clip-vit-large-patch14_resisc45.yaml:
--------------------------------------------------------------------------------
1 | resisc45: tanganke/clip-vit-large-patch14_resisc45
2 | 
--------------------------------------------------------------------------------
/docs/images/model_mixing.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/images/model_mixing.png
--------------------------------------------------------------------------------
/fusion_bench/metrics/continual_learning/__init__.py:
--------------------------------------------------------------------------------
1 | from .backward_transfer import compute_backward_transfer
2 | 
--------------------------------------------------------------------------------
/fusion_bench/modelpool/seq2seq_lm/__init__.py:
--------------------------------------------------------------------------------
1 | # flake8: noqa F401
2 | from .modelpool import Seq2SeqLMPool
3 | 
--------------------------------------------------------------------------------
/fusion_bench/taskpool/openclip_vision/__init__.py:
--------------------------------------------------------------------------------
1 | from .openclip_taskpool import OpenCLIPVisionModelTaskPool
2 | 
--------------------------------------------------------------------------------
/config/dataset/summarization/xsum.yaml:
--------------------------------------------------------------------------------
1 | xsum:
2 |   _target_: datasets.load_dataset
3 |   path: EdinburghNLP/xsum
4 | 
--------------------------------------------------------------------------------
/docs/images/model_ensemble.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/images/model_ensemble.png
--------------------------------------------------------------------------------
/docs/images/model_merging.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/images/model_merging.png
--------------------------------------------------------------------------------
/examples/smile_upscaling/.gitignore:
--------------------------------------------------------------------------------
1 | *.pdf
2 | /collected_results/
3 | /outputs/
4 | /results/
5 | /results.backup/
6 | 
--------------------------------------------------------------------------------
/fusion_bench/models/masks/__init__.py:
--------------------------------------------------------------------------------
1 | # flake8: noqa F401
2 | from .mask_model import MaskModel, mask_sparsity
3 | 
--------------------------------------------------------------------------------
/fusion_bench/tasks/clip_classification/clip_dataset.py:
--------------------------------------------------------------------------------
1 | from fusion_bench.dataset.clip_dataset import CLIPDataset
2 | 
--------------------------------------------------------------------------------
/config/fabric/loggers/mlflow_logger.yaml:
--------------------------------------------------------------------------------
1 | # https://mlflow.org/
2 | _target_: lightning.pytorch.loggers.MLFlowLogger
3 | 
--------------------------------------------------------------------------------
/docs/algorithms/images/ewemoe.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/algorithms/images/ewemoe.png
--------------------------------------------------------------------------------
/docs/algorithms/images/wemoe.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/algorithms/images/wemoe.png
--------------------------------------------------------------------------------
/docs/api/fusion_bench.optim.md:
--------------------------------------------------------------------------------
1 | # fusion_bench.optim
2 | 
3 | ## MeZO optimizer
4 | 
5 | ::: fusion_bench.optim.MeZO
--------------------------------------------------------------------------------
/docs/cli/images/vscode_debug.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/cli/images/vscode_debug.png
--------------------------------------------------------------------------------
/docs/images/fusion_bench_flow.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/images/fusion_bench_flow.png
--------------------------------------------------------------------------------
/docs/images/model_upscaling.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/images/model_upscaling.png
--------------------------------------------------------------------------------
/docs/modelpool/clip-vit-cos.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/modelpool/clip-vit-cos.png
--------------------------------------------------------------------------------
/docs/readinglist/images/watt.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/readinglist/images/watt.png
--------------------------------------------------------------------------------
/fusion_bench/__main__.py:
--------------------------------------------------------------------------------
1 | from fusion_bench.scripts.cli import main
2 | 
3 | if __name__ == "__main__":
4 |     main()
5 | 
--------------------------------------------------------------------------------
/fusion_bench/method/slerp/__init__.py:
--------------------------------------------------------------------------------
1 | # flake8: noqa F401
2 | from .slerp import SlerpForCausalLM, SlerpMergeAlgorithm
3 | 
--------------------------------------------------------------------------------
/fusion_bench/method/ties_merging/__init__.py:
--------------------------------------------------------------------------------
1 | # flake8: noqa F401
2 | from .ties_merging import TiesMergingAlgorithm
3 | 
--------------------------------------------------------------------------------
/config/method/doge_ta/doge_ta.yaml:
--------------------------------------------------------------------------------
1 | _target_: fusion_bench.method.DOGE_TA_Algorithm
2 | subspace: 6
3 | K: 30
4 | lamda: 0.07
5 | 
--------------------------------------------------------------------------------
/config/model/clip-vit/clip-vit-base-patch16_fashion_mnist.yaml:
--------------------------------------------------------------------------------
1 | fashion_mnist: tanganke/clip-vit-base-patch16_fashion_mnist
2 | 
--------------------------------------------------------------------------------
/config/model/clip-vit/clip-vit-base-patch16_rendered-sst2.yaml:
--------------------------------------------------------------------------------
1 | rendered-sst2: tanganke/clip-vit-base-patch16_rendered-sst2
2 | 
--------------------------------------------------------------------------------
/config/model/clip-vit/clip-vit-base-patch16_stanford-cars.yaml:
--------------------------------------------------------------------------------
1 | stanford-cars: tanganke/clip-vit-base-patch16_stanford-cars
2 | 
--------------------------------------------------------------------------------
/config/model/clip-vit/clip-vit-base-patch32_fashion_mnist.yaml:
--------------------------------------------------------------------------------
1 | fashion_mnist: tanganke/clip-vit-base-patch32_fashion_mnist
2 | 
--------------------------------------------------------------------------------
/config/model/clip-vit/clip-vit-base-patch32_rendered-sst2.yaml:
--------------------------------------------------------------------------------
1 | rendered-sst2: tanganke/clip-vit-base-patch32_rendered-sst2
2 | 
--------------------------------------------------------------------------------
/config/model/clip-vit/clip-vit-base-patch32_stanford-cars.yaml:
--------------------------------------------------------------------------------
1 | stanford-cars: tanganke/clip-vit-base-patch32_stanford-cars
2 | 
--------------------------------------------------------------------------------
/config/model/clip-vit/clip-vit-large-patch14_fashion_mnist.yaml:
--------------------------------------------------------------------------------
1 | fashion_mnist: tanganke/clip-vit-large-patch14_fashion_mnist
2 | 
--------------------------------------------------------------------------------
/config/model/clip-vit/clip-vit-large-patch14_rendered-sst2.yaml:
--------------------------------------------------------------------------------
1 | rendered-sst2: tanganke/clip-vit-large-patch14_rendered-sst2
2 | 
--------------------------------------------------------------------------------
/config/model/clip-vit/clip-vit-large-patch14_stanford-cars.yaml:
--------------------------------------------------------------------------------
1 | stanford-cars: tanganke/clip-vit-large-patch14_stanford-cars
2 | 
--------------------------------------------------------------------------------
/docs/algorithms/images/bitdelta.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/algorithms/images/bitdelta.png
--------------------------------------------------------------------------------
/docs/algorithms/images/ewemoe_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/algorithms/images/ewemoe_1.png
--------------------------------------------------------------------------------
/docs/algorithms/images/ewemoe_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/algorithms/images/ewemoe_2.png
--------------------------------------------------------------------------------
/docs/algorithms/images/pwe_moe.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/algorithms/images/pwe_moe.png
--------------------------------------------------------------------------------
/docs/algorithms/images/sigmoid.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/algorithms/images/sigmoid.png
--------------------------------------------------------------------------------
/docs/cli/images/pycharm_debug_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/cli/images/pycharm_debug_1.png
--------------------------------------------------------------------------------
/docs/cli/images/pycharm_debug_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/cli/images/pycharm_debug_2.png
--------------------------------------------------------------------------------
/docs/cli/images/pycharm_debug_3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/cli/images/pycharm_debug_3.png
--------------------------------------------------------------------------------
/docs/cli/images/tab_completion.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/cli/images/tab_completion.png
--------------------------------------------------------------------------------
/docs/images/learning_paradiagm.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/images/learning_paradiagm.png
--------------------------------------------------------------------------------
/docs/readinglist/images/fusellm.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/readinglist/images/fusellm.png
--------------------------------------------------------------------------------
/docs/readinglist/images/lorahub.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/readinglist/images/lorahub.png
--------------------------------------------------------------------------------
/docs/readinglist/images/pwe_moe.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/readinglist/images/pwe_moe.png
-------------------------------------------------------------------------------- /examples/smile_upscaling/SMILE.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/examples/smile_upscaling/SMILE.png -------------------------------------------------------------------------------- /fusion_bench/method/ada_svd/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa F401 2 | from .clip_vision import AdaSVDMergingForCLIPVisionModel 3 | -------------------------------------------------------------------------------- /fusion_bench/method/dawe/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa F401 2 | from .dawe_for_clip import DataAdaptiveWeightEnsemblingForCLIP 3 | -------------------------------------------------------------------------------- /config/model/clip-vit/clip-vit-base-patch16_emnist_letters.yaml: -------------------------------------------------------------------------------- 1 | emnist_letters: tanganke/clip-vit-base-patch16_emnist_letters 2 | -------------------------------------------------------------------------------- /config/model/clip-vit/clip-vit-base-patch16_oxford-iiit-pet.yaml: -------------------------------------------------------------------------------- 1 | oxford-iiit-pet: tanganke/clip-vit-base-patch16_oxford-iiit-pet 2 | -------------------------------------------------------------------------------- /config/model/clip-vit/clip-vit-base-patch32_emnist_letters.yaml: -------------------------------------------------------------------------------- 1 | emnist_letters: tanganke/clip-vit-base-patch32_emnist_letters 2 | -------------------------------------------------------------------------------- /config/model/clip-vit/clip-vit-base-patch32_oxford-iiit-pet.yaml: -------------------------------------------------------------------------------- 1 | oxford-iiit-pet: tanganke/clip-vit-base-patch32_oxford-iiit-pet 2 | -------------------------------------------------------------------------------- /config/model/clip-vit/clip-vit-large-patch14_emnist_letters.yaml: -------------------------------------------------------------------------------- 1 | emnist_letters: tanganke/clip-vit-large-patch14_emnist_letters 2 | -------------------------------------------------------------------------------- /docs/algorithms/images/adamerging.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/algorithms/images/adamerging.png -------------------------------------------------------------------------------- /docs/algorithms/images/iso_merging.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/algorithms/images/iso_merging.png -------------------------------------------------------------------------------- /docs/algorithms/images/solar10.7B.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/algorithms/images/solar10.7B.png -------------------------------------------------------------------------------- /docs/cli/images/fusion_bench_webui.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/cli/images/fusion_bench_webui.png -------------------------------------------------------------------------------- /docs/images/multi-task_core_steps.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/images/multi-task_core_steps.png -------------------------------------------------------------------------------- /docs/readinglist/images/forkmerge.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/readinglist/images/forkmerge.png -------------------------------------------------------------------------------- /docs/readinglist/images/fs-merge.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/readinglist/images/fs-merge.png -------------------------------------------------------------------------------- /docs/readinglist/images/fusechat.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/readinglist/images/fusechat.png -------------------------------------------------------------------------------- /docs/readinglist/images/lora_lego.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/readinglist/images/lora_lego.png -------------------------------------------------------------------------------- /docs/readinglist/images/pituning.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/readinglist/images/pituning.png -------------------------------------------------------------------------------- /fusion_bench/method/pwe_moe/phn/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa F401 2 | from .solvers import EPOSolver, LinearScalarizationSolver 3 | -------------------------------------------------------------------------------- /fusion_bench/method/surgery/__init__.py: -------------------------------------------------------------------------------- 1 | from .clip_layer_wise_adamerging_surgery import CLIPLayerWiseAdaMergingSurgeryAlgorithm 2 | -------------------------------------------------------------------------------- /fusion_bench/utils/strenum/README.md: -------------------------------------------------------------------------------- 1 | This is an alternative implementation of `enum.StrEnum` (which is only available in Python 3.11+). 
2 | -------------------------------------------------------------------------------- /config/dataset/image_classification/test/mnist.yaml: -------------------------------------------------------------------------------- 1 | mnist: 2 | _target_: datasets.load_dataset 3 | path: mnist 4 | split: test 5 | -------------------------------------------------------------------------------- /config/dataset/image_classification/train/mnist.yaml: -------------------------------------------------------------------------------- 1 | mnist: 2 | _target_: datasets.load_dataset 3 | path: mnist 4 | split: train 5 | -------------------------------------------------------------------------------- /config/method/isotropic_merging/iso_c.yaml: -------------------------------------------------------------------------------- 1 | _target_: fusion_bench.method.ISO_C_Merge 2 | scaling_factor: 1.0 3 | exclude_keys: null 4 | -------------------------------------------------------------------------------- /config/model/clip-vit/clip-vit-base-patch16_oxford_flowers102.yaml: -------------------------------------------------------------------------------- 1 | oxford_flowers102: tanganke/clip-vit-base-patch16_oxford_flowers102 2 | -------------------------------------------------------------------------------- /config/model/clip-vit/clip-vit-base-patch32_oxford_flowers102.yaml: -------------------------------------------------------------------------------- 1 | oxford_flowers102: tanganke/clip-vit-base-patch32_oxford_flowers102 2 | -------------------------------------------------------------------------------- /config/model/clip-vit/clip-vit-large-patch14_oxford-iiit-pet.yaml: -------------------------------------------------------------------------------- 1 | oxford-iiit-pet: tanganke/clip-vit-large-patch14_oxford-iiit-pet 2 | -------------------------------------------------------------------------------- /docs/algorithms/images/ties_merging.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/algorithms/images/ties_merging.jpg -------------------------------------------------------------------------------- /docs/images/multi-task_model_fusion.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/images/multi-task_model_fusion.png -------------------------------------------------------------------------------- /docs/modelpool/images/convnext_block.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/modelpool/images/convnext_block.png -------------------------------------------------------------------------------- /docs/readinglist/images/adapter_soup.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/readinglist/images/adapter_soup.png -------------------------------------------------------------------------------- /docs/readinglist/images/twin_merging.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/readinglist/images/twin_merging.png -------------------------------------------------------------------------------- /fusion_bench/models/chat_templates/__init__.py: 
-------------------------------------------------------------------------------- 1 | from .load_tokenizer import chat_template_mapping, load_tokenizer_with_chat_template 2 | -------------------------------------------------------------------------------- /config/dataset/image_classification/test/dtd.yaml: -------------------------------------------------------------------------------- 1 | dtd: 2 | _target_: datasets.load_dataset 3 | path: tanganke/dtd 4 | split: test 5 | -------------------------------------------------------------------------------- /config/dataset/image_classification/test/fer2013.yaml: -------------------------------------------------------------------------------- 1 | fer2013: 2 | _target_: fusion_bench.dataset.fer2013.load_fer2013 3 | split: test 4 | -------------------------------------------------------------------------------- /config/dataset/image_classification/train/dtd.yaml: -------------------------------------------------------------------------------- 1 | dtd: 2 | _target_: datasets.load_dataset 3 | path: tanganke/dtd 4 | split: train 5 | -------------------------------------------------------------------------------- /config/dataset/summarization/test/xsum.yaml: -------------------------------------------------------------------------------- 1 | xsum: 2 | _target_: datasets.load_dataset 3 | path: EdinburghNLP/xsum 4 | split: test 5 | -------------------------------------------------------------------------------- /config/dataset/summarization/train/xsum.yaml: -------------------------------------------------------------------------------- 1 | xsum: 2 | _target_: datasets.load_dataset 3 | path: EdinburghNLP/xsum 4 | split: train 5 | -------------------------------------------------------------------------------- /config/model/clip-vit/clip-vit-large-patch14_oxford_flowers102.yaml: -------------------------------------------------------------------------------- 1 | oxford_flowers102: tanganke/clip-vit-large-patch14_oxford_flowers102 2 | -------------------------------------------------------------------------------- /docs/algorithms/images/Task Arithmetic.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/algorithms/images/Task Arithmetic.png -------------------------------------------------------------------------------- /docs/algorithms/images/smile_upscaling.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/algorithms/images/smile_upscaling.png -------------------------------------------------------------------------------- /docs/algorithms/images/sparse_upcycling.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/algorithms/images/sparse_upcycling.png -------------------------------------------------------------------------------- /docs/algorithms/images/wemoe_lr_tuning.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/algorithms/images/wemoe_lr_tuning.png -------------------------------------------------------------------------------- /docs/images/accelerate model training.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/images/accelerate model 
training.png -------------------------------------------------------------------------------- /docs/images/framework_of_model_fusion.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/images/framework_of_model_fusion.png -------------------------------------------------------------------------------- /docs/readinglist/images/depth_upscaling.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/readinglist/images/depth_upscaling.png -------------------------------------------------------------------------------- /docs/readinglist/images/scaling_smart.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/readinglist/images/scaling_smart.png -------------------------------------------------------------------------------- /docs/readinglist/images/smile_upscaling.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/readinglist/images/smile_upscaling.png -------------------------------------------------------------------------------- /examples/README.md: -------------------------------------------------------------------------------- 1 | # Examples Folder 2 | 3 | This directory contains examples demonstrating how to use the FusionBench library. 4 | -------------------------------------------------------------------------------- /fusion_bench/method/fw_merging/__init__.py: -------------------------------------------------------------------------------- 1 | from .fw_hard import FrankWolfeHardAlgorithm 2 | from .fw_soft import FrankWolfeSoftAlgorithm 3 | -------------------------------------------------------------------------------- /fusion_bench/method/trust_region/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa F401 2 | from .clip_task_arithmetic import TaskArithmeticWithTrustRegionForCLIP 3 | -------------------------------------------------------------------------------- /fusion_bench/taskpool/lm_eval_harness/__init__.py: -------------------------------------------------------------------------------- 1 | from .taskpool import LMEvalHarnessTaskPool 2 | 3 | __all__ = ["LMEvalHarnessTaskPool"] 4 | -------------------------------------------------------------------------------- /config/dataset/image_classification/test/gtsrb.yaml: -------------------------------------------------------------------------------- 1 | gtsrb: 2 | _target_: datasets.load_dataset 3 | path: tanganke/gtsrb 4 | split: test 5 | -------------------------------------------------------------------------------- /config/dataset/image_classification/test/kmnist.yaml: -------------------------------------------------------------------------------- 1 | kmnist: 2 | _target_: datasets.load_dataset 3 | path: tanganke/kmnist 4 | split: test 5 | -------------------------------------------------------------------------------- /config/dataset/image_classification/test/stl10.yaml: -------------------------------------------------------------------------------- 1 | stl10: 2 | _target_: datasets.load_dataset 3 | path: tanganke/stl10 4 | split: test 5 | --------------------------------------------------------------------------------
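The dataset configs in this section (gtsrb, kmnist, and stl10 above, plus the cifar10/eurosat files that follow) all point `_target_` at `datasets.load_dataset`, so each YAML is a declarative spelling of a single loader call. Below is a minimal sketch of the equivalent direct call for the stl10 test split; the `tanganke/stl10` Hub repository is taken from the config, and its availability is assumed rather than verified here.

# Minimal sketch, assuming the Hugging Face `datasets` package is installed and
# the `tanganke/stl10` dataset named in the YAML above is reachable on the Hub.
from datasets import load_dataset

stl10_test = load_dataset(path="tanganke/stl10", split="test")
print(stl10_test)  # a datasets.Dataset holding the test-split examples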
/config/dataset/image_classification/test/sun397.yaml: -------------------------------------------------------------------------------- 1 | sun397: 2 | _target_: datasets.load_dataset 3 | path: tanganke/sun397 4 | split: test 5 | -------------------------------------------------------------------------------- /config/dataset/image_classification/train/fer2013.yaml: -------------------------------------------------------------------------------- 1 | fer2013: 2 | _target_: fusion_bench.dataset.fer2013.load_fer2013 3 | split: train 4 | -------------------------------------------------------------------------------- /config/dataset/image_classification/train/gtsrb.yaml: -------------------------------------------------------------------------------- 1 | gtsrb: 2 | _target_: datasets.load_dataset 3 | path: tanganke/gtsrb 4 | split: train 5 | -------------------------------------------------------------------------------- /config/dataset/image_classification/train/stl10.yaml: -------------------------------------------------------------------------------- 1 | stl10: 2 | _target_: datasets.load_dataset 3 | path: tanganke/stl10 4 | split: train 5 | -------------------------------------------------------------------------------- /config/dataset/summarization/val/xsum.yaml: -------------------------------------------------------------------------------- 1 | xsum: 2 | _target_: datasets.load_dataset 3 | path: EdinburghNLP/xsum 4 | split: validation 5 | -------------------------------------------------------------------------------- /docs/guides/fusion_bench/mixins/lightning_fabric.md: -------------------------------------------------------------------------------- 1 | # LightningFabricMixin 2 | 3 | ## Reference 4 | 5 | ::: fusion_bench.mixins.lightning_fabric -------------------------------------------------------------------------------- /docs/readinglist/images/Chronopoulou2023.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/readinglist/images/Chronopoulou2023.png -------------------------------------------------------------------------------- /docs/readinglist/images/enneng2024survey.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/readinglist/images/enneng2024survey.png -------------------------------------------------------------------------------- /docs/readinglist/images/sparse-modelsoups.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/readinglist/images/sparse-modelsoups.png -------------------------------------------------------------------------------- /docs/readinglist/images/sparse_upcycling.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/readinglist/images/sparse_upcycling.png -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | matplotlib 3 | scipy 4 | torch 5 | lightning 6 | transformers 7 | datasets 8 | peft 9 | h5py 10 | -------------------------------------------------------------------------------- /config/dataset/image_classification/test/cifar10.yaml: 
-------------------------------------------------------------------------------- 1 | cifar10: 2 | _target_: datasets.load_dataset 3 | path: tanganke/cifar10 4 | split: test 5 | -------------------------------------------------------------------------------- /config/dataset/image_classification/test/eurosat.yaml: -------------------------------------------------------------------------------- 1 | eurosat: 2 | _target_: datasets.load_dataset 3 | path: tanganke/eurosat 4 | split: test 5 | -------------------------------------------------------------------------------- /config/dataset/image_classification/test/food101.yaml: -------------------------------------------------------------------------------- 1 | food101: 2 | _target_: datasets.load_dataset 3 | path: ethz/food101 4 | split: validation 5 | -------------------------------------------------------------------------------- /config/dataset/image_classification/test/pcam.yaml: -------------------------------------------------------------------------------- 1 | pcam: 2 | _target_: datasets.load_dataset 3 | path: 1aurent/PatchCamelyon 4 | split: test 5 | -------------------------------------------------------------------------------- /config/dataset/image_classification/train/cifar10.yaml: -------------------------------------------------------------------------------- 1 | cifar10: 2 | _target_: datasets.load_dataset 3 | path: tanganke/cifar10 4 | split: train 5 | -------------------------------------------------------------------------------- /config/dataset/image_classification/train/eurosat.yaml: -------------------------------------------------------------------------------- 1 | eurosat: 2 | _target_: datasets.load_dataset 3 | path: tanganke/eurosat 4 | split: train 5 | -------------------------------------------------------------------------------- /config/dataset/image_classification/train/food101.yaml: -------------------------------------------------------------------------------- 1 | food101: 2 | _target_: datasets.load_dataset 3 | path: ethz/food101 4 | split: train 5 | -------------------------------------------------------------------------------- /config/dataset/image_classification/train/kmnist.yaml: -------------------------------------------------------------------------------- 1 | kmnist: 2 | _target_: datasets.load_dataset 3 | path: tanganke/kmnist 4 | split: train 5 | -------------------------------------------------------------------------------- /config/dataset/image_classification/train/pcam.yaml: -------------------------------------------------------------------------------- 1 | pcam: 2 | _target_: datasets.load_dataset 3 | path: 1aurent/PatchCamelyon 4 | split: train 5 | -------------------------------------------------------------------------------- /config/dataset/image_classification/train/sun397.yaml: -------------------------------------------------------------------------------- 1 | sun397: 2 | _target_: datasets.load_dataset 3 | path: tanganke/sun397 4 | split: train 5 | -------------------------------------------------------------------------------- /docs/algorithms/images/max-model_predictor.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/algorithms/images/max-model_predictor.png -------------------------------------------------------------------------------- /docs/algorithms/images/wemoe_loss_landscape.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/algorithms/images/wemoe_loss_landscape.png -------------------------------------------------------------------------------- /docs/modelpool/images/clip_eight_corruption.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/modelpool/images/clip_eight_corruption.png -------------------------------------------------------------------------------- /docs/readinglist/images/branch_and_merging.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/readinglist/images/branch_and_merging.png -------------------------------------------------------------------------------- /fusion_bench/method/task_arithmetic/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa F401 2 | from .task_arithmetic import TaskArithmeticAlgorithm, task_arithmetic_merge 3 | -------------------------------------------------------------------------------- /fusion_bench/modelpool/causal_lm/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa F401 2 | from .causal_lm import CausalLMBackbonePool, CausalLMPool, load_peft_causal_lm 3 | -------------------------------------------------------------------------------- /config/dataset/image_classification/test/cifar100.yaml: -------------------------------------------------------------------------------- 1 | cifar100: 2 | _target_: datasets.load_dataset 3 | path: tanganke/cifar100 4 | split: test 5 | -------------------------------------------------------------------------------- /config/dataset/image_classification/test/resisc45.yaml: -------------------------------------------------------------------------------- 1 | resisc45: 2 | _target_: datasets.load_dataset 3 | path: tanganke/resisc45 4 | split: test 5 | -------------------------------------------------------------------------------- /config/dataset/image_classification/train/cifar100.yaml: -------------------------------------------------------------------------------- 1 | cifar100: 2 | _target_: datasets.load_dataset 3 | path: tanganke/cifar100 4 | split: train 5 | -------------------------------------------------------------------------------- /config/dataset/image_classification/train/resisc45.yaml: -------------------------------------------------------------------------------- 1 | resisc45: 2 | _target_: datasets.load_dataset 3 | path: tanganke/resisc45 4 | split: train 5 | -------------------------------------------------------------------------------- /config/dataset/text_generation/test/gsm8k.yaml: -------------------------------------------------------------------------------- 1 | gsm8k: 2 | _target_: datasets.load_dataset 3 | path: openai/gsm8k 4 | name: main 5 | split: test 6 | -------------------------------------------------------------------------------- /config/dataset/text_generation/train/gsm8k.yaml: -------------------------------------------------------------------------------- 1 | gsm8k: 2 | _target_: datasets.load_dataset 3 | path: openai/gsm8k 4 | name: main 5 | split: train 6 | -------------------------------------------------------------------------------- /docs/readinglist/images/branch_and_merging_alg.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/readinglist/images/branch_and_merging_alg.png -------------------------------------------------------------------------------- /fusion_bench/method/sparselo/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa F401 2 | from .sparselo import IterativeSparseLoForLlama, PCPSparseLoForLlama, SparseLoForLlama 3 | -------------------------------------------------------------------------------- /fusion_bench/tasks/clip_classification/rendered_sst2.py: -------------------------------------------------------------------------------- 1 | classnames = ["negative", "positive"] 2 | 3 | templates = [lambda c: f"a {c} review of a movie."] 4 | -------------------------------------------------------------------------------- /config/dataset/llm_sft/ultrachat_200k.yaml: -------------------------------------------------------------------------------- 1 | ultrachat-200k: 2 | _target_: fusion_bench.dataset.ultrachat.load_tokenized_ultrachat_200k 3 | tokenizer: ??? 4 | -------------------------------------------------------------------------------- /config/dataset/question_answering/train/MetaMathQA.yaml: -------------------------------------------------------------------------------- 1 | MetaMathQA: 2 | _target_: datasets.load_dataset 3 | path: meta-math/MetaMathQA 4 | split: train 5 | -------------------------------------------------------------------------------- /docs/algorithms/images/concrete_subspace_learning.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/algorithms/images/concrete_subspace_learning.png -------------------------------------------------------------------------------- /docs/algorithms/images/fedmr_model_recombination.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/algorithms/images/fedmr_model_recombination.jpg -------------------------------------------------------------------------------- /docs/modelpool/images/NYUv2-0000003446-63769b25.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/modelpool/images/NYUv2-0000003446-63769b25.jpg -------------------------------------------------------------------------------- /config/dataset/image_classification/test/cub-200-2011.yaml: -------------------------------------------------------------------------------- 1 | cub-200-2011: 2 | _target_: datasets.load_dataset 3 | path: Donghyun99/CUB-200-2011 4 | split: test 5 | -------------------------------------------------------------------------------- /config/dataset/image_classification/test/emnist_mnist.yaml: -------------------------------------------------------------------------------- 1 | emnist_mnist: 2 | _target_: datasets.load_dataset 3 | path: tanganke/emnist_mnist 4 | split: test 5 | -------------------------------------------------------------------------------- /config/dataset/image_classification/test/rendered-sst2.yaml: -------------------------------------------------------------------------------- 1 | rendered-sst2: 2 | _target_: datasets.load_dataset 3 | path: nateraw/rendered-sst2 4 | split: test 5 | -------------------------------------------------------------------------------- /config/dataset/image_classification/train/emnist_mnist.yaml: 
-------------------------------------------------------------------------------- 1 | emnist_mnist: 2 | _target_: datasets.load_dataset 3 | path: tanganke/emnist_mnist 4 | split: train 5 | -------------------------------------------------------------------------------- /config/dataset/text_generation/train/CodeAlpaca-20k.yaml: -------------------------------------------------------------------------------- 1 | CodeAlpaca-20k: 2 | _target_: datasets.load_dataset 3 | path: sahil2801/CodeAlpaca-20k 4 | split: train 5 | -------------------------------------------------------------------------------- /config/method/dare/simple_average.yaml: -------------------------------------------------------------------------------- 1 | _target_: fusion_bench.method.DareSimpleAverage 2 | sparsity_ratio: 0.5 3 | only_on_linear_weights: false 4 | rescale: true 5 | -------------------------------------------------------------------------------- /docs/algorithms/images/regmean_vs_regmean_plusplus.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/algorithms/images/regmean_vs_regmean_plusplus.png -------------------------------------------------------------------------------- /fusion_bench/method/bitdelta/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Adapted from https://github.com/FasterDecoding/BitDelta 3 | """ 4 | 5 | from .bitdelta import BitDeltaAlgorithm 6 | -------------------------------------------------------------------------------- /config/dataset/image_classification/test/emnist_letters.yaml: -------------------------------------------------------------------------------- 1 | emnist_letters: 2 | _target_: datasets.load_dataset 3 | path: tanganke/emnist_letters 4 | split: test 5 | -------------------------------------------------------------------------------- /config/dataset/image_classification/test/oxford-iiit-pet.yaml: -------------------------------------------------------------------------------- 1 | oxford-iiit-pet: 2 | _target_: datasets.load_dataset 3 | path: timm/oxford-iiit-pet 4 | split: test 5 | -------------------------------------------------------------------------------- /config/dataset/image_classification/test/stanford-cars.yaml: -------------------------------------------------------------------------------- 1 | stanford-cars: 2 | _target_: datasets.load_dataset 3 | path: tanganke/stanford_cars 4 | split: test 5 | -------------------------------------------------------------------------------- /config/dataset/image_classification/test/svhn.yaml: -------------------------------------------------------------------------------- 1 | svhn: 2 | _target_: datasets.load_dataset 3 | _args_: 4 | - svhn 5 | - cropped_digits 6 | split: test 7 | -------------------------------------------------------------------------------- /config/dataset/image_classification/test/tiny-imagenet.yaml: -------------------------------------------------------------------------------- 1 | tiny-imagenet: 2 | _target_: datasets.load_dataset 3 | path: zh-plus/tiny-imagenet 4 | split: valid 5 | -------------------------------------------------------------------------------- /config/dataset/image_classification/train/cub-200-2011.yaml: -------------------------------------------------------------------------------- 1 | cub-200-2011: 2 | _target_: datasets.load_dataset 3 | path: Donghyun99/CUB-200-2011 4 | split: train 5 | 
-------------------------------------------------------------------------------- /config/dataset/image_classification/train/oxford-iiit-pet.yaml: -------------------------------------------------------------------------------- 1 | oxford-iiit-pet: 2 | _target_: datasets.load_dataset 3 | path: timm/oxford-iiit-pet 4 | split: train 5 | -------------------------------------------------------------------------------- /config/dataset/image_classification/train/rendered-sst2.yaml: -------------------------------------------------------------------------------- 1 | rendered-sst2: 2 | _target_: datasets.load_dataset 3 | path: nateraw/rendered-sst2 4 | split: train 5 | -------------------------------------------------------------------------------- /config/dataset/image_classification/train/stanford-cars.yaml: -------------------------------------------------------------------------------- 1 | stanford-cars: 2 | _target_: datasets.load_dataset 3 | path: tanganke/stanford_cars 4 | split: train 5 | -------------------------------------------------------------------------------- /config/dataset/image_classification/train/svhn.yaml: -------------------------------------------------------------------------------- 1 | svhn: 2 | _target_: datasets.load_dataset 3 | _args_: 4 | - svhn 5 | - cropped_digits 6 | split: train 7 | -------------------------------------------------------------------------------- /config/dataset/image_classification/train/tiny-imagenet.yaml: -------------------------------------------------------------------------------- 1 | tiny-imagenet: 2 | _target_: datasets.load_dataset 3 | path: zh-plus/tiny-imagenet 4 | split: train 5 | -------------------------------------------------------------------------------- /config/method/isotropic_merging/iso_cts.yaml: -------------------------------------------------------------------------------- 1 | _target_: fusion_bench.method.ISO_CTS_Merge 2 | scaling_factor: 1.0 3 | common_space_fraction: 0.8 4 | exclude_keys: null 5 | -------------------------------------------------------------------------------- /fusion_bench/method/analysis/__init__.py: -------------------------------------------------------------------------------- 1 | from .task_vector_cos_similarity import TaskVectorCosSimilarity 2 | from .task_vector_violin_plot import TaskVectorViolinPlot 3 | -------------------------------------------------------------------------------- /config/dataset/image_classification/test/fashion_mnist.yaml: -------------------------------------------------------------------------------- 1 | fashion_mnist: 2 | _target_: datasets.load_dataset 3 | path: zalando-datasets/fashion_mnist 4 | split: test 5 | -------------------------------------------------------------------------------- /config/dataset/image_classification/train/emnist_letters.yaml: -------------------------------------------------------------------------------- 1 | emnist_letters: 2 | _target_: datasets.load_dataset 3 | path: tanganke/emnist_letters 4 | split: train 5 | -------------------------------------------------------------------------------- /config/method/tall_mask/task_arithmetic.yaml: -------------------------------------------------------------------------------- 1 | _target_: fusion_bench.method.tall_mask.TallMaskTaskArithmeticAlgorithm 2 | tall_mask_lambda: 0.6 3 | debug: 0 4 | verbose: 0 5 | -------------------------------------------------------------------------------- /docs/algorithms/images/adamerging_layerwise_coefficients.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/algorithms/images/adamerging_layerwise_coefficients.png -------------------------------------------------------------------------------- /docs/algorithms/images/concrete_adamerging_vs_adamerging.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/algorithms/images/concrete_adamerging_vs_adamerging.png -------------------------------------------------------------------------------- /fusion_bench/method/rankone_moe/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa F401 2 | from .clip_rankone_moe import CLIPRankOneMoEAlgorithm 3 | from .rankone_moe import RankOneMoEAlgorithm 4 | -------------------------------------------------------------------------------- /fusion_bench/method/weighted_average/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa F401 2 | from .llama import WeightedAverageForLLama 3 | from .weighted_average import WeightedAverageAlgorithm 4 | -------------------------------------------------------------------------------- /config/dataset/image_classification/test/mango-leaf-disease.yaml: -------------------------------------------------------------------------------- 1 | mango-leaf-disease: 2 | _target_: datasets.load_dataset 3 | path: AfiqN/mango-leaf-disease 4 | split: test 5 | -------------------------------------------------------------------------------- /config/dataset/image_classification/train/fashion_mnist.yaml: -------------------------------------------------------------------------------- 1 | fashion_mnist: 2 | _target_: datasets.load_dataset 3 | path: zalando-datasets/fashion_mnist 4 | split: train 5 | -------------------------------------------------------------------------------- /config/dataset/image_classification/train/mango-leaf-disease.yaml: -------------------------------------------------------------------------------- 1 | mango-leaf-disease: 2 | _target_: datasets.load_dataset 3 | path: AfiqN/mango-leaf-disease 4 | split: train 5 | -------------------------------------------------------------------------------- /config/model/flan-t5/flan-t5-base.yaml: -------------------------------------------------------------------------------- 1 | _pretrained_: 2 | _target_: transformers.AutoModelForSeq2SeqLM.from_pretrained 3 | pretrained_model_name_or_path: google/flan-t5-base 4 | -------------------------------------------------------------------------------- /config/model/flan-t5/flan-t5-large.yaml: -------------------------------------------------------------------------------- 1 | _pretrained_: 2 | _target_: transformers.AutoModelForSeq2SeqLM.from_pretrained 3 | pretrained_model_name_or_path: google/flan-t5-large 4 | -------------------------------------------------------------------------------- /docs/algorithms/images/ties_merging_hyperparameter_tuning.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/algorithms/images/ties_merging_hyperparameter_tuning.png -------------------------------------------------------------------------------- /docs/api/fusion_bench.utils/profiling.md: -------------------------------------------------------------------------------- 1 | # Profiling Utilities 2 | 3 | 
::: fusion_bench.utils.timer 4 | options: 5 | show_root_full_path: true 6 | heading_level: 3 -------------------------------------------------------------------------------- /fusion_bench/method/pwe_moe/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa F401 2 | from .clip_pwe_moe import ( 3 | PWEMoELinearScalarizationForCLIP, 4 | PWEMoExactParetoOptimalForCLIP, 5 | ) 6 | -------------------------------------------------------------------------------- /config/dataset/image_classification/test/oxford_flowers102.yaml: -------------------------------------------------------------------------------- 1 | oxford_flowers102: 2 | _target_: datasets.load_dataset 3 | path: dpdl-benchmark/oxford_flowers102 4 | split: test 5 | -------------------------------------------------------------------------------- /docs/algorithms/images/adamerging_model_merging_coefficients.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/algorithms/images/adamerging_model_merging_coefficients.png -------------------------------------------------------------------------------- /docs/modelpool/images/clip-vit-base-patch16_full&lora&l-lora.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/modelpool/images/clip-vit-base-patch16_full&lora&l-lora.png -------------------------------------------------------------------------------- /fusion_bench/method/sparse_we_moe/__init__.py: -------------------------------------------------------------------------------- 1 | from .sparse_clip_we_moe import SparseCLIPWeightEnsemblingMoEAlgorithm 2 | from .sparse_we_moe import SparseWeightEnsemblingMoEAlgorithm 3 | -------------------------------------------------------------------------------- /config/dataset/image_classification/train/oxford_flowers102.yaml: -------------------------------------------------------------------------------- 1 | oxford_flowers102: 2 | _target_: datasets.load_dataset 3 | path: dpdl-benchmark/oxford_flowers102 4 | split: train 5 | -------------------------------------------------------------------------------- /config/dataset/question_answering/search_qa.yaml: -------------------------------------------------------------------------------- 1 | search_qa: 2 | _target_: datasets.load_dataset 3 | _args_: 4 | - search_qa 5 | - train_test_val 6 | trust_remote_code: true 7 | -------------------------------------------------------------------------------- /config/method/dare/task_arithmetic.yaml: -------------------------------------------------------------------------------- 1 | _target_: fusion_bench.method.DareTaskArithmetic 2 | scaling_factor: 0.3 3 | sparsity_ratio: 0.5 4 | only_on_linear_weights: false 5 | rescale: true 6 | -------------------------------------------------------------------------------- /config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_dtd.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - CLIPVisionModelTaskPool@: _template 3 | - /dataset/image_classification/test@test_datasets: dtd 4 | -------------------------------------------------------------------------------- /config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_gtsrb.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - CLIPVisionModelTaskPool@: 
_template 3 | - /dataset/image_classification/test@test_datasets: gtsrb 4 | -------------------------------------------------------------------------------- /config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_mnist.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - CLIPVisionModelTaskPool@: _template 3 | - /dataset/image_classification/test@test_datasets: mnist 4 | -------------------------------------------------------------------------------- /config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_pcam.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - CLIPVisionModelTaskPool@: _template 3 | - /dataset/image_classification/test@test_datasets: pcam 4 | -------------------------------------------------------------------------------- /config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_stl10.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - CLIPVisionModelTaskPool@: _template 3 | - /dataset/image_classification/test@test_datasets: stl10 4 | -------------------------------------------------------------------------------- /config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_svhn.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - CLIPVisionModelTaskPool@: _template 3 | - /dataset/image_classification/test@test_datasets: svhn 4 | -------------------------------------------------------------------------------- /docs/api/fusion_bench.utils/caching.md: -------------------------------------------------------------------------------- 1 | # Caching Utilities 2 | 3 | ::: fusion_bench.utils.cache_utils 4 | options: 5 | show_root_full_path: true 6 | heading_level: 3 7 | -------------------------------------------------------------------------------- /docs/api/fusion_bench.utils/filesystem.md: -------------------------------------------------------------------------------- 1 | # FileSystem Utilities 2 | 3 | ::: fusion_bench.utils.path 4 | options: 5 | show_root_full_path: true 6 | heading_level: 3 7 | -------------------------------------------------------------------------------- /fusion_bench/method/we_moe/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa F401 2 | from .clip_we_moe import CLIPWeightEnsemblingMoEAlgorithm 3 | from .flan_t5_we_moe import FlanT5WeightEnsemblingMoEAlgorithm 4 | -------------------------------------------------------------------------------- /fusion_bench/metrics/model_kinship/__init__.py: -------------------------------------------------------------------------------- 1 | # Exploring Model Kinship for Merging LLMs 2 | # The implementation of this module is borrowed from: https://github.com/zjunlp/ModelKinship/ 3 | -------------------------------------------------------------------------------- /fusion_bench/modelpool/seq_classification_lm/__init__.py: -------------------------------------------------------------------------------- 1 | from .reward_model import create_reward_model_from_pretrained 2 | from .seq_classification_lm import SequenceClassificationModelPool 3 | -------------------------------------------------------------------------------- /fusion_bench/utils/set.py: -------------------------------------------------------------------------------- 1 | __all__ = ["union"] 2 | 3 | 4 | def union(*iters) -> set: 5 | if len(iters) == 0: 6 | return set() 
7 | s = set().union(*iters) 8 | return s 9 | -------------------------------------------------------------------------------- /config/_get_started/greeting_program.yaml: -------------------------------------------------------------------------------- 1 | _target_: fusion_bench._get_started.greeting_program.GreetingProgram 2 | message: "Welcome to FusionBench" 3 | name: "Developer" 4 | repeat_count: 3 5 | -------------------------------------------------------------------------------- /config/model/flan-t5/flan-t5-base_glue-qqp.yaml: -------------------------------------------------------------------------------- 1 | glue-qqp: 2 | _target_: transformers.AutoModelForSeq2SeqLM.from_pretrained 3 | pretrained_model_name_or_path: tanganke/flan-t5-base_glue-qqp 4 | -------------------------------------------------------------------------------- /config/model/flan-t5/flan-t5-base_glue-rte.yaml: -------------------------------------------------------------------------------- 1 | glue-rte: 2 | _target_: transformers.AutoModelForSeq2SeqLM.from_pretrained 3 | pretrained_model_name_or_path: tanganke/flan-t5-base_glue-rte 4 | -------------------------------------------------------------------------------- /config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_cifar10.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - CLIPVisionModelTaskPool@: _template 3 | - /dataset/image_classification/test@test_datasets: cifar10 4 | -------------------------------------------------------------------------------- /config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_eurosat.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - CLIPVisionModelTaskPool@: _template 3 | - /dataset/image_classification/test@test_datasets: eurosat 4 | -------------------------------------------------------------------------------- /config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_fer2013.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - CLIPVisionModelTaskPool@: _template 3 | - /dataset/image_classification/test@test_datasets: fer2013 4 | -------------------------------------------------------------------------------- /config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_food101.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - CLIPVisionModelTaskPool@: _template 3 | - /dataset/image_classification/test@test_datasets: food101 4 | -------------------------------------------------------------------------------- /config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_kmnist.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - CLIPVisionModelTaskPool@: _template 3 | - /dataset/image_classification/test@test_datasets: kmnist 4 | -------------------------------------------------------------------------------- /config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_sun397.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - CLIPVisionModelTaskPool@: _template 3 | - /dataset/image_classification/test@test_datasets: sun397 4 | -------------------------------------------------------------------------------- /docs/algorithms/pruning/images/llama_2_4_semistructued_first_layer.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/algorithms/pruning/images/llama_2_4_semistructued_first_layer.png -------------------------------------------------------------------------------- /docs/api/fusion_bench.utils/modelscope.md: -------------------------------------------------------------------------------- 1 | # ModelScope Integration 2 | 3 | ::: fusion_bench.utils.modelscope 4 | options: 5 | show_root_full_path: true 6 | heading_level: 3 7 | -------------------------------------------------------------------------------- /fusion_bench/method/depth_upscaling/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa F401 2 | from .depth_upscaling import DepthUpscalingAlgorithm 3 | from .depth_upscaling_for_llama import DepthUpscalingForLlama 4 | -------------------------------------------------------------------------------- /fusion_bench/tasks/clip_classification/mnist.py: -------------------------------------------------------------------------------- 1 | classnames = ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"] 2 | 3 | templates = [ 4 | lambda c: f'a photo of the number: "{c}".', 5 | ] 6 | -------------------------------------------------------------------------------- /fusion_bench/tasks/clip_classification/svhn.py: -------------------------------------------------------------------------------- 1 | classnames = ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"] 2 | 3 | templates = [ 4 | lambda c: f'a photo of the number: "{c}".', 5 | ] 6 | -------------------------------------------------------------------------------- /config/method/pruning/magnitude_diff_pruning.yaml: -------------------------------------------------------------------------------- 1 | _target_: fusion_bench.method.MagnitudeDiffPruningAlgorithm 2 | prune_ratio: 0.5 3 | rescale: false 4 | extract_names: null 5 | prune_type: minor 6 | -------------------------------------------------------------------------------- /config/model/flan-t5/flan-t5-base_glue-cola.yaml: -------------------------------------------------------------------------------- 1 | glue-cola: 2 | _target_: transformers.AutoModelForSeq2SeqLM.from_pretrained 3 | pretrained_model_name_or_path: tanganke/flan-t5-base_glue-cola 4 | -------------------------------------------------------------------------------- /config/model/flan-t5/flan-t5-base_glue-mnli.yaml: -------------------------------------------------------------------------------- 1 | glue-mnli: 2 | _target_: transformers.AutoModelForSeq2SeqLM.from_pretrained 3 | pretrained_model_name_or_path: tanganke/flan-t5-base_glue-mnli 4 | -------------------------------------------------------------------------------- /config/model/flan-t5/flan-t5-base_glue-mrpc.yaml: -------------------------------------------------------------------------------- 1 | glue-mrpc: 2 | _target_: transformers.AutoModelForSeq2SeqLM.from_pretrained 3 | pretrained_model_name_or_path: tanganke/flan-t5-base_glue-mrpc 4 | -------------------------------------------------------------------------------- /config/model/flan-t5/flan-t5-base_glue-qnli.yaml: -------------------------------------------------------------------------------- 1 | glue-qnli: 2 | _target_: transformers.AutoModelForSeq2SeqLM.from_pretrained 3 | pretrained_model_name_or_path: tanganke/flan-t5-base_glue-qnli 4 | -------------------------------------------------------------------------------- /config/model/flan-t5/flan-t5-base_glue-sst2.yaml: 
-------------------------------------------------------------------------------- 1 | glue-sst2: 2 | _target_: transformers.AutoModelForSeq2SeqLM.from_pretrained 3 | pretrained_model_name_or_path: tanganke/flan-t5-base_glue-sst2 4 | -------------------------------------------------------------------------------- /config/model/flan-t5/flan-t5-base_glue-stsb.yaml: -------------------------------------------------------------------------------- 1 | glue-stsb: 2 | _target_: transformers.AutoModelForSeq2SeqLM.from_pretrained 3 | pretrained_model_name_or_path: tanganke/flan-t5-base_glue-stsb 4 | -------------------------------------------------------------------------------- /config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_cifar100.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - CLIPVisionModelTaskPool@: _template 3 | - /dataset/image_classification/test@test_datasets: cifar100 4 | -------------------------------------------------------------------------------- /config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_resisc45.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - CLIPVisionModelTaskPool@: _template 3 | - /dataset/image_classification/test@test_datasets: resisc45 4 | -------------------------------------------------------------------------------- /docs/modelpool/images/clip-vit-base-patch16_full&lora&l-lora_average.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanganke/fusion_bench/HEAD/docs/modelpool/images/clip-vit-base-patch16_full&lora&l-lora_average.png -------------------------------------------------------------------------------- /fusion_bench/method/regmean_plusplus/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa F401 2 | from .clip_regmean_plusplus import RegMeanAlgorithmForCLIPPlusPlus 3 | from .regmean_plusplus import RegMeanAlgorithmPlusPlus 4 | -------------------------------------------------------------------------------- /fusion_bench/tasks/clip_classification/pcam.py: -------------------------------------------------------------------------------- 1 | classnames = ["lymph node", "lymph node containing metastatic tumor tissue"] 2 | 3 | templates = [ 4 | lambda c: f"this is a photo of {c}", 5 | ] 6 | -------------------------------------------------------------------------------- /config/dataset/image_classification/test/the_eight_tasks.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - sun397 3 | - stanford-cars 4 | - resisc45 5 | - eurosat 6 | - svhn 7 | - gtsrb 8 | - mnist 9 | - dtd 10 | -------------------------------------------------------------------------------- /config/dataset/image_classification/val/the_eight_tasks.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - sun397 3 | - stanford-cars 4 | - resisc45 5 | - eurosat 6 | - svhn 7 | - gtsrb 8 | - mnist 9 | - dtd 10 | -------------------------------------------------------------------------------- /config/method/analysis/task_vector_violin_plot.yaml: -------------------------------------------------------------------------------- 1 | _target_: fusion_bench.method.TaskVectorViolinPlot 2 | trainable_only: true 3 | max_points_per_model: 1000 4 | fig_kwargs: null 5 | output_path: null 6 | 
-------------------------------------------------------------------------------- /fusion_bench/method/dare/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa F401 2 | from .simple_average import DareSimpleAverage 3 | from .task_arithmetic import DareTaskArithmetic 4 | from .ties_merging import DareTiesMerging 5 | -------------------------------------------------------------------------------- /fusion_bench/method/lm_finetune/__init__.py: -------------------------------------------------------------------------------- 1 | from .bradley_terry_rm import BradleyTerryRewardModeling 2 | from .fullfinetune_sft import FullFinetuneSFT 3 | from .peftfinetune_sft import PeftFinetuneSFT 4 | -------------------------------------------------------------------------------- /fusion_bench/method/moe_pruner/hooks/__init__.py: -------------------------------------------------------------------------------- 1 | from .deepseek_v2 import ( 2 | MoEPrunerHookFnForDeepseekV2Gate, 3 | MoEPrunerHookFnForDeepseekV2Linear, 4 | ) 5 | from .hook import BaseHookFn 6 | -------------------------------------------------------------------------------- /config/dataset/image_classification/train/the_eight_tasks.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - sun397 3 | - stanford-cars 4 | - resisc45 5 | - eurosat 6 | - svhn 7 | - gtsrb 8 | - mnist 9 | - dtd 10 | -------------------------------------------------------------------------------- /config/dataset/question_answering/test/search_qa.yaml: -------------------------------------------------------------------------------- 1 | search_qa: 2 | _target_: datasets.load_dataset 3 | _args_: 4 | - search_qa 5 | - train_test_val 6 | trust_remote_code: true 7 | split: test 8 | -------------------------------------------------------------------------------- /config/dataset/text_generation/test/gsm-hard.yaml: -------------------------------------------------------------------------------- 1 | gsm-hard: 2 | _target_: datasets.load_dataset 3 | path: reasoning-machines/gsm-hard 4 | split: train # this dataset is used to evaluate math reasoning 5 | -------------------------------------------------------------------------------- /config/method/analysis/task_vector_cos_similarity.yaml: -------------------------------------------------------------------------------- 1 | _target_: fusion_bench.method.TaskVectorCosSimilarity 2 | plot_heatmap: true 3 | trainable_only: true 4 | max_points_per_model: null 5 | output_path: null 6 | -------------------------------------------------------------------------------- /config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_emnist_letters.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - CLIPVisionModelTaskPool@: _template 3 | - /dataset/image_classification/test@test_datasets: emnist_letters 4 | -------------------------------------------------------------------------------- /config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_fashion_mnist.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - CLIPVisionModelTaskPool@: _template 3 | - /dataset/image_classification/test@test_datasets: fashion_mnist 4 | -------------------------------------------------------------------------------- /config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_oxford-iiit-pet.yaml: 
-------------------------------------------------------------------------------- 1 | defaults: 2 | - CLIPVisionModelTaskPool@: _template 3 | - /dataset/image_classification/test@test_datasets: oxford-iiit-pet 4 | -------------------------------------------------------------------------------- /config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_rendered-sst2.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - CLIPVisionModelTaskPool@: _template 3 | - /dataset/image_classification/test@test_datasets: rendered-sst2 4 | -------------------------------------------------------------------------------- /config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_stanford-cars.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - CLIPVisionModelTaskPool@: _template 3 | - /dataset/image_classification/test@test_datasets: stanford-cars 4 | -------------------------------------------------------------------------------- /fusion_bench/dataset/arc_agi/__init__.py: -------------------------------------------------------------------------------- 1 | from .arc_agi import ( 2 | load_tokenized_arc_agi_dataset, 3 | load_tokenized_arc_agi_dataset_for_ttt, 4 | process_task, 5 | process_task_for_ttt, 6 | ) 7 | -------------------------------------------------------------------------------- /fusion_bench/method/regmean/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa F401 2 | from .clip_regmean import RegMeanAlgorithmForCLIP 3 | from .gpt2_regmean import RegMeanAlgorithmForGPT2 4 | from .regmean import RegMeanAlgorithm 5 | -------------------------------------------------------------------------------- /config/dataset/question_answering/train/search_qa.yaml: -------------------------------------------------------------------------------- 1 | search_qa: 2 | _target_: datasets.load_dataset 3 | _args_: 4 | - search_qa 5 | - train_test_val 6 | trust_remote_code: true 7 | split: train 8 | -------------------------------------------------------------------------------- /config/dataset/question_answering/val/search_qa.yaml: -------------------------------------------------------------------------------- 1 | search_qa: 2 | _target_: datasets.load_dataset 3 | _args_: 4 | - search_qa 5 | - train_test_val 6 | trust_remote_code: true 7 | split: validation 8 | -------------------------------------------------------------------------------- /config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_oxford_flowers102.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - CLIPVisionModelTaskPool@: _template 3 | - /dataset/image_classification/test@test_datasets: oxford_flowers102 4 | -------------------------------------------------------------------------------- /config/taskpool/nyuv2_taskpool.yaml: -------------------------------------------------------------------------------- 1 | type: NYUv2TaskPool 2 | data_dir: .cache 3 | tasks: 4 | - segmentation 5 | - depth 6 | - normal 7 | # per-GPU batch size 8 | batch_size: 16 9 | num_workers: 4 10 | -------------------------------------------------------------------------------- /config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_oxford_flowers102_val.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - CLIPVisionModelTaskPool@: _template 3 | - 
/dataset/image_classification/val@test_datasets: oxford_flowers102 4 | -------------------------------------------------------------------------------- /examples/ada_svd/clip_vision.sh: -------------------------------------------------------------------------------- 1 | fusion_bench \ 2 | method=ada_svd/clip_vision \ 3 | modelpool=CLIPVisionModelPool/clip-vit-base-patch32_TA8 \ 4 | taskpool=CLIPVisionModelTaskPool/clip-vit-base-patch32_TA8 5 | -------------------------------------------------------------------------------- /fusion_bench/method/smile_upscaling/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa F401 2 | from .singular_projection_merging import SingularProjectionMergingAlgorithm 3 | from .smile_upscaling import SmileMoELinear, SmileUpscalingAlgorithm 4 | -------------------------------------------------------------------------------- /examples/adamerging/gpt_2.sh: -------------------------------------------------------------------------------- 1 | # Layer-wise AdaMerging for GPT-2 2 | fusion_bench \ 3 | method=adamerging/layer_wise_gpt2 \ 4 | method.max_steps=400 \ 5 | modelpool=test/test.yaml \ 6 | taskpool=test/test.yaml 7 | 8 | -------------------------------------------------------------------------------- /config/modelpool/CausalLMPool/mistral-7b.yaml: -------------------------------------------------------------------------------- 1 | _target_: fusion_bench.modelpool.CausalLMPool 2 | models: 3 | _pretrained_: mistralai/Mistral-7B-v0.1 4 | tokenizer: ${.models._pretrained_} 5 | model_kwargs: 6 | torch_dtype: bfloat16 7 | -------------------------------------------------------------------------------- /config/nyuv2_config.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - fabric_model_fusion 3 | - override method: simple_average 4 | - override modelpool: nyuv2_modelpool 5 | - override taskpool: nyuv2_taskpool 6 | - _self_ 7 | trainer: 8 | devices: 1 9 | -------------------------------------------------------------------------------- /fusion_bench/method/moe_pruner/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Implementation of MoE-Pruner 3 | 4 | MoE-Pruner: Pruning Mixture-of-Experts Large Language Model using the Hints from Its Router 5 | """ 6 | 7 | from .moe_pruner import MoEPruner 8 | -------------------------------------------------------------------------------- /fusion_bench/tasks/clip_classification/emnist_mnist.py: -------------------------------------------------------------------------------- 1 | # https://huggingface.co/datasets/tanganke/emnist_mnist 2 | classnames = [str(i) for i in range(10)] 3 | templates = [ 4 | lambda c: f'a photo of the number: "{c}".', 5 | ] 6 | -------------------------------------------------------------------------------- /config/dataset/llm_sft/alpaca_cleaned.yaml: -------------------------------------------------------------------------------- 1 | alpaca-cleaned: 2 | _target_: fusion_bench.dataset.llama.alpaca.load_tokenized_alpaca_dataset 3 | tokenizer: ???
4 | path: "yahma/alpaca-cleaned" 5 | split: train 6 | cache_path: null 7 | -------------------------------------------------------------------------------- /fusion_bench/method/gossip/__init__.py: -------------------------------------------------------------------------------- 1 | from .clip_layer_wise_gossip import CLIPLayerWiseGossipAlgorithm 2 | from .clip_task_wise_gossip import CLIPTaskWiseGossipAlgorithm 3 | from .flan_t5_layer_wise_gossip import FlanT5LayerWiseGossipAlgorithm 4 | -------------------------------------------------------------------------------- /config/dataset/text_generation/test/gsm8k_question_label.yaml: -------------------------------------------------------------------------------- 1 | qsm8k: 2 | _target_: fusion_bench.dataset.gsm8k.load_gsm8k_question_label_dataset 3 | dataset_name: test # this option can be 'train', 'test', 'train_socratic', and 'test_socratic' 4 | -------------------------------------------------------------------------------- /config/dataset/text_generation/train/gsm8k_question_label.yaml: -------------------------------------------------------------------------------- 1 | qsm8k: 2 | _target_: fusion_bench.dataset.gsm8k.load_gsm8k_question_label_dataset 3 | dataset_name: train # this option can be 'train', 'test', 'train_socratic', and 'test_socratic' 4 | -------------------------------------------------------------------------------- /config/llama_full_finetune.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - fabric_model_fusion 3 | - override fabric: llama_fsdp 4 | - override method: lm_finetune/fullfinetune_sft.yaml 5 | - override modelpool: CausalLMPool/llama_alpaca_cleaned.yaml 6 | - _self_ 7 | -------------------------------------------------------------------------------- /config/method/ada_svd/clip_vision.yaml: -------------------------------------------------------------------------------- 1 | _target_: fusion_bench.method.AdaSVDMergingForCLIPVisionModel 2 | scaling_factor: null 3 | num_samples: 256 4 | gate_k: 16 5 | average_experts: false 6 | device: cuda 7 | upscaling_accelerator: null 8 | seed: 0 9 | -------------------------------------------------------------------------------- /config/model/flan-t5/flan-t5-base_glue-cola_lora-16.yaml: -------------------------------------------------------------------------------- 1 | glue-cola: 2 | _target_: fusion_bench.modelpool.seq2seq_lm.modelpool.load_lora_model 3 | base_model_path: google/flan-t5-base 4 | peft_model_path: tanganke/flan-t5-base_glue-cola_lora-16 5 | -------------------------------------------------------------------------------- /config/model/flan-t5/flan-t5-base_glue-mnli_lora-16.yaml: -------------------------------------------------------------------------------- 1 | glue-mnli: 2 | _target_: fusion_bench.modelpool.seq2seq_lm.modelpool.load_lora_model 3 | base_model_path: google/flan-t5-base 4 | peft_model_path: tanganke/flan-t5-base_glue-mnli_lora-16 5 | -------------------------------------------------------------------------------- /config/model/flan-t5/flan-t5-base_glue-mrpc_lora-16.yaml: -------------------------------------------------------------------------------- 1 | glue-mrpc: 2 | _target_: fusion_bench.modelpool.seq2seq_lm.modelpool.load_lora_model 3 | base_model_path: google/flan-t5-base 4 | peft_model_path: tanganke/flan-t5-base_glue-mrpc_lora-16 5 | -------------------------------------------------------------------------------- /config/model/flan-t5/flan-t5-base_glue-qnli_lora-16.yaml: 
-------------------------------------------------------------------------------- 1 | glue-qnli: 2 | _target_: fusion_bench.modelpool.seq2seq_lm.modelpool.load_lora_model 3 | base_model_path: google/flan-t5-base 4 | peft_model_path: tanganke/flan-t5-base_glue-qnli_lora-16 5 | -------------------------------------------------------------------------------- /config/model/flan-t5/flan-t5-base_glue-qqp_lora-16.yaml: -------------------------------------------------------------------------------- 1 | glue-qqp: 2 | _target_: fusion_bench.modelpool.seq2seq_lm.modelpool.load_lora_model 3 | base_model_path: google/flan-t5-base 4 | peft_model_path: tanganke/flan-t5-base_glue-qqp_lora-16 5 | -------------------------------------------------------------------------------- /config/model/flan-t5/flan-t5-base_glue-rte_lora-16.yaml: -------------------------------------------------------------------------------- 1 | glue-rte: 2 | _target_: fusion_bench.modelpool.seq2seq_lm.modelpool.load_lora_model 3 | base_model_path: google/flan-t5-base 4 | peft_model_path: tanganke/flan-t5-base_glue-rte_lora-16 5 | -------------------------------------------------------------------------------- /config/model/flan-t5/flan-t5-base_glue-sst2_lora-16.yaml: -------------------------------------------------------------------------------- 1 | glue-sst2: 2 | _target_: fusion_bench.modelpool.seq2seq_lm.modelpool.load_lora_model 3 | base_model_path: google/flan-t5-base 4 | peft_model_path: tanganke/flan-t5-base_glue-sst2_lora-16 5 | -------------------------------------------------------------------------------- /config/model/flan-t5/flan-t5-base_glue-stsb_lora-16.yaml: -------------------------------------------------------------------------------- 1 | glue-stsb: 2 | _target_: fusion_bench.modelpool.seq2seq_lm.modelpool.load_lora_model 3 | base_model_path: google/flan-t5-base 4 | peft_model_path: tanganke/flan-t5-base_glue-stsb_lora-16 5 | -------------------------------------------------------------------------------- /config/model/flan-t5/flan-t5-large_glue-qqp_lora-16.yaml: -------------------------------------------------------------------------------- 1 | glue-qqp: 2 | _target_: fusion_bench.modelpool.seq2seq_lm.modelpool.load_lora_model 3 | base_model_path: google/flan-t5-large 4 | peft_model_path: tanganke/flan-t5-large_glue-qqp_lora-16 5 | -------------------------------------------------------------------------------- /config/model/flan-t5/flan-t5-large_glue-rte_lora-16.yaml: -------------------------------------------------------------------------------- 1 | glue-rte: 2 | _target_: fusion_bench.modelpool.seq2seq_lm.modelpool.load_lora_model 3 | base_model_path: google/flan-t5-large 4 | peft_model_path: tanganke/flan-t5-large_glue-rte_lora-16 5 | -------------------------------------------------------------------------------- /config/fabric/loggers/wandb_logger.yaml: -------------------------------------------------------------------------------- 1 | # https://lightning.ai/docs/fabric/2.4.0/guide/loggers/wandb.html#weights-and-biases 2 | _target_: wandb.integration.lightning.fabric.WandbLogger 3 | project: ${hydra:job.config_name} 4 | save_dir: ${path.log_dir} 5 | -------------------------------------------------------------------------------- /config/model/flan-t5/flan-t5-large_glue-cola_lora-16.yaml: -------------------------------------------------------------------------------- 1 | glue-cola: 2 | _target_: fusion_bench.modelpool.seq2seq_lm.modelpool.load_lora_model 3 | base_model_path: google/flan-t5-large 4 | 
peft_model_path: tanganke/flan-t5-large_glue-cola_lora-16 5 | -------------------------------------------------------------------------------- /config/model/flan-t5/flan-t5-large_glue-mnli_lora-16.yaml: -------------------------------------------------------------------------------- 1 | glue-mnli: 2 | _target_: fusion_bench.modelpool.seq2seq_lm.modelpool.load_lora_model 3 | base_model_path: google/flan-t5-large 4 | peft_model_path: tanganke/flan-t5-large_glue-mnli_lora-16 5 | -------------------------------------------------------------------------------- /config/model/flan-t5/flan-t5-large_glue-mrpc_lora-16.yaml: -------------------------------------------------------------------------------- 1 | glue-mrpc: 2 | _target_: fusion_bench.modelpool.seq2seq_lm.modelpool.load_lora_model 3 | base_model_path: google/flan-t5-large 4 | peft_model_path: tanganke/flan-t5-large_glue-mrpc_lora-16 5 | -------------------------------------------------------------------------------- /config/model/flan-t5/flan-t5-large_glue-qnli_lora-16.yaml: -------------------------------------------------------------------------------- 1 | glue-qnli: 2 | _target_: fusion_bench.modelpool.seq2seq_lm.modelpool.load_lora_model 3 | base_model_path: google/flan-t5-large 4 | peft_model_path: tanganke/flan-t5-large_glue-qnli_lora-16 5 | -------------------------------------------------------------------------------- /config/model/flan-t5/flan-t5-large_glue-sst2_lora-16.yaml: -------------------------------------------------------------------------------- 1 | glue-sst2: 2 | _target_: fusion_bench.modelpool.seq2seq_lm.modelpool.load_lora_model 3 | base_model_path: google/flan-t5-large 4 | peft_model_path: tanganke/flan-t5-large_glue-sst2_lora-16 5 | -------------------------------------------------------------------------------- /config/model/flan-t5/flan-t5-large_glue-stsb_lora-16.yaml: -------------------------------------------------------------------------------- 1 | glue-stsb: 2 | _target_: fusion_bench.modelpool.seq2seq_lm.modelpool.load_lora_model 3 | base_model_path: google/flan-t5-large 4 | peft_model_path: tanganke/flan-t5-large_glue-stsb_lora-16 5 | -------------------------------------------------------------------------------- /config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_individual.yaml: -------------------------------------------------------------------------------- 1 | _target_: fusion_bench.modelpool.CLIPVisionModelPool 2 | _recursive_: False 3 | models: 4 | _pretrained_: openai/clip-vit-base-patch32 5 | processor: ${.models._pretrained_} 6 | -------------------------------------------------------------------------------- /examples/gossip/flan_t5.sh: -------------------------------------------------------------------------------- 1 | # Layer-wise gossip 2 | fusion_bench \ 3 | method=gossip/layer_wise_flan_t5 \ 4 | method.lr=1e-3 \ 5 | modelpool=Seq2SeqLMPool/flan-t5-base_glue_lora16_tta \ 6 | taskpool=flan-t5_glue_text_generation 7 | -------------------------------------------------------------------------------- /fusion_bench/method/opcm/__init__.py: -------------------------------------------------------------------------------- 1 | from .opcm import OPCMForCLIP 2 | from .task_arithmetic import ContinualTaskArithmeticForCLIP 3 | from .ties_merging import ContinualTiesMergingForCLIP 4 | from .weight_average import ContinualWeightAverageForCLIP 5 | -------------------------------------------------------------------------------- /fusion_bench/models/modeling_losparse_llama/__init__.py:
-------------------------------------------------------------------------------- 1 | # flake8: noqa F401 2 | from . import register 3 | from .configuration_losparse_llama import LoSparseLlamaConfig 4 | from .modeling_losparse_llama import LoSparseLlamaForCausalLM, LoSparseLlamaModel 5 | -------------------------------------------------------------------------------- /fusion_bench/models/open_clip/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | This module contains the support for the open_clip model. 3 | Modified from https://github.com/nik-dim/tall_masks/ 4 | """ 5 | 6 | from .modeling import ClassificationHead, ImageClassifier, ImageEncoder 7 | -------------------------------------------------------------------------------- /config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_mtl.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - CLIPVisionModelPool@: _template 3 | - /model/clip-vit@models: 4 | - clip-vit-base-patch32 5 | - /dataset/image_classification/train@train_datasets: the_eight_tasks 6 | -------------------------------------------------------------------------------- /examples/trust_region/READMD.md: -------------------------------------------------------------------------------- 1 | 2 | ```bash 3 | fusion_bench \ 4 | method=trust_region/clip_task_arithmetic \ 5 | modelpool=CLIPVisionModelPool/clip-vit-base-patch32_TA8 \ 6 | taskpool=CLIPVisionModelTaskPool/clip-vit-classification_TA8 7 | ``` 8 | -------------------------------------------------------------------------------- /config/method/classification/image_classification_finetune_test.yaml: -------------------------------------------------------------------------------- 1 | _target_: fusion_bench.method.classification.ImageClassificationFineTuning_Test 2 | checkpoint_path: null 3 | dataloader_kwargs: 4 | batch_size: 256 5 | num_workers: 4 6 | pin_memory: true 7 | -------------------------------------------------------------------------------- /config/modelpool/Seq2SeqLMPool/flan-t5-base_individual.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - Seq2SeqLMPool@: _template 3 | models: 4 | _pretrained_: 5 | _target_: transformers.AutoModelForSeq2SeqLM.from_pretrained 6 | pretrained_model_name_or_path: google/flan-t5-base 7 | -------------------------------------------------------------------------------- /examples/gossip/clip.sh: -------------------------------------------------------------------------------- 1 | # Layer-wise gossip 2 | fusion_bench \ 3 | method=gossip/layer_wise_clip \ 4 | method.lr=1e-3 \ 5 | modelpool=CLIPVisionModelPool/clip-vit-base-patch32_TA8 \ 6 | taskpool=CLIPVisionModelTaskPool/clip-vit-classification_TA8 7 | -------------------------------------------------------------------------------- /config/method/trust_region/clip_task_arithmetic.yaml: -------------------------------------------------------------------------------- 1 | _target_: fusion_bench.method.trust_region.clip_task_arithmetic.TaskArithmeticWithTrustRegionForCLIP 2 | scaling_factor: 0.3 3 | threshold_quantile: 0.99 4 | max_samples: 128 5 | batch_size: 128 6 | zero_shot: false 7 | -------------------------------------------------------------------------------- /examples/adamerging/flan_t5_base.sh: -------------------------------------------------------------------------------- 1 | # Layer-wise adamerging 2 | fusion_bench \ 3 | method=adamerging/layer_wise_flan_t5 \ 
4 | method.optimizer.lr=1e-3 \ 5 | modelpool=Seq2SeqLMPool/flan-t5-base_glue_lora16_tta \ 6 | taskpool=flan-t5_glue_text_generation 7 | -------------------------------------------------------------------------------- /fusion_bench/models/modeling_smile_llama/__init__.py: -------------------------------------------------------------------------------- 1 | from . import register 2 | from .configuration_smile_llama import SmileLlamaConfig 3 | from .modeling_smile_llama import ( 4 | SmileLlamaDecoderLayer, 5 | SmileLlamaForCausalLM, 6 | SmileLlamaModel, 7 | ) 8 | -------------------------------------------------------------------------------- /fusion_bench/method/mixture_of_experts/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa F401 2 | from .mixtral_merging import ( 3 | MixtralForCausalLMMergingAlgorithm, 4 | MixtralForCausalLMUpscalingAlgorithm, 5 | MixtralMoEMergingAlgorithm, 6 | MixtralUpscalingAlgorithm, 7 | ) 8 | -------------------------------------------------------------------------------- /fusion_bench/method/fisher_merging/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa F401 2 | from .clip_fisher_merging import FisherMergingForCLIPVisionModel 3 | from .fisher_merging import FisherMergingAlgorithm, get_param_names_to_merge 4 | from .gpt2_fisher_merging import FisherMergingAlgorithmForGPT2 5 | -------------------------------------------------------------------------------- /fusion_bench/models/modeling_smile_mistral/__init__.py: -------------------------------------------------------------------------------- 1 | from . import register 2 | from .configuration_smile_mistral import SmileMistralConfig 3 | from .modeling_smile_mistral import ( 4 | SmileMistralDecoderLayer, 5 | SmileMistralForCausalLM, 6 | SmileMistralModel, 7 | ) 8 | -------------------------------------------------------------------------------- /tests/import_profile.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | 3 | from pyinstrument import Profiler 4 | 5 | 6 | async def main(): 7 | p = Profiler(async_mode="disabled") 8 | 9 | with p: 10 | import fusion_bench 11 | 12 | p.print() 13 | 14 | 15 | asyncio.run(main()) 16 | -------------------------------------------------------------------------------- /docs/taskpool/dummy.md: -------------------------------------------------------------------------------- 1 | # Dummy TaskPool 2 | 3 | The `DummyTaskPool` is used for debugging purposes. 4 | It inherits from the base `TaskPool` class. 5 | 6 | ## Reference 7 | 8 | ::: fusion_bench.taskpool.dummy.DummyTaskPool 9 | options: 10 | members: [evaluate] 11 | -------------------------------------------------------------------------------- /config/method/expert_sparsity/README.md: -------------------------------------------------------------------------------- 1 | Original repo: https://github.com/Lucky-Lance/Expert_Sparsity 2 | 3 | Reference: 4 | Not All Experts are Equal: Efficient Expert Pruning and Skipping for Mixture-of-Experts Large Language Models. 5 | ACL 2024. 
6 | http://arxiv.org/abs/2402.14800 7 | -------------------------------------------------------------------------------- /config/method/task_singular_vector/TaskSingularVectorMerging.yaml: -------------------------------------------------------------------------------- 1 | _target_: fusion_bench.method.TaskSingularVectorMerging 2 | exclude_keys: null 3 | # alpha is a float or a list of floats 4 | # example: 5 | # alpha: 1 6 | # alpha: [1, 0.5, 0.25] 7 | alpha: 1 8 | return_single_task_models: false 9 | -------------------------------------------------------------------------------- /fusion_bench/method/lm_finetune/causal_lm_pretrain.py: -------------------------------------------------------------------------------- 1 | from fusion_bench import BaseAlgorithm 2 | from fusion_bench.modelpool import CausalLMPool 3 | 4 | 5 | class CausalLMPretrain(BaseAlgorithm): 6 | def run(self, modelpool: CausalLMPool): 7 | tokenizer = modelpool.load_tokenizer() 8 | -------------------------------------------------------------------------------- /config/modelpool/CausalLMPool/vicuna-7b-v1.5.yaml: -------------------------------------------------------------------------------- 1 | _target_: fusion_bench.modelpool.CausalLMPool 2 | _recursive_: false 3 | models: 4 | _pretrained_: meta-llama/Llama-2-7b-hf 5 | finetuned_model: lmsys/vicuna-7b-v1.5 6 | model_kwargs: 7 | torch_dtype: bfloat16 8 | tokenizer: ${.models.finetuned_model} 9 | -------------------------------------------------------------------------------- /config/modelpool/Seq2SeqLMPool/_template.yaml: -------------------------------------------------------------------------------- 1 | _target_: fusion_bench.modelpool.Seq2SeqLMPool 2 | _recursive_: false 3 | _version_: "0.2" 4 | models: ??? 5 | tokenizer: 6 | _target_: transformers.AutoTokenizer.from_pretrained 7 | pretrained_model_name_or_path: google/flan-t5-base 8 | model_kwargs: null 9 | -------------------------------------------------------------------------------- /config/fabric/loggers/swandb_logger.yaml: -------------------------------------------------------------------------------- 1 | # https://github.com/SwanHubX/SwanLab/blob/main/swanlab/integration/pytorch_lightning.py 2 | _target_: swanlab.integration.pytorch_lightning.SwanLabLogger 3 | project: ${hydra:job.config_name} 4 | description: "SwanLab logger with FusionBench" 5 | save_dir: ${path.log_dir} 6 | -------------------------------------------------------------------------------- /config/modelpool/OpenCLIPVisionModelPool/ViT-B-32_individual.yaml: -------------------------------------------------------------------------------- 1 | _target_: fusion_bench.modelpool.OpenCLIPVisionModelPool 2 | _recursive_: false 3 | model_dir: ./.cache/task_vectors_checkpoints/ 4 | models: 5 | _pretrained_: 6 | model_name: ViT-B-32 7 | pickle_path: ${...model_dir}/ViT-B-32/zeroshot.pt 8 | -------------------------------------------------------------------------------- /config/modelpool/CLIPVisionModelPool/_template.yaml: -------------------------------------------------------------------------------- 1 | _usage_: | 2 | defaults: 3 | - CLIPVisionModelPool@: _template 4 | _target_: fusion_bench.modelpool.CLIPVisionModelPool 5 | _recursive_: False 6 | models: ???
7 | train_datasets: null 8 | test_datasets: null 9 | processor: openai/clip-vit-base-patch32 10 | -------------------------------------------------------------------------------- /config/method/fw_merging/fw_hard.yaml: -------------------------------------------------------------------------------- 1 | _target_: fusion_bench.method.FrankWolfeHardAlgorithm 2 | merge_fn: task_arithmetic 3 | max_iters: 10 4 | step_size: 0.1 5 | dataset_size: 100 6 | tasks: [] 7 | init_weight: 8 | loss_fn: cross_entropy 9 | scaling_factor: 0.3 10 | max_num_models: 100 11 | granularity: task 12 | -------------------------------------------------------------------------------- /config/method/fw_merging/fw_soft.yaml: -------------------------------------------------------------------------------- 1 | _target_: fusion_bench.method.FrankWolfeSoftAlgorithm 2 | init_weight: 3 | max_iters: 10 4 | merge_fn: 'adamerging' 5 | tasks: 6 | ada_iters: 500 7 | dataset_size: 100 8 | ada_coeff: 1e-8 9 | step_size: 0.1 10 | max_num_models: 100 11 | granularity: task 12 | ada_loss: entropy_loss -------------------------------------------------------------------------------- /docs/api/fusion_bench.method/ensemble.md: -------------------------------------------------------------------------------- 1 | # Model Ensemble 2 | 3 | ::: fusion_bench.method 4 | options: 5 | show_root_heading: false 6 | heading_level: 2 7 | members: 8 | - SimpleEnsembleAlgorithm 9 | - WeightedEnsembleAlgorithm 10 | - MaxModelPredictorAlgorithm 11 | -------------------------------------------------------------------------------- /docs/api/fusion_bench.utils/logging.md: -------------------------------------------------------------------------------- 1 | # Logging Utilities 2 | 3 | ::: fusion_bench.utils.rich_utils 4 | options: 5 | show_root_full_path: true 6 | heading_level: 3 7 | 8 | ::: fusion_bench.utils.pylogger 9 | options: 10 | show_root_full_path: true 11 | heading_level: 3 12 | -------------------------------------------------------------------------------- /fusion_bench/models/modeling_smile_qwen2/__init__.py: -------------------------------------------------------------------------------- 1 | from . import register 2 | from .configuration_smile_qwen2 import SmileQwen2Config 3 | from .modeling_smile_qwen2 import ( 4 | SmileQwen2ForCausalLM, 5 | SmileQwen2ForQuestionAnswering, 6 | SmileQwen2ForSequenceClassification, 7 | SmileQwen2Model, 8 | ) 9 | -------------------------------------------------------------------------------- /docs/taskpool/LlamaTestGenerationTaskPool.md: -------------------------------------------------------------------------------- 1 | # LlamaTestGenerationTaskPool 2 | 3 | The `LlamaTestGenerationTaskPool` class is used to evaluate a language model on a set of prompts. It can also be used in an interactive mode for debugging purposes. 
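For orientation, a hypothetical taskpool configuration for this class might look like the sketch below. The `_target_` path is inferred from the module cited under References, and the field names (`test_prompts`, `max_new_tokens`) are illustrative assumptions rather than the documented schema:

```yaml
# Hypothetical sketch only; field names are assumptions, see the API reference below.
_target_: fusion_bench.taskpool.llama.test_generation.LlamaTestGenerationTaskPool
test_prompts:
  - "Explain model merging in one sentence."
max_new_tokens: 128
```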
4 | 5 | ## References 6 | 7 | ::: fusion_bench.taskpool.llama.test_generation 8 | -------------------------------------------------------------------------------- /config/dataset/image_classification/val/dtd.yaml: -------------------------------------------------------------------------------- 1 | dtd: 2 | _target_: fusion_bench.utils.data.train_validation_split 3 | dataset: 4 | _target_: datasets.load_dataset 5 | path: tanganke/dtd 6 | split: train 7 | validation_fraction: 0.1 8 | validation_size: null 9 | random_seed: 0 10 | return_split: val 11 | -------------------------------------------------------------------------------- /config/dataset/image_classification/val/mnist.yaml: -------------------------------------------------------------------------------- 1 | mnist: 2 | _target_: fusion_bench.utils.data.train_validation_split 3 | dataset: 4 | _target_: datasets.load_dataset 5 | path: mnist 6 | split: train 7 | validation_fraction: 0.1 8 | validation_size: null 9 | random_seed: 0 10 | return_split: val 11 | -------------------------------------------------------------------------------- /config/modelpool/CLIPVisionModelPool/clip-vit-base-patch16_TA8_model_only.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - CLIPVisionModelPool@: _template 3 | - /model/clip-vit@models: clip-vit-base-patch16_eight_tasks 4 | processor: 5 | _target_: transformers.CLIPProcessor.from_pretrained 6 | pretrained_model_name_or_path: openai/clip-vit-base-patch16 7 | -------------------------------------------------------------------------------- /config/modelpool/CausalLMPool/Qwen2.5-7B-math_and_coder.yaml: -------------------------------------------------------------------------------- 1 | _target_: fusion_bench.modelpool.CausalLMPool 2 | _recursive_: false 3 | models: 4 | _pretrained_: Qwen/Qwen2.5-7B 5 | math: Qwen/Qwen2.5-Math-7B 6 | code: Qwen/Qwen2.5-Coder-7B 7 | model_kwargs: 8 | torch_dtype: bfloat16 9 | tokenizer: Qwen/Qwen2.5-7B 10 | -------------------------------------------------------------------------------- /docs/api/fusion_bench.utils/package_management.md: -------------------------------------------------------------------------------- 1 | # Package Management 2 | 3 | ::: fusion_bench.utils.packages 4 | options: 5 | show_root_full_path: true 6 | heading_level: 3 7 | 8 | ::: fusion_bench.utils.lazy_imports 9 | options: 10 | show_root_full_path: true 11 | heading_level: 3 -------------------------------------------------------------------------------- /tests/README.md: -------------------------------------------------------------------------------- 1 | # tests Folder 2 | 3 | This folder contains all the unit tests for the project. The tests are designed to ensure the functionality and reliability of the codebase. 
To run the tests, use the following command: 4 | 5 | ```shell 6 | # Run all tests 7 | python -m unittest discover -v -s ./tests -p "test_*.py" 8 | ``` 9 | -------------------------------------------------------------------------------- /config/hydra/default.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - override help: fusion_bench_help 3 | - override job_logging: rich_logging 4 | run: 5 | dir: ${path.log_dir} 6 | sweep: 7 | dir: ${path.log_dir} 8 | subdir: ${hydra.job.num} 9 | job: 10 | env_set: 11 | HYDRA_FULL_ERROR: ${oc.env:HYDRA_FULL_ERROR,1} 12 | output_subdir: "" 13 | -------------------------------------------------------------------------------- /config/method/wudi/wudi.yaml: -------------------------------------------------------------------------------- 1 | # ============================================================================= 2 | # FusionBench Method Configuration: WUDI Merging 3 | # ============================================================================= 4 | _target_: fusion_bench.method.WUDIMerging 5 | 6 | iter_num: 400 7 | exclude_keys: null 8 | -------------------------------------------------------------------------------- /fusion_bench/taskpool/clip_vision/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa F401 2 | from .clip_rankone_moe_taskpool import RankoneMoECLIPVisionModelTaskPool 3 | from .clip_smile_taskpool import SmileCLIPVisionModelTaskPool 4 | from .clip_sparse_wemoe_taskpool import SparseWEMoECLIPVisionModelTaskPool 5 | from .taskpool import CLIPVisionModelTaskPool 6 | -------------------------------------------------------------------------------- /.vscode/init.sh: -------------------------------------------------------------------------------- 1 | #! 
/bin/bash 2 | SCRIPT_DIR=$(cd $(dirname $0); pwd) 3 | 4 | for file in launch.json settings.json 5 | do 6 | if [ -f ${SCRIPT_DIR}/${file} ]; then 7 | echo "File ${file} already exists, skipping" 8 | else 9 | cp -v ${SCRIPT_DIR}/${file}.template ${SCRIPT_DIR}/${file} 10 | fi 11 | done 12 | -------------------------------------------------------------------------------- /config/dataset/image_classification/val/gtsrb.yaml: -------------------------------------------------------------------------------- 1 | gtsrb: 2 | _target_: fusion_bench.utils.data.train_validation_split 3 | dataset: 4 | _target_: datasets.load_dataset 5 | path: tanganke/gtsrb 6 | split: train 7 | validation_fraction: 0.1 8 | validation_size: null 9 | random_seed: 0 10 | return_split: val 11 | -------------------------------------------------------------------------------- /config/dataset/image_classification/val/sun397.yaml: -------------------------------------------------------------------------------- 1 | sun397: 2 | _target_: fusion_bench.utils.data.train_validation_split 3 | dataset: 4 | _target_: datasets.load_dataset 5 | path: tanganke/sun397 6 | split: train 7 | validation_fraction: 0.1 8 | validation_size: null 9 | random_seed: 0 10 | return_split: val 11 | -------------------------------------------------------------------------------- /config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_single_finetuned.yaml: -------------------------------------------------------------------------------- 1 | _target_: fusion_bench.modelpool.CLIPVisionModelPool 2 | _recursive_: False 3 | processor: openai/clip-vit-base-patch32 4 | models: 5 | _pretrained_: openai/clip-vit-base-patch32 6 | finetuned: tanganke/clip-vit-base-patch32_stanford-cars 7 | platform: hf 8 | -------------------------------------------------------------------------------- /fusion_bench/dataset/imdb.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | from typing import Any, Dict, List, Optional 4 | 5 | from datasets import load_dataset, load_from_disk 6 | from transformers import PreTrainedTokenizer 7 | from trl import SFTConfig, SFTTrainer 8 | 9 | import fusion_bench 10 | 11 | log = logging.getLogger(__name__) 12 | -------------------------------------------------------------------------------- /config/dataset/image_classification/val/eurosat.yaml: -------------------------------------------------------------------------------- 1 | eurosat: 2 | _target_: fusion_bench.utils.data.train_validation_split 3 | dataset: 4 | _target_: datasets.load_dataset 5 | path: tanganke/eurosat 6 | split: train 7 | validation_fraction: 0.1 8 | validation_size: null 9 | random_seed: 0 10 | return_split: val 11 | -------------------------------------------------------------------------------- /config/dataset/image_classification/val/resisc45.yaml: -------------------------------------------------------------------------------- 1 | resisc45: 2 | _target_: fusion_bench.utils.data.train_validation_split 3 | dataset: 4 | _target_: datasets.load_dataset 5 | path: tanganke/resisc45 6 | split: train 7 | validation_fraction: 0.1 8 | validation_size: null 9 | random_seed: 0 10 | return_split: val 11 | -------------------------------------------------------------------------------- /examples/randes/clip-vit-base-patch32.sh: -------------------------------------------------------------------------------- 1 | fusion_bench \ 2 | fabric.loggers.name=randes_modelsoup/ViT-B-32_TA8 \ 3 | 
method=randes/superposed_model_soup \ 4 | method.mode=identity_matrix \ 5 | modelpool=CLIPVisionModelPool/clip-vit-base-patch32_TA8_model_only \ 6 | taskpool=CLIPVisionModelTaskPool/clip-vit-classification_TA8 7 | -------------------------------------------------------------------------------- /config/taskpool/CLIPVisionModelTaskPool/clip-vit-classification_TA8.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - CLIPVisionModelTaskPool@: _template 3 | - /dataset/image_classification/test@test_datasets: 4 | - sun397 5 | - stanford-cars 6 | - resisc45 7 | - eurosat 8 | - svhn 9 | - gtsrb 10 | - mnist 11 | - dtd 12 | -------------------------------------------------------------------------------- /config/dataset/image_classification/val/stanford-cars.yaml: -------------------------------------------------------------------------------- 1 | stanford-cars: 2 | _target_: fusion_bench.utils.data.train_validation_split 3 | dataset: 4 | _target_: datasets.load_dataset 5 | path: tanganke/stanford_cars 6 | split: train 7 | validation_fraction: 0.1 8 | validation_size: null 9 | random_seed: 0 10 | return_split: val 11 | -------------------------------------------------------------------------------- /config/modelpool/CausalLMPool/qwen2_math_1.5B_and_R1.yaml: -------------------------------------------------------------------------------- 1 | _target_: fusion_bench.modelpool.CausalLMPool 2 | _recursive_: false 3 | models: 4 | _pretrained_: Qwen/Qwen2.5-1.5B 5 | expert_1: Qwen/Qwen2.5-Math-1.5B 6 | expert_2: deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B 7 | model_kwargs: 8 | torch_dtype: bfloat16 9 | tokenizer: Qwen/Qwen2.5-1.5B 10 | -------------------------------------------------------------------------------- /docs/api/fusion_bench.utils/torch.md: -------------------------------------------------------------------------------- 1 | # PyTorch Utilities 2 | 3 | ## Device Management 4 | 5 | ::: fusion_bench.utils.devices 6 | options: 7 | show_root_full_path: true 8 | heading_level: 3 9 | 10 | ## Dtype 11 | 12 | ::: fusion_bench.utils.dtype 13 | options: 14 | show_root_full_path: true 15 | heading_level: 3 -------------------------------------------------------------------------------- /fusion_bench/tasks/clip_classification/stl10.py: -------------------------------------------------------------------------------- 1 | classnames = [ 2 | "airplane", 3 | "bird", 4 | "car", 5 | "cat", 6 | "deer", 7 | "dog", 8 | "horse", 9 | "monkey", 10 | "ship", 11 | "truck", 12 | ] 13 | 14 | templates = [ 15 | lambda c: f"a photo of a {c}.", 16 | lambda c: f"a photo of the {c}.", 17 | ] 18 | -------------------------------------------------------------------------------- /fusion_bench/method/expert_sparsity/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Original repo: https://github.com/Lucky-Lance/Expert_Sparsity 3 | 4 | Reference: 5 | Not All Experts are Equal: Efficient Expert Pruning and Skipping for Mixture-of-Experts Large Language Models. 6 | ACL 2024. 
7 | http://arxiv.org/abs/2402.14800 8 | """ 9 | 10 | from .mixtral import * 11 | -------------------------------------------------------------------------------- /config/dataset/image_classification/val/svhn.yaml: -------------------------------------------------------------------------------- 1 | svhn: 2 | _target_: fusion_bench.utils.data.train_validation_split 3 | dataset: 4 | _target_: datasets.load_dataset 5 | _args_: 6 | - svhn 7 | - cropped_digits 8 | split: train 9 | validation_fraction: 0.1 10 | validation_size: null 11 | random_seed: 0 12 | return_split: val 13 | -------------------------------------------------------------------------------- /config/fabric/strategy/llama_fsdp.yaml: -------------------------------------------------------------------------------- 1 | _target_: lightning.fabric.strategies.FSDPStrategy 2 | sharding_strategy: FULL_SHARD 3 | cpu_offload: false 4 | auto_wrap_policy: 5 | _target_: fusion_bench.mixins.lightning_fabric.get_policy 6 | _args_: 7 | - transformers.models.llama.modeling_llama.LlamaDecoderLayer 8 | activation_checkpointing_policy: ${.auto_wrap_policy} 9 | -------------------------------------------------------------------------------- /config/method/regmean/regmean.yaml: -------------------------------------------------------------------------------- 1 | # ============================================================================= 2 | # FusionBench Method Configuration: RegMean (Base) 3 | # ============================================================================= 4 | _target_: ??? 5 | num_regmean_examples: 256 6 | reduce_non_diagonal_ratio: 0.1 7 | exclude_param_names_regex: [] 8 | -------------------------------------------------------------------------------- /config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_svhn_and_mnist.yaml: -------------------------------------------------------------------------------- 1 | _target_: fusion_bench.modelpool.CLIPVisionModelPool 2 | _recursive_: False 3 | processor: openai/clip-vit-base-patch32 4 | models: 5 | _pretrained_: openai/clip-vit-base-patch32 6 | svhn: tanganke/clip-vit-base-patch32_svhn 7 | mnist: tanganke/clip-vit-base-patch32_mnist 8 | platform: hf 9 | -------------------------------------------------------------------------------- /config/modelpool/CausalLMPool/Qwen2.5-1.5B_math_and_code.yaml: -------------------------------------------------------------------------------- 1 | _target_: fusion_bench.modelpool.CausalLMPool 2 | _recursive_: false 3 | enable_lazy_loading: true 4 | models: 5 | _pretrained_: Qwen/Qwen2.5-1.5B 6 | math: Qwen/Qwen2.5-Math-1.5B 7 | code: Qwen/Qwen2.5-Coder-1.5B 8 | model_kwargs: 9 | torch_dtype: bfloat16 10 | tokenizer: Qwen/Qwen2.5-1.5B 11 | -------------------------------------------------------------------------------- /fusion_bench/method/pruning/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa F401 2 | from .llama_magnitude_prune import MagnitudePruningForLlama 3 | from .llama_random_prune import RandomPruningForLlama 4 | from .llama_sparsegpt_prune import SparseGPTPruningForLlama 5 | from .llama_wanda_prune import WandaPruningForLlama 6 | from .magnitude_diff_pruning import MagnitudeDiffPruningAlgorithm 7 | -------------------------------------------------------------------------------- /config/modelpool/ConvNextForImageClassification/convnext-base-224.yaml: -------------------------------------------------------------------------------- 1 | _target_: 
fusion_bench.modelpool.ConvNextForImageClassificationPool 2 | _recursive_: False 3 | models: 4 | _pretrained_: 5 | config_path: facebook/convnext-base-224 6 | pretrained: true 7 | dataset_name: null 8 | train_datasets: null 9 | val_datasets: null 10 | test_datasets: null 11 | -------------------------------------------------------------------------------- /config/modelpool/automodelpool.yaml: -------------------------------------------------------------------------------- 1 | type: AutoModelPool 2 | models: 3 | - name: _pretrained_ 4 | path: path_to_your_pretrained_model 5 | - name: model_1 6 | path: path_to_your_model_1 7 | - name: model_2 8 | path: path_to_your_model_2 9 | - name: model_3 10 | path: path_to_your_model_3 11 | - name: model_4 12 | path: path_to_your_model_4 13 | -------------------------------------------------------------------------------- /fusion_bench/models/modeling_smile_gemma2/__init__.py: -------------------------------------------------------------------------------- 1 | from . import register 2 | from .configuration_smile_gemma2 import SmileGemma2Config 3 | from .modeling_smile_gemma2 import ( 4 | SmileGemma2ForCausalLM, 5 | SmileGemma2ForSequenceClassification, 6 | SmileGemma2ForTokenClassification, 7 | SmileGemma2Model, 8 | SmileGemma2PreTrainedModel, 9 | ) 10 | -------------------------------------------------------------------------------- /config/dataset/image_classification/README.md: -------------------------------------------------------------------------------- 1 | # Image Classification Dataset Configurations 2 | 3 | This folder contains the dataset configurations for image classification tasks. 4 | 5 | - Each dataset should have 'image' and 'label' columns. 6 | - If a dataset has no test split, we will use the validation split as the test split and create the validation set from the training set. 7 | -------------------------------------------------------------------------------- /fusion_bench/mixins/openclip_classification.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from fusion_bench.mixins import LightningFabricMixin 4 | from fusion_bench.models.open_clip import ImageClassifier, ImageEncoder 5 | 6 | log = logging.getLogger(__name__) 7 | 8 | 9 | class OpenCLIPClassificationMixin(LightningFabricMixin): 10 | _train_processor = None 11 | _test_processor = None 12 | -------------------------------------------------------------------------------- /config/taskpool/CLIPVisionModelTaskPool/clip-vit-classification_TA8_val.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - CLIPVisionModelTaskPool@: _template 3 | # use validation set as test set 4 | - /dataset/image_classification/val@test_datasets: 5 | - sun397 6 | - stanford-cars 7 | - resisc45 8 | - eurosat 9 | - svhn 10 | - gtsrb 11 | - mnist 12 | - dtd 13 | -------------------------------------------------------------------------------- /fusion_bench/method/task_singular_vector/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | This module is modified from the original code of the paper: 3 | 4 | - Gargiulo, et al.
Task Singular Vectors: Reducing Task Interference in Model Merging 5 | - http://arxiv.org/abs/2412.00081 6 | - https://github.com/AntoAndGar/task_singular_vectors/ 7 | """ 8 | 9 | from .TSVM import TaskSingularVectorMerging 10 | -------------------------------------------------------------------------------- /config/modelpool/Dinov2ForImageClassification/dinov2-base-imagenet1k-1-layer.yaml: -------------------------------------------------------------------------------- 1 | _target_: fusion_bench.modelpool.Dinov2ForImageClassificationPool 2 | _recursive_: False 3 | models: 4 | _pretrained_: 5 | config_path: facebook/dinov2-base-imagenet1k-1-layer 6 | pretrained: true 7 | dataset_name: null 8 | train_datasets: null 9 | val_datasets: null 10 | test_datasets: null 11 | -------------------------------------------------------------------------------- /config/taskpool/CLIPVisionModelTaskPool/clip-vit-classification_TA8_with_control_task.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - CLIPVisionModelTaskPool@: _template 3 | - /dataset/image_classification/test@test_datasets: 4 | - tiny-imagenet 5 | - sun397 6 | - stanford-cars 7 | - resisc45 8 | - eurosat 9 | - svhn 10 | - gtsrb 11 | - mnist 12 | - dtd 13 | -------------------------------------------------------------------------------- /fusion_bench/method/pruning/wanda_utils/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | This module is modified from https://github.com/locuslab/wanda. 3 | 4 | It contains utility functions and classes for pruning neural network models using the Wanda method. 5 | The WANDA method is a weight pruning technique that aims to reduce the number of parameters in a neural network 6 | while maintaining its performance. 7 | """ 8 | -------------------------------------------------------------------------------- /config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_single_task_projection.yaml: -------------------------------------------------------------------------------- 1 | _target_: fusion_bench.modelpool.CLIPVisionModelPool 2 | _recursive_: false 3 | processor: openai/clip-vit-base-patch32 4 | models: 5 | _pretrained_: openai/clip-vit-base-patch32 6 | sun397: tanganke/clip-vit-base-patch32_sun397 7 | stanford-cars: tanganke/clip-vit-base-patch32_stanford-cars 8 | platform: hf 9 | -------------------------------------------------------------------------------- /config/taskpool/CLIPVisionModelTaskPool/clip-vit-classification_TA8_L14.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - CLIPVisionModelTaskPool@: _template 3 | - /dataset/image_classification/test@test_datasets: 4 | - sun397 5 | - stanford-cars 6 | - resisc45 7 | - eurosat 8 | - svhn 9 | - gtsrb 10 | - mnist 11 | - dtd 12 | base_model: openai/clip-vit-large-patch14 13 | -------------------------------------------------------------------------------- /docs/taskpool/gpt2_classification.md: -------------------------------------------------------------------------------- 1 | # GPT-2 Sequence Classification Tasks 2 | 3 | This task pool provides a set of sequence classification tasks from the GLUE benchmark for the GPT-2 model. 4 | Each task is associated with a dataset and the accuracy metric. The tasks are: 5 | CoLA, MNLI, MRPC, QNLI, QQP, RTE, and SST2. 
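As a usage sketch: the `adamerging/layer_wise_gpt2` method config appears verbatim in this repository's example scripts, but the modelpool and taskpool names below are placeholders, not confirmed config paths:

```bash
# Hypothetical: fuse GPT-2 models and evaluate on the GLUE classification tasks.
# Only the method config is confirmed by the examples in this repository;
# replace the placeholders with your actual modelpool/taskpool configs.
fusion_bench \
  method=adamerging/layer_wise_gpt2 \
  modelpool=<your_gpt2_modelpool> \
  taskpool=<your_gpt2_glue_classification_taskpool>
```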
6 | 7 | ## References 8 | 9 | ::: fusion_bench.taskpool.gpt2_text_classification -------------------------------------------------------------------------------- /fusion_bench/method/task_singular_vector/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from fusion_bench.method.ties_merging.ties_merging_utils import ( 2 | check_parameterNamesMatch, 3 | check_state_dicts_equal, 4 | ) 5 | from fusion_bench.utils import state_dict_to_vector, vector_to_state_dict 6 | 7 | from . import TSVC_utils, TSVM_utils 8 | from .task_singular_interference import compute_task_singular_interference 9 | -------------------------------------------------------------------------------- /config/modelpool/smile_mistral_exp_v1.yaml: -------------------------------------------------------------------------------- 1 | type: AutoModelForCausalLMPool 2 | # each model should have a name and a path, and the model is loaded from the path 3 | # this is equivalent to `AutoModelForCausalLM.from_pretrained(path)` 4 | models: 5 | - name: _pretrained_ 6 | path: mistralai/Mistral-7B-v0.1 7 | - name: expert_1 8 | path: meta-math/MetaMath-Mistral-7B 9 | dtype: float16 10 | -------------------------------------------------------------------------------- /config/method/pruning/llama_random_pruning.yaml: -------------------------------------------------------------------------------- 1 | _target_: fusion_bench.method.RandomPruningForLlama 2 | prune_type: unstructured 3 | # === options for unstructured pruning === 4 | # `sparsity_ratio` is the ratio of weights to be pruned, 1 means all weights are pruned 5 | sparsity_ratio: 0.5 6 | # === options for semistructured pruning === 7 | # 2:4 means 2 out of 4 weights are pruned 8 | n: 2 9 | m: 4 10 | -------------------------------------------------------------------------------- /config/model/clip-vit/clip-vit-base-patch16_eight_tasks.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - clip-vit-base-patch16 3 | - clip-vit-base-patch16_sun397 4 | - clip-vit-base-patch16_stanford-cars 5 | - clip-vit-base-patch16_resisc45 6 | - clip-vit-base-patch16_eurosat 7 | - clip-vit-base-patch16_svhn 8 | - clip-vit-base-patch16_gtsrb 9 | - clip-vit-base-patch16_mnist 10 | - clip-vit-base-patch16_dtd 11 | -------------------------------------------------------------------------------- /set_cache_dir.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | SCRIPT_DIR=$(dirname $(realpath $0)) 4 | 5 | if [ -d /mnt/huggingface_cache ]; then 6 | export HF_HOME=/mnt/huggingface_cache/ 7 | else 8 | export HF_HOME=$SCRIPT_DIR/.cache/huggingface 9 | fi 10 | 11 | echo "HF_HOME set to $HF_HOME" 12 | 13 | # if `HF_HOME` does not exist, create it 14 | if [ ! 
-d $HF_HOME ]; then 15 | mkdir -p $HF_HOME 16 | fi 17 | -------------------------------------------------------------------------------- /config/fabric/strategy/llama_peft_fsdp.yaml: -------------------------------------------------------------------------------- 1 | _target_: lightning.fabric.strategies.FSDPStrategy 2 | sharding_strategy: FULL_SHARD 3 | state_dict_type: full # Save a single, consolidated checkpoint file 4 | cpu_offload: false 5 | auto_wrap_policy: 6 | _target_: fusion_bench.mixins.lightning_fabric.get_size_based_auto_wrap_policy 7 | activation_checkpointing_policy: ${.auto_wrap_policy} 8 | # limit_all_gathers: true 9 | -------------------------------------------------------------------------------- /config/modelpool/CausalLMPool/Qwen2.5-1.5B_three_models.yaml: -------------------------------------------------------------------------------- 1 | _target_: fusion_bench.modelpool.CausalLMPool 2 | _recursive_: false 3 | enable_lazy_loading: true 4 | models: 5 | _pretrained_: Qwen/Qwen2.5-1.5B 6 | math: Qwen/Qwen2.5-Math-1.5B 7 | code: Qwen/Qwen2.5-Coder-1.5B 8 | instruction: Qwen/Qwen2.5-1.5B-Instruct 9 | model_kwargs: 10 | torch_dtype: bfloat16 11 | tokenizer: Qwen/Qwen2.5-1.5B 12 | -------------------------------------------------------------------------------- /config/modelpool/smile_mistral_exp_v3.yaml: -------------------------------------------------------------------------------- 1 | type: AutoModelForCausalLMPool 2 | # each model should have a name and a path, and the model is loaded from the path 3 | # this is equivalent to `AutoModelForCausalLM.from_pretrained(path)` 4 | models: 5 | - name: _pretrained_ 6 | path: mistralai/Mistral-7B-v0.1 7 | - name: expert_1 8 | path: uukuguy/speechless-code-mistral-7b-v1.0 9 | dtype: float16 10 | -------------------------------------------------------------------------------- /config/fabric/strategy/deepspeed.yaml: -------------------------------------------------------------------------------- 1 | # https://lightning.ai/docs/fabric/2.4.0/api/generated/lightning.fabric.strategies.DeepSpeedStrategy.html#deepspeedstrategy 2 | _target_: lightning.fabric.strategies.DeepSpeedStrategy 3 | accelerator: null 4 | zero_optimization: true 5 | stage: 2 6 | offload_optimizer: false 7 | offload_parameters: false 8 | offload_params_device: "cpu" 9 | offload_optimizer_device: "cpu" 10 | -------------------------------------------------------------------------------- /config/model/clip-vit/clip-vit-large-patch14_eight_tasks.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - clip-vit-large-patch14 3 | - clip-vit-large-patch14_sun397 4 | - clip-vit-large-patch14_stanford-cars 5 | - clip-vit-large-patch14_resisc45 6 | - clip-vit-large-patch14_eurosat 7 | - clip-vit-large-patch14_svhn 8 | - clip-vit-large-patch14_gtsrb 9 | - clip-vit-large-patch14_mnist 10 | - clip-vit-large-patch14_dtd 11 | -------------------------------------------------------------------------------- /config/modelpool/smile_mistral_exp_v2.yaml: -------------------------------------------------------------------------------- 1 | type: AutoModelForCausalLMPool 2 | # each model should have a name and a path, and the model is loaded from the path 3 | # this is equivalent to `AutoModelForCausalLM.from_pretrained(path)` 4 | models: 5 | - name: _pretrained_ 6 | path: mistralai/Mistral-7B-v0.1 7 | - name: expert_1 8 | path: cognitivecomputations/dolphin-2.1-mistral-7b 9 | dtype: float16 10 | 
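The `smile_mistral_exp_v*.yaml` pools above share one pattern: a pretrained base plus a single expert, each given as a name/path pair. As their inline comments note, loading a model is equivalent to `AutoModelForCausalLM.from_pretrained(path)`; a hedged sketch of that equivalence, with names and dtype taken from `smile_mistral_exp_v2.yaml` (the loop itself is illustrative, not the pool's actual loading code):

```python
# Illustrative only: what an AutoModelForCausalLMPool-style config resolves to.
import torch
from transformers import AutoModelForCausalLM

# name -> path pairs from smile_mistral_exp_v2.yaml; `dtype: float16` from the same file
models = {
    "_pretrained_": "mistralai/Mistral-7B-v0.1",
    "expert_1": "cognitivecomputations/dolphin-2.1-mistral-7b",
}

loaded = {
    name: AutoModelForCausalLM.from_pretrained(path, torch_dtype=torch.float16)
    for name, path in models.items()
}
```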
-------------------------------------------------------------------------------- /examples/mergebench/evaluate_all.sh: -------------------------------------------------------------------------------- 1 | for script in \ 2 | evaluate_gemma-2-9b.sh \ 3 | evaluate_gemma-2-9b-it.sh \ 4 | evaluate_Llama-3.1-8B.sh \ 5 | evaluate_Llama-3.1-8B-Instruct.sh \ 6 | evaluate_gemma-2-2b.sh \ 7 | evaluate_gemma-2-2b-it.sh \ 8 | evaluate_Llama-3.2-3B.sh \ 9 | evaluate_Llama-3.2-3B-Instruct.sh; do 10 | echo "Running $script" 11 | bash $script 12 | done 13 | -------------------------------------------------------------------------------- /fusion_bench/tasks/clip_classification/kmnist.py: -------------------------------------------------------------------------------- 1 | classnames_mapping = { 2 | "0": "お", 3 | "1": "き", 4 | "2": "す", 5 | "3": "つ", 6 | "4": "な", 7 | "5": "は", 8 | "6": "ま", 9 | "7": "や", 10 | "8": "れ", 11 | "9": "を", 12 | } 13 | classnames = [classnames_mapping[str(c)] for c in range(10)] 14 | 15 | templates = [ 16 | lambda c: f"a photo of the character {c}.", 17 | ] 18 | -------------------------------------------------------------------------------- /config/modelpool/CausalLMPool/llama-7b_3-models_v1.yaml: -------------------------------------------------------------------------------- 1 | _target_: fusion_bench.modelpool.CausalLMPool 2 | _recursive_: false 3 | enable_lazy_loading: true 4 | models: 5 | _pretrained_: meta-llama/Llama-2-7b-hf 6 | chat: meta-llama/Llama-2-7b-chat-hf 7 | math: WizardLMTeam/WizardMath-7B-V1.0 8 | code: codellama/CodeLlama-7b-hf 9 | model_kwargs: 10 | torch_dtype: bfloat16 11 | tokenizer: meta-llama/Llama-2-7b-hf 12 | -------------------------------------------------------------------------------- /config/modelpool/CausalLMPool/mixtral_moe_merging.yaml: -------------------------------------------------------------------------------- 1 | _target_: fusion_bench.modelpool.CausalLMPool 2 | models: 3 | _pretrained_: path_to_your_pretrained_model 4 | expert_1: path_to_your_expert_model_1 5 | expert_2: path_to_your_expert_model_2 6 | expert_3: path_to_your_expert_model_3 7 | expert_4: path_to_your_expert_model_4 8 | tokenizer: ${.models._pretrained_} 9 | model_kwargs: 10 | torch_dtype: bfloat16 11 | -------------------------------------------------------------------------------- /docs/javascripts/mathjax.js: -------------------------------------------------------------------------------- 1 | window.MathJax = { 2 | tex: { 3 | inlineMath: [["\\(", "\\)"]], 4 | displayMath: [["\\[", "\\]"]], 5 | processEscapes: true, 6 | processEnvironments: true 7 | }, 8 | options: { 9 | ignoreHtmlClass: ".*|", 10 | processHtmlClass: "arithmatex" 11 | } 12 | }; 13 | 14 | document$.subscribe(() => { 15 | MathJax.typesetPromise() 16 | }) -------------------------------------------------------------------------------- /examples/mergebench/evaluate_gemma-2-2b.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | SCRIPT_DIR=$(dirname $(realpath $0)) 4 | 5 | source $SCRIPT_DIR/evaluate.sh 6 | 7 | MODELS=( 8 | "google/gemma-2-2b" 9 | "MergeBench/gemma-2-2b_instruction" 10 | "MergeBench/gemma-2-2b_math" 11 | "MergeBench/gemma-2-2b_coding" 12 | "MergeBench/gemma-2-2b_multilingual" 13 | "MergeBench/gemma-2-2b_safety" 14 | ) 15 | 16 | evaluate_all_models 17 | -------------------------------------------------------------------------------- /examples/mergebench/evaluate_gemma-2-9b.sh: 
-------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | SCRIPT_DIR=$(dirname $(realpath $0)) 4 | 5 | source $SCRIPT_DIR/evaluate.sh 6 | 7 | MODELS=( 8 | "google/gemma-2-9b" 9 | "MergeBench/gemma-2-9b_instruction" 10 | "MergeBench/gemma-2-9b_math" 11 | "MergeBench/gemma-2-9b_coding" 12 | "MergeBench/gemma-2-9b_multilingual" 13 | "MergeBench/gemma-2-9b_safety" 14 | ) 15 | 16 | evaluate_all_models 17 | -------------------------------------------------------------------------------- /config/method/mixtral_moe_merging.yaml: -------------------------------------------------------------------------------- 1 | # ============================================================================= 2 | # FusionBench Method Configuration: Mixtral MoE Merging/Upscaling 3 | # ============================================================================= 4 | name: mixtral_moe_upscaling # or "mixtral_for_causal_lm_moe_upscaling" 5 | experts_per_token: 2 6 | # path to save the upscaled model 7 | save_checkpoint: null 8 | -------------------------------------------------------------------------------- /config/method/linear/task_arithmetic_for_causallm.yaml: -------------------------------------------------------------------------------- 1 | # ============================================================================= 2 | # FusionBench Method Configuration: Task Arithmetic (Causal LM) 3 | # ============================================================================= 4 | _target_: fusion_bench.method.TaskArithmeticForCausalLM 5 | scaling_factor: 0.3 6 | merge_backbone: false 7 | model_save_path: ${path.log_dir}/checkpoint 8 | -------------------------------------------------------------------------------- /config/modelpool/CausalLMPool/simle_mixtral_exp_v4.yaml: -------------------------------------------------------------------------------- 1 | _target_: fusion_bench.modelpool.CausalLMPool 2 | _recursive_: false 3 | models: 4 | _pretrained_: mistralai/Mistral-7B-v0.1 5 | expert_1: meta-math/MetaMath-Mistral-7B 6 | expert_2: cognitivecomputations/dolphin-2.1-mistral-7b 7 | expert_3: uukuguy/speechless-code-mistral-7b-v1.0 8 | model_kwargs: 9 | torch_dtype: bfloat16 10 | tokenizer: mistralai/Mistral-7B-v0.1 11 | -------------------------------------------------------------------------------- /config/modelpool/Seq2SeqLMPool/flan-t5-base_glue.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - Seq2SeqLMPool@: _template 3 | - /model/flan-t5@models: 4 | - flan-t5-base 5 | - flan-t5-base_glue-cola 6 | - flan-t5-base_glue-mnli 7 | - flan-t5-base_glue-mrpc 8 | - flan-t5-base_glue-qnli 9 | - flan-t5-base_glue-qqp 10 | - flan-t5-base_glue-rte 11 | - flan-t5-base_glue-sst2 12 | - flan-t5-base_glue-stsb 13 | -------------------------------------------------------------------------------- /docs/api/fusion_bench.program.md: -------------------------------------------------------------------------------- 1 | # fusion_bench.program 2 | 3 | ## Class Definitions 4 | 5 | - [fusion_bench.programs.BaseHydraProgram][]: Base class for Hydra-based programs in FusionBench. 6 | - [fusion_bench.programs.FabricModelFusionProgram][]: A program for fusing models using Lightning Fabric. 
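A sketch of how such a program is typically instantiated from a config. This illustrates the `_target_` convention used throughout this repository's configs rather than documented API; in particular, the `run()` entry point is an assumption:

```python
# Hedged sketch: instantiate the top-level program from a Hydra-style config.
from hydra.utils import instantiate
from omegaconf import OmegaConf

# config/_get_started/llm_slerp.yaml (shown elsewhere in this repository)
# declares `_target_: fusion_bench.programs.FabricModelFusionProgram`.
cfg = OmegaConf.load("config/_get_started/llm_slerp.yaml")

program = instantiate(cfg)  # sub-configs stay unresolved due to `_recursive_: false`
program.run()               # assumed entry point
```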
7 | 8 | ## References 9 | 10 | ::: fusion_bench.programs.BaseHydraProgram 11 | ::: fusion_bench.programs.FabricModelFusionProgram 12 | -------------------------------------------------------------------------------- /examples/mergebench/evaluate_Llama-3.1-8B.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | SCRIPT_DIR=$(dirname $(realpath $0)) 4 | 5 | source $SCRIPT_DIR/evaluate.sh 6 | 7 | MODELS=( 8 | "meta-llama/Llama-3.1-8B" 9 | "MergeBench/Llama-3.1-8B_instruction" 10 | "MergeBench/Llama-3.1-8B_math" 11 | "MergeBench/Llama-3.1-8B_coding" 12 | "MergeBench/Llama-3.1-8B_multilingual" 13 | "MergeBench/Llama-3.1-8B_safety" 14 | ) 15 | 16 | evaluate_all_models 17 | -------------------------------------------------------------------------------- /examples/mergebench/evaluate_Llama-3.2-3B.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | SCRIPT_DIR=$(dirname $(realpath $0)) 4 | 5 | source $SCRIPT_DIR/evaluate.sh 6 | 7 | MODELS=( 8 | "meta-llama/Llama-3.2-3B" 9 | "MergeBench/Llama-3.2-3B_instruction" 10 | "MergeBench/Llama-3.2-3B_math" 11 | "MergeBench/Llama-3.2-3B_coding" 12 | "MergeBench/Llama-3.2-3B_multilingual" 13 | "MergeBench/Llama-3.2-3B_safety" 14 | ) 15 | 16 | evaluate_all_models 17 | -------------------------------------------------------------------------------- /config/fabric/loggers/csv_logger.yaml: -------------------------------------------------------------------------------- 1 | _target_: lightning.fabric.loggers.CSVLogger 2 | # the logs directory would be `root_dir/name/version_X` 3 | # for example, `outputs/logs/lightning_logs/version_0` and `outputs/logs/lightning_logs/version_1` by default 4 | 5 | # root directory for all logging 6 | root_dir: ${path.log_dir} 7 | # the name of the experiment 8 | name: "" 9 | version: "" 10 | prefix: "" 11 | flush_logs_every_n_steps: 100 12 | -------------------------------------------------------------------------------- /fusion_bench/constants/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa F401 2 | import importlib.metadata 3 | 4 | from .paths import * 5 | from .runtime import RuntimeConstants 6 | 7 | # fusionbench version 8 | try: 9 | FUSION_BENCH_VERSION = importlib.metadata.version("fusion-bench") 10 | except importlib.metadata.PackageNotFoundError: 11 | # Fallback when package is not installed (e.g., during development) 12 | FUSION_BENCH_VERSION = "0.0.0.dev" 13 | -------------------------------------------------------------------------------- /fusion_bench/models/modeling_smile_llama/register.py: -------------------------------------------------------------------------------- 1 | from transformers import AutoConfig, AutoModel, AutoModelForCausalLM 2 | 3 | from .configuration_smile_llama import SmileLlamaConfig 4 | from .modeling_smile_llama import SmileLlamaForCausalLM, SmileLlamaModel 5 | 6 | AutoConfig.register("smile_llama", SmileLlamaConfig) 7 | AutoModel.register(SmileLlamaConfig, SmileLlamaModel) 8 | AutoModelForCausalLM.register(SmileLlamaConfig, SmileLlamaForCausalLM) 9 | -------------------------------------------------------------------------------- /fusion_bench/models/modeling_smile_qwen2/register.py: -------------------------------------------------------------------------------- 1 | from transformers import AutoConfig, AutoModel, AutoModelForCausalLM 2 | 3 | from .configuration_smile_qwen2 import SmileQwen2Config 4 | from 
.modeling_smile_qwen2 import SmileQwen2ForCausalLM, SmileQwen2Model 5 | 6 | AutoConfig.register("smile_qwen2", SmileQwen2Config) 7 | AutoModel.register(SmileQwen2Config, SmileQwen2Model) 8 | AutoModelForCausalLM.register(SmileQwen2Config, SmileQwen2ForCausalLM) 9 | -------------------------------------------------------------------------------- /config/model/clip-vit/clip-vit-base-patch32_eight_tasks.yaml: -------------------------------------------------------------------------------- 1 | # The 8 tasks used in the Task Arithmetic paper 2 | defaults: 3 | - clip-vit-base-patch32 4 | - clip-vit-base-patch32_sun397 5 | - clip-vit-base-patch32_stanford-cars 6 | - clip-vit-base-patch32_resisc45 7 | - clip-vit-base-patch32_eurosat 8 | - clip-vit-base-patch32_svhn 9 | - clip-vit-base-patch32_gtsrb 10 | - clip-vit-base-patch32_mnist 11 | - clip-vit-base-patch32_dtd 12 | -------------------------------------------------------------------------------- /docs/taskpool/flan-t5_generation.md: -------------------------------------------------------------------------------- 1 | # Flan-T5 Models for Text Generation Tasks 2 | 3 | This task pool provides a set of text generation tasks from the GLUE benchmark for the Flan-T5 model. 4 | Each task is associated with a dataset. 5 | We report the exact match accuracy metric for CoLA, MNLI, MRPC, QNLI, QQP, RTE, and SST2, and Spearman's rho for STSB. 6 | 7 | ## References 8 | 9 | ::: fusion_bench.compat.taskpool.flan_t5_glue_text_generation 10 | -------------------------------------------------------------------------------- /config/fabric/loggers/tensorboard_logger.yaml: -------------------------------------------------------------------------------- 1 | _target_: lightning.fabric.loggers.TensorBoardLogger 2 | # the logs directory would be `root_dir/name/version_X` 3 | # for example, `outputs/logs/lightning_logs/version_0` and `outputs/logs/lightning_logs/version_1` by default 4 | 5 | # root directory for all logging 6 | root_dir: ${path.log_dir} 7 | # the name of the experiment 8 | name: "" 9 | version: "" 10 | sub_dir: null 11 | default_hp_metric: false 12 | -------------------------------------------------------------------------------- /config/modelpool/CLIPVisionModelPool/clip-vit-base-patch16_TA8.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - CLIPVisionModelPool@: _template 3 | - /model/clip-vit@models: clip-vit-base-patch16_eight_tasks 4 | - /dataset/image_classification/train@train_datasets: the_eight_tasks 5 | - /dataset/image_classification/test@test_datasets: the_eight_tasks 6 | processor: 7 | _target_: transformers.CLIPProcessor.from_pretrained 8 | pretrained_model_name_or_path: openai/clip-vit-base-patch16 9 | -------------------------------------------------------------------------------- /config/method/dummy.yaml: -------------------------------------------------------------------------------- 1 | # ============================================================================= 2 | # FusionBench Method Configuration: Dummy 3 | # ============================================================================= 4 | # No-op method for testing pipelines and wiring. 5 | # Instantiates and exits without modifying models.
6 | # ============================================================================= 7 | _target_: fusion_bench.method.DummyAlgorithm 8 | -------------------------------------------------------------------------------- /config/method/linear/weighted_average.yaml: -------------------------------------------------------------------------------- 1 | # ============================================================================= 2 | # FusionBench Method Configuration: Weighted Average (Linear) 3 | # ============================================================================= 4 | _target_: fusion_bench.method.WeightedAverageAlgorithm 5 | normalize: true # if true, the weights will be normalized before merging 6 | weights: # List of weights for each model 7 | - 0.5 8 | - 0.5 9 | -------------------------------------------------------------------------------- /config/modelpool/CausalLMPool/mergebench/gemma-2-2b.yaml: -------------------------------------------------------------------------------- 1 | _target_: fusion_bench.modelpool.CausalLMPool 2 | models: 3 | _pretrained_: google/gemma-2-2b 4 | instruction: MergeBench/gemma-2-2b_instruction 5 | math: MergeBench/gemma-2-2b_math 6 | coding: MergeBench/gemma-2-2b_coding 7 | multilingual: MergeBench/gemma-2-2b_multilingual 8 | safety: MergeBench/gemma-2-2b_safety 9 | model_kwargs: 10 | torch_dtype: bfloat16 11 | tokenizer: google/gemma-2-2b 12 | -------------------------------------------------------------------------------- /config/modelpool/CausalLMPool/mergebench/gemma-2-9b.yaml: -------------------------------------------------------------------------------- 1 | _target_: fusion_bench.modelpool.CausalLMPool 2 | models: 3 | _pretrained_: google/gemma-2-9b 4 | instruction: MergeBench/gemma-2-9b_instruction 5 | math: MergeBench/gemma-2-9b_math 6 | coding: MergeBench/gemma-2-9b_coding 7 | multilingual: MergeBench/gemma-2-9b_multilingual 8 | safety: MergeBench/gemma-2-9b_safety 9 | model_kwargs: 10 | torch_dtype: bfloat16 11 | tokenizer: google/gemma-2-9b 12 | -------------------------------------------------------------------------------- /config/model/clip-vit/download_TALL20_models.sh: -------------------------------------------------------------------------------- 1 | #! 
/bin/bash 2 | for MODEL in clip-vit-base-patch32 clip-vit-base-patch16 clip-vit-large-patch14; do 3 | for TASK in sun397 stanford-cars resisc45 eurosat svhn gtsrb mnist dtd oxford_flowers102 pcam fer2013 oxford-iiit-pet stl10 cifar100 cifar10 food101 fashion_mnist emnist_letters kmnist rendered-sst2; do 4 | huggingface-cli download --local-dir tanganke/${MODEL}_${TASK} tanganke/${MODEL}_${TASK} 5 | done 6 | done 7 | -------------------------------------------------------------------------------- /examples/open_clip/evaluate_single_model.sh: -------------------------------------------------------------------------------- 1 | fusion_bench \ 2 | method=dummy \ 3 | modelpool=OpenCLIPVisionModelPool/ViT-B-32_individual \ 4 | taskpool=OpenCLIPVisionModelTaskPool/ViT-B-32_TA8 5 | 6 | fusion_bench \ 7 | method=dummy \ 8 | modelpool=OpenCLIPVisionModelPool/ViT-B-32_individual \ 9 | modelpool.models._pretrained_.pickle_path="$\{...model_dir\}/ViT-B-32/SUN397/finetuned.pt" \ 10 | taskpool=OpenCLIPVisionModelTaskPool/ViT-B-32_TA8 11 | -------------------------------------------------------------------------------- /fusion_bench/models/modeling_smile_mistral/register.py: -------------------------------------------------------------------------------- 1 | from transformers import AutoConfig, AutoModel, AutoModelForCausalLM 2 | 3 | from .configuration_smile_mistral import SmileMistralConfig 4 | from .modeling_smile_mistral import SmileMistralForCausalLM, SmileMistralModel 5 | 6 | AutoConfig.register("smile_mistral", SmileMistralConfig) 7 | AutoModel.register(SmileMistralConfig, SmileMistralModel) 8 | AutoModelForCausalLM.register(SmileMistralConfig, SmileMistralForCausalLM) 9 | -------------------------------------------------------------------------------- /config/_get_started/llm_slerp.yaml: -------------------------------------------------------------------------------- 1 | _target_: fusion_bench.programs.FabricModelFusionProgram 2 | _recursive_: false 3 | method: 4 | _target_: fusion_bench.method.SlerpForCausalLM 5 | t: 0.5 6 | modelpool: 7 | _target_: fusion_bench.modelpool.CausalLMPool 8 | models: 9 | model_1: ibivibiv/alpaca-dragon-72b-v1 10 | model_2: moreh/MoMo-72B-lora-1.8.7-DPO 11 | tokenizer: ibivibiv/alpaca-dragon-72b-v1 12 | enable_lazy_loading: true # load each model as a LazyStateDict 13 | -------------------------------------------------------------------------------- /docs/algorithms/slerp.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: SLERP 3 | --- 4 | # Spherical Linear Interpolation (SLERP) 5 | 6 | SLERP stands for Spherical Linear intERPolation[^1]. 7 | 8 | ## Implementation Details 9 | 10 | - [fusion_bench.method.SlerpMergeAlgorithm][]: Architecture-agnostic implementation. 11 | - [fusion_bench.method.SlerpForCausalLM][]: SLERP for large language models. 12 | 13 | [^1]: SLERP For Model Merging – A Primer https://www.coinfeeds.ai/ai-blog/slerp-model-merging-primer 14 | -------------------------------------------------------------------------------- /fusion_bench/metrics/text_to_image_generation/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | This module implements metrics for text-to-image generation tasks, 3 | including reward functions for alignment and Reinforcement Learning from Human Feedback (RLHF) training.
4 | """ 5 | 6 | # flake8: noqa F401 7 | from .aesthetic_scorer import aesthetic_scorer 8 | from .compressibility import jpeg_compressibility_scorer, jpeg_incompressibility_scorer 9 | from .pickscore_scorer import pickscore_scorer 10 | -------------------------------------------------------------------------------- /fusion_bench/models/modeling_losparse_llama/register.py: -------------------------------------------------------------------------------- 1 | from transformers import AutoConfig, AutoModel, AutoModelForCausalLM 2 | 3 | from .configuration_losparse_llama import LoSparseLlamaConfig 4 | from .modeling_losparse_llama import LoSparseLlamaForCausalLM, LoSparseLlamaModel 5 | 6 | AutoConfig.register("losparse_llama", LoSparseLlamaConfig) 7 | AutoModel.register(LoSparseLlamaConfig, LoSparseLlamaModel) 8 | AutoModelForCausalLM.register(LoSparseLlamaConfig, LoSparseLlamaForCausalLM) 9 | -------------------------------------------------------------------------------- /config/modelpool/ResNetForImageClassification/transformers/resnet18_dtd.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - /dataset/image_classification/train@train_datasets: 3 | - dtd 4 | - /dataset/image_classification/test@val_datasets: 5 | - dtd 6 | - _self_ 7 | _target_: fusion_bench.modelpool.ResNetForImageClassificationPool 8 | _recursive_: False 9 | type: transformers 10 | models: 11 | _pretrained_: 12 | config_path: microsoft/resnet-18 13 | pretrained: true 14 | dataset_name: dtd -------------------------------------------------------------------------------- /config/modelpool/ResNetForImageClassification/transformers/resnet50_dtd.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - /dataset/image_classification/train@train_datasets: 3 | - dtd 4 | - /dataset/image_classification/test@val_datasets: 5 | - dtd 6 | - _self_ 7 | _target_: fusion_bench.modelpool.ResNetForImageClassificationPool 8 | _recursive_: False 9 | type: transformers 10 | models: 11 | _pretrained_: 12 | config_path: microsoft/resnet-50 13 | pretrained: true 14 | dataset_name: dtd -------------------------------------------------------------------------------- /examples/mergebench/evaluate_gemma-2-2b-it.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | SCRIPT_DIR=$(dirname $(realpath $0)) 4 | 5 | source $SCRIPT_DIR/evaluate.sh 6 | 7 | MODELS=( 8 | "google/gemma-2-2b-it" 9 | "MergeBench/gemma-2-2b-it_instruction" 10 | "MergeBench/gemma-2-2b-it_math" 11 | "MergeBench/gemma-2-2b-it_coding" 12 | "MergeBench/gemma-2-2b-it_multilingual" 13 | "MergeBench/gemma-2-2b-it_safety" 14 | ) 15 | 16 | LM_EVAL_ARGS="--apply_chat_template" 17 | 18 | evaluate_all_models 19 | -------------------------------------------------------------------------------- /examples/mergebench/evaluate_gemma-2-9b-it.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | SCRIPT_DIR=$(dirname $(realpath $0)) 4 | 5 | source $SCRIPT_DIR/evaluate.sh 6 | 7 | MODELS=( 8 | "google/gemma-2-9b-it" 9 | "MergeBench/gemma-2-9b-it_instruction" 10 | "MergeBench/gemma-2-9b-it_math" 11 | "MergeBench/gemma-2-9b-it_coding" 12 | "MergeBench/gemma-2-9b-it_multilingual" 13 | "MergeBench/gemma-2-9b-it_safety" 14 | ) 15 | 16 | LM_EVAL_ARGS="--apply_chat_template" 17 | 18 | evaluate_all_models 19 | -------------------------------------------------------------------------------- 
/fusion_bench/dataset/fer2013.py: -------------------------------------------------------------------------------- 1 | from datasets import load_dataset 2 | 3 | 4 | def load_fer2013(path: str = "clip-benchmark/wds_fer2013", split: str = "train"): 5 | dataset = load_dataset(path, split=split) 6 | dataset = dataset.remove_columns(["__key__", "__url__"]) 7 | dataset = dataset.rename_columns({"jpg": "image", "cls": "label"}) 8 | return dataset 9 | 10 | 11 | if __name__ == "__main__": 12 | dataset = load_fer2013(split="test") 13 | print(dataset) 14 | -------------------------------------------------------------------------------- /config/modelpool/CausalLMPool/mergebench/Llama-3.1-8B.yaml: -------------------------------------------------------------------------------- 1 | _target_: fusion_bench.modelpool.CausalLMPool 2 | models: 3 | _pretrained_: meta-llama/Llama-3.1-8B 4 | instruction: MergeBench/Llama-3.1-8B_instruction 5 | math: MergeBench/Llama-3.1-8B_math 6 | coding: MergeBench/Llama-3.1-8B_coding 7 | multilingual: MergeBench/Llama-3.1-8B_multilingual 8 | safety: MergeBench/Llama-3.1-8B_safety 9 | model_kwargs: 10 | torch_dtype: bfloat16 11 | tokenizer: meta-llama/Llama-3.1-8B 12 | -------------------------------------------------------------------------------- /config/modelpool/CausalLMPool/mergebench/Llama-3.2-3B.yaml: -------------------------------------------------------------------------------- 1 | _target_: fusion_bench.modelpool.CausalLMPool 2 | models: 3 | _pretrained_: meta-llama/Llama-3.2-3B 4 | instruction: MergeBench/Llama-3.2-3B_instruction 5 | math: MergeBench/Llama-3.2-3B_math 6 | coding: MergeBench/Llama-3.2-3B_coding 7 | multilingual: MergeBench/Llama-3.2-3B_multilingual 8 | safety: MergeBench/Llama-3.2-3B_safety 9 | model_kwargs: 10 | torch_dtype: bfloat16 11 | tokenizer: meta-llama/Llama-3.2-3B 12 | -------------------------------------------------------------------------------- /config/modelpool/CausalLMPool/mergebench/gemma-2-2b-it.yaml: -------------------------------------------------------------------------------- 1 | _target_: fusion_bench.modelpool.CausalLMPool 2 | models: 3 | _pretrained_: google/gemma-2-2b-it 4 | instruction: MergeBench/gemma-2-2b-it_instruction 5 | math: MergeBench/gemma-2-2b-it_math 6 | coding: MergeBench/gemma-2-2b-it_coding 7 | multilingual: MergeBench/gemma-2-2b-it_multilingual 8 | safety: MergeBench/gemma-2-2b-it_safety 9 | model_kwargs: 10 | torch_dtype: bfloat16 11 | tokenizer: google/gemma-2-2b-it 12 | -------------------------------------------------------------------------------- /config/modelpool/CausalLMPool/mergebench/gemma-2-9b-it.yaml: -------------------------------------------------------------------------------- 1 | _target_: fusion_bench.modelpool.CausalLMPool 2 | models: 3 | _pretrained_: google/gemma-2-9b-it 4 | instruction: MergeBench/gemma-2-9b-it_instruction 5 | math: MergeBench/gemma-2-9b-it_math 6 | coding: MergeBench/gemma-2-9b-it_coding 7 | multilingual: MergeBench/gemma-2-9b-it_multilingual 8 | safety: MergeBench/gemma-2-9b-it_safety 9 | model_kwargs: 10 | torch_dtype: bfloat16 11 | tokenizer: google/gemma-2-9b-it 12 | -------------------------------------------------------------------------------- /config/modelpool/ResNetForImageClassification/transformers/resnet152_dtd.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - /dataset/image_classification/train@train_datasets: 3 | - dtd 4 | - /dataset/image_classification/test@val_datasets: 5 | - dtd 6 | - _self_ 
7 | _target_: fusion_bench.modelpool.ResNetForImageClassificationPool 8 | _recursive_: False 9 | type: transformers 10 | models: 11 | _pretrained_: 12 | config_path: microsoft/resnet-152 13 | pretrained: true 14 | dataset_name: dtd -------------------------------------------------------------------------------- /config/modelpool/ResNetForImageClassification/transformers/resnet18_pcam.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - /dataset/image_classification/train@train_datasets: 3 | - pcam 4 | - /dataset/image_classification/test@val_datasets: 5 | - pcam 6 | - _self_ 7 | _target_: fusion_bench.modelpool.ResNetForImageClassificationPool 8 | _recursive_: False 9 | type: transformers 10 | models: 11 | _pretrained_: 12 | config_path: microsoft/resnet-18 13 | pretrained: true 14 | dataset_name: pcam -------------------------------------------------------------------------------- /config/modelpool/ResNetForImageClassification/transformers/resnet18_svhn.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - /dataset/image_classification/train@train_datasets: 3 | - svhn 4 | - /dataset/image_classification/test@val_datasets: 5 | - svhn 6 | - _self_ 7 | _target_: fusion_bench.modelpool.ResNetForImageClassificationPool 8 | _recursive_: False 9 | type: transformers 10 | models: 11 | _pretrained_: 12 | config_path: microsoft/resnet-18 13 | pretrained: true 14 | dataset_name: svhn -------------------------------------------------------------------------------- /config/modelpool/ResNetForImageClassification/transformers/resnet50_pcam.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - /dataset/image_classification/train@train_datasets: 3 | - pcam 4 | - /dataset/image_classification/test@val_datasets: 5 | - pcam 6 | - _self_ 7 | _target_: fusion_bench.modelpool.ResNetForImageClassificationPool 8 | _recursive_: False 9 | type: transformers 10 | models: 11 | _pretrained_: 12 | config_path: microsoft/resnet-50 13 | pretrained: true 14 | dataset_name: pcam -------------------------------------------------------------------------------- /config/modelpool/ResNetForImageClassification/transformers/resnet50_svhn.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - /dataset/image_classification/train@train_datasets: 3 | - svhn 4 | - /dataset/image_classification/test@val_datasets: 5 | - svhn 6 | - _self_ 7 | _target_: fusion_bench.modelpool.ResNetForImageClassificationPool 8 | _recursive_: False 9 | type: transformers 10 | models: 11 | _pretrained_: 12 | config_path: microsoft/resnet-50 13 | pretrained: true 14 | dataset_name: svhn -------------------------------------------------------------------------------- /fusion_bench/method/linear/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa F401 2 | from .expo import ExPOAlgorithm 3 | from .linear_interpolation import LinearInterpolationAlgorithm 4 | from .llama_expo import ExPOAlgorithmForLlama 5 | from .simple_average_for_causallm import SimpleAverageForCausalLM, SimpleAverageForLlama 6 | from .task_arithmetic_for_causallm import ( 7 | TaskArithmeticForCausalLM, 8 | TaskArithmeticForLlama, 9 | ) 10 | from .ties_merging_for_causallm import TiesMergingForCausalLM 11 | -------------------------------------------------------------------------------- 
/config/modelpool/CLIPVisionModelPool/clip-vit-base-patch16_TALL14_model_only.yaml: -------------------------------------------------------------------------------- 1 | # The 14 task used in the paper: 2 | # Wang et al. Localizing Task Information for Improved Model Merging and Compression 3 | # http://arxiv.org/abs/2405.07813 4 | defaults: 5 | - CLIPVisionModelPool@: _template 6 | - /model/clip-vit@models: clip-vit-base-patch16_TALL14 7 | processor: 8 | _target_: transformers.CLIPProcessor.from_pretrained 9 | pretrained_model_name_or_path: openai/clip-vit-base-patch16 10 | -------------------------------------------------------------------------------- /config/modelpool/ResNetForImageClassification/transformers/resnet152_gtsrb.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - /dataset/image_classification/train@train_datasets: 3 | - gtsrb 4 | - /dataset/image_classification/test@val_datasets: 5 | - gtsrb 6 | - _self_ 7 | _target_: fusion_bench.modelpool.ResNetForImageClassificationPool 8 | _recursive_: False 9 | type: transformers 10 | models: 11 | _pretrained_: 12 | config_path: microsoft/resnet-152 13 | pretrained: true 14 | dataset_name: gtsrb -------------------------------------------------------------------------------- /config/modelpool/ResNetForImageClassification/transformers/resnet152_mnist.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - /dataset/image_classification/train@train_datasets: 3 | - mnist 4 | - /dataset/image_classification/test@val_datasets: 5 | - mnist 6 | - _self_ 7 | _target_: fusion_bench.modelpool.ResNetForImageClassificationPool 8 | _recursive_: False 9 | type: transformers 10 | models: 11 | _pretrained_: 12 | config_path: microsoft/resnet-152 13 | pretrained: true 14 | dataset_name: mnist -------------------------------------------------------------------------------- /config/modelpool/ResNetForImageClassification/transformers/resnet152_pcam.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - /dataset/image_classification/train@train_datasets: 3 | - pcam 4 | - /dataset/image_classification/test@val_datasets: 5 | - pcam 6 | - _self_ 7 | _target_: fusion_bench.modelpool.ResNetForImageClassificationPool 8 | _recursive_: False 9 | type: transformers 10 | models: 11 | _pretrained_: 12 | config_path: microsoft/resnet-152 13 | pretrained: true 14 | dataset_name: pcam -------------------------------------------------------------------------------- /config/modelpool/ResNetForImageClassification/transformers/resnet152_stl10.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - /dataset/image_classification/train@train_datasets: 3 | - stl10 4 | - /dataset/image_classification/test@val_datasets: 5 | - stl10 6 | - _self_ 7 | _target_: fusion_bench.modelpool.ResNetForImageClassificationPool 8 | _recursive_: False 9 | type: transformers 10 | models: 11 | _pretrained_: 12 | config_path: microsoft/resnet-152 13 | pretrained: true 14 | dataset_name: stl10 -------------------------------------------------------------------------------- /config/modelpool/ResNetForImageClassification/transformers/resnet152_svhn.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - /dataset/image_classification/train@train_datasets: 3 | - svhn 4 | - /dataset/image_classification/test@val_datasets: 5 | - svhn 6 | - _self_ 7 | 
_target_: fusion_bench.modelpool.ResNetForImageClassificationPool 8 | _recursive_: False 9 | type: transformers 10 | models: 11 | _pretrained_: 12 | config_path: microsoft/resnet-152 13 | pretrained: true 14 | dataset_name: svhn -------------------------------------------------------------------------------- /config/modelpool/ResNetForImageClassification/transformers/resnet18_gtsrb.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - /dataset/image_classification/train@train_datasets: 3 | - gtsrb 4 | - /dataset/image_classification/test@val_datasets: 5 | - gtsrb 6 | - _self_ 7 | _target_: fusion_bench.modelpool.ResNetForImageClassificationPool 8 | _recursive_: False 9 | type: transformers 10 | models: 11 | _pretrained_: 12 | config_path: microsoft/resnet-18 13 | pretrained: true 14 | dataset_name: gtsrb -------------------------------------------------------------------------------- /config/modelpool/ResNetForImageClassification/transformers/resnet18_mnist.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - /dataset/image_classification/train@train_datasets: 3 | - mnist 4 | - /dataset/image_classification/test@val_datasets: 5 | - mnist 6 | - _self_ 7 | _target_: fusion_bench.modelpool.ResNetForImageClassificationPool 8 | _recursive_: False 9 | type: transformers 10 | models: 11 | _pretrained_: 12 | config_path: microsoft/resnet-18 13 | pretrained: true 14 | dataset_name: mnist -------------------------------------------------------------------------------- /config/modelpool/ResNetForImageClassification/transformers/resnet18_stl10.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - /dataset/image_classification/train@train_datasets: 3 | - stl10 4 | - /dataset/image_classification/test@val_datasets: 5 | - stl10 6 | - _self_ 7 | _target_: fusion_bench.modelpool.ResNetForImageClassificationPool 8 | _recursive_: False 9 | type: transformers 10 | models: 11 | _pretrained_: 12 | config_path: microsoft/resnet-18 13 | pretrained: true 14 | dataset_name: stl10 -------------------------------------------------------------------------------- /config/modelpool/ResNetForImageClassification/transformers/resnet50_gtsrb.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - /dataset/image_classification/train@train_datasets: 3 | - gtsrb 4 | - /dataset/image_classification/test@val_datasets: 5 | - gtsrb 6 | - _self_ 7 | _target_: fusion_bench.modelpool.ResNetForImageClassificationPool 8 | _recursive_: False 9 | type: transformers 10 | models: 11 | _pretrained_: 12 | config_path: microsoft/resnet-50 13 | pretrained: true 14 | dataset_name: gtsrb -------------------------------------------------------------------------------- /config/modelpool/ResNetForImageClassification/transformers/resnet50_mnist.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - /dataset/image_classification/train@train_datasets: 3 | - mnist 4 | - /dataset/image_classification/test@val_datasets: 5 | - mnist 6 | - _self_ 7 | _target_: fusion_bench.modelpool.ResNetForImageClassificationPool 8 | _recursive_: False 9 | type: transformers 10 | models: 11 | _pretrained_: 12 | config_path: microsoft/resnet-50 13 | pretrained: true 14 | dataset_name: mnist -------------------------------------------------------------------------------- 
/config/modelpool/ResNetForImageClassification/transformers/resnet50_stl10.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - /dataset/image_classification/train@train_datasets: 3 | - stl10 4 | - /dataset/image_classification/test@val_datasets: 5 | - stl10 6 | - _self_ 7 | _target_: fusion_bench.modelpool.ResNetForImageClassificationPool 8 | _recursive_: False 9 | type: transformers 10 | models: 11 | _pretrained_: 12 | config_path: microsoft/resnet-50 13 | pretrained: true 14 | dataset_name: stl10 -------------------------------------------------------------------------------- /config/method/dare/ties_merging.yaml: -------------------------------------------------------------------------------- 1 | _target_: fusion_bench.method.dare.DareTiesMerging 2 | # === DARE parameters === 3 | sparsity_ratio: 0.5 4 | only_on_linear_weights: false 5 | rescale: true 6 | # === Ties merging parameters === 7 | # Scaling factor $\lambda$ 8 | scaling_factor: 0.5 9 | threshold: 20 10 | # List of keys to remove from the state dict, default is empty 11 | remove_keys: [] 12 | # Function to merge the models, default is sum. Options are 'sum', 'mean', and 'max' 13 | merge_func: sum 14 | -------------------------------------------------------------------------------- /config/modelpool/CLIPVisionModelPool/clip-vit-large-patch14_TALL14_model_only.yaml: -------------------------------------------------------------------------------- 1 | # The 14 task used in the paper: 2 | # Wang et al. Localizing Task Information for Improved Model Merging and Compression 3 | # http://arxiv.org/abs/2405.07813 4 | defaults: 5 | - CLIPVisionModelPool@: _template 6 | - /model/clip-vit@models: clip-vit-large-patch14_TALL14 7 | processor: 8 | _target_: transformers.CLIPProcessor.from_pretrained 9 | pretrained_model_name_or_path: openai/clip-vit-large-patch14 10 | -------------------------------------------------------------------------------- /config/modelpool/ResNetForImageClassification/transformers/resnet152_kmnist.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - /dataset/image_classification/train@train_datasets: 3 | - kmnist 4 | - /dataset/image_classification/test@val_datasets: 5 | - kmnist 6 | - _self_ 7 | _target_: fusion_bench.modelpool.ResNetForImageClassificationPool 8 | _recursive_: False 9 | type: transformers 10 | models: 11 | _pretrained_: 12 | config_path: microsoft/resnet-152 13 | pretrained: true 14 | dataset_name: kmnist -------------------------------------------------------------------------------- /config/modelpool/ResNetForImageClassification/transformers/resnet152_sun397.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - /dataset/image_classification/train@train_datasets: 3 | - sun397 4 | - /dataset/image_classification/test@val_datasets: 5 | - sun397 6 | - _self_ 7 | _target_: fusion_bench.modelpool.ResNetForImageClassificationPool 8 | _recursive_: False 9 | type: transformers 10 | models: 11 | _pretrained_: 12 | config_path: microsoft/resnet-152 13 | pretrained: true 14 | dataset_name: sun397 -------------------------------------------------------------------------------- /config/modelpool/ResNetForImageClassification/transformers/resnet18_kmnist.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - /dataset/image_classification/train@train_datasets: 3 | - kmnist 4 | - 
/dataset/image_classification/test@val_datasets: 5 | - kmnist 6 | - _self_ 7 | _target_: fusion_bench.modelpool.ResNetForImageClassificationPool 8 | _recursive_: False 9 | type: transformers 10 | models: 11 | _pretrained_: 12 | config_path: microsoft/resnet-18 13 | pretrained: true 14 | dataset_name: kmnist -------------------------------------------------------------------------------- /config/modelpool/ResNetForImageClassification/transformers/resnet18_sun397.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - /dataset/image_classification/train@train_datasets: 3 | - sun397 4 | - /dataset/image_classification/test@val_datasets: 5 | - sun397 6 | - _self_ 7 | _target_: fusion_bench.modelpool.ResNetForImageClassificationPool 8 | _recursive_: False 9 | type: transformers 10 | models: 11 | _pretrained_: 12 | config_path: microsoft/resnet-18 13 | pretrained: true 14 | dataset_name: sun397 -------------------------------------------------------------------------------- /config/modelpool/ResNetForImageClassification/transformers/resnet50_kmnist.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - /dataset/image_classification/train@train_datasets: 3 | - kmnist 4 | - /dataset/image_classification/test@val_datasets: 5 | - kmnist 6 | - _self_ 7 | _target_: fusion_bench.modelpool.ResNetForImageClassificationPool 8 | _recursive_: False 9 | type: transformers 10 | models: 11 | _pretrained_: 12 | config_path: microsoft/resnet-50 13 | pretrained: true 14 | dataset_name: kmnist -------------------------------------------------------------------------------- /config/modelpool/ResNetForImageClassification/transformers/resnet50_sun397.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - /dataset/image_classification/train@train_datasets: 3 | - sun397 4 | - /dataset/image_classification/test@val_datasets: 5 | - sun397 6 | - _self_ 7 | _target_: fusion_bench.modelpool.ResNetForImageClassificationPool 8 | _recursive_: False 9 | type: transformers 10 | models: 11 | _pretrained_: 12 | config_path: microsoft/resnet-50 13 | pretrained: true 14 | dataset_name: sun397 -------------------------------------------------------------------------------- /config/modelpool/ResNetForImageClassification/transformers/resnet152_cifar10.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - /dataset/image_classification/train@train_datasets: 3 | - cifar10 4 | - /dataset/image_classification/test@val_datasets: 5 | - cifar10 6 | - _self_ 7 | _target_: fusion_bench.modelpool.ResNetForImageClassificationPool 8 | _recursive_: False 9 | type: transformers 10 | models: 11 | _pretrained_: 12 | config_path: microsoft/resnet-152 13 | pretrained: true 14 | dataset_name: cifar10 -------------------------------------------------------------------------------- /config/modelpool/ResNetForImageClassification/transformers/resnet152_eurosat.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - /dataset/image_classification/train@train_datasets: 3 | - eurosat 4 | - /dataset/image_classification/test@val_datasets: 5 | - eurosat 6 | - _self_ 7 | _target_: fusion_bench.modelpool.ResNetForImageClassificationPool 8 | _recursive_: False 9 | type: transformers 10 | models: 11 | _pretrained_: 12 | config_path: microsoft/resnet-152 13 | pretrained: true 14 | dataset_name: eurosat 
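The ResNet modelpool configs in this group all follow the same template: a Hydra `defaults` list that binds one dataset to `train_datasets`/`val_datasets`, plus a `_pretrained_` entry naming a Hugging Face checkpoint. A hedged sketch of what the `_pretrained_` entry plausibly resolves to for the EuroSAT config above (the `num_labels` handling is an assumption about how the pool adapts the classification head; EuroSAT has 10 classes):

```python
# Illustrative load for `config_path: microsoft/resnet-50` with `pretrained: true`.
from transformers import AutoImageProcessor, ResNetForImageClassification

processor = AutoImageProcessor.from_pretrained("microsoft/resnet-50")
model = ResNetForImageClassification.from_pretrained(
    "microsoft/resnet-50",
    num_labels=10,                 # EuroSAT has 10 land-use classes
    ignore_mismatched_sizes=True,  # replace the 1000-class ImageNet head
)
```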
-------------------------------------------------------------------------------- /config/modelpool/ResNetForImageClassification/transformers/resnet152_fer2013.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - /dataset/image_classification/train@train_datasets: 3 | - fer2013 4 | - /dataset/image_classification/test@val_datasets: 5 | - fer2013 6 | - _self_ 7 | _target_: fusion_bench.modelpool.ResNetForImageClassificationPool 8 | _recursive_: False 9 | type: transformers 10 | models: 11 | _pretrained_: 12 | config_path: microsoft/resnet-152 13 | pretrained: true 14 | dataset_name: fer2013 -------------------------------------------------------------------------------- /config/modelpool/ResNetForImageClassification/transformers/resnet152_food101.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - /dataset/image_classification/train@train_datasets: 3 | - food101 4 | - /dataset/image_classification/test@val_datasets: 5 | - food101 6 | - _self_ 7 | _target_: fusion_bench.modelpool.ResNetForImageClassificationPool 8 | _recursive_: False 9 | type: transformers 10 | models: 11 | _pretrained_: 12 | config_path: microsoft/resnet-152 13 | pretrained: true 14 | dataset_name: food101 -------------------------------------------------------------------------------- /config/modelpool/ResNetForImageClassification/transformers/resnet18_cifar10.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - /dataset/image_classification/train@train_datasets: 3 | - cifar10 4 | - /dataset/image_classification/test@val_datasets: 5 | - cifar10 6 | - _self_ 7 | _target_: fusion_bench.modelpool.ResNetForImageClassificationPool 8 | _recursive_: False 9 | type: transformers 10 | models: 11 | _pretrained_: 12 | config_path: microsoft/resnet-18 13 | pretrained: true 14 | dataset_name: cifar10 -------------------------------------------------------------------------------- /config/modelpool/ResNetForImageClassification/transformers/resnet18_cifar100.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - /dataset/image_classification/train@train_datasets: 3 | - cifar100 4 | - /dataset/image_classification/test@val_datasets: 5 | - cifar100 6 | - _self_ 7 | _target_: fusion_bench.modelpool.ResNetForImageClassificationPool 8 | _recursive_: False 9 | type: transformers 10 | models: 11 | _pretrained_: 12 | config_path: microsoft/resnet-18 13 | pretrained: true 14 | dataset_name: cifar100 -------------------------------------------------------------------------------- /config/modelpool/ResNetForImageClassification/transformers/resnet18_eurosat.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - /dataset/image_classification/train@train_datasets: 3 | - eurosat 4 | - /dataset/image_classification/test@val_datasets: 5 | - eurosat 6 | - _self_ 7 | _target_: fusion_bench.modelpool.ResNetForImageClassificationPool 8 | _recursive_: False 9 | type: transformers 10 | models: 11 | _pretrained_: 12 | config_path: microsoft/resnet-18 13 | pretrained: true 14 | dataset_name: eurosat -------------------------------------------------------------------------------- /config/modelpool/ResNetForImageClassification/transformers/resnet18_fer2013.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - 
/dataset/image_classification/train@train_datasets: 3 | - fer2013 4 | - /dataset/image_classification/test@val_datasets: 5 | - fer2013 6 | - _self_ 7 | _target_: fusion_bench.modelpool.ResNetForImageClassificationPool 8 | _recursive_: False 9 | type: transformers 10 | models: 11 | _pretrained_: 12 | config_path: microsoft/resnet-18 13 | pretrained: true 14 | dataset_name: fer2013 -------------------------------------------------------------------------------- /config/modelpool/ResNetForImageClassification/transformers/resnet18_food101.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - /dataset/image_classification/train@train_datasets: 3 | - food101 4 | - /dataset/image_classification/test@val_datasets: 5 | - food101 6 | - _self_ 7 | _target_: fusion_bench.modelpool.ResNetForImageClassificationPool 8 | _recursive_: False 9 | type: transformers 10 | models: 11 | _pretrained_: 12 | config_path: microsoft/resnet-18 13 | pretrained: true 14 | dataset_name: food101 -------------------------------------------------------------------------------- /config/modelpool/ResNetForImageClassification/transformers/resnet18_resisc45.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - /dataset/image_classification/train@train_datasets: 3 | - resisc45 4 | - /dataset/image_classification/test@val_datasets: 5 | - resisc45 6 | - _self_ 7 | _target_: fusion_bench.modelpool.ResNetForImageClassificationPool 8 | _recursive_: False 9 | type: transformers 10 | models: 11 | _pretrained_: 12 | config_path: microsoft/resnet-18 13 | pretrained: true 14 | dataset_name: resisc45 -------------------------------------------------------------------------------- /config/modelpool/ResNetForImageClassification/transformers/resnet50_cifar10.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - /dataset/image_classification/train@train_datasets: 3 | - cifar10 4 | - /dataset/image_classification/test@val_datasets: 5 | - cifar10 6 | - _self_ 7 | _target_: fusion_bench.modelpool.ResNetForImageClassificationPool 8 | _recursive_: False 9 | type: transformers 10 | models: 11 | _pretrained_: 12 | config_path: microsoft/resnet-50 13 | pretrained: true 14 | dataset_name: cifar10 -------------------------------------------------------------------------------- /config/modelpool/ResNetForImageClassification/transformers/resnet50_cifar100.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - /dataset/image_classification/train@train_datasets: 3 | - cifar100 4 | - /dataset/image_classification/test@val_datasets: 5 | - cifar100 6 | - _self_ 7 | _target_: fusion_bench.modelpool.ResNetForImageClassificationPool 8 | _recursive_: False 9 | type: transformers 10 | models: 11 | _pretrained_: 12 | config_path: microsoft/resnet-50 13 | pretrained: true 14 | dataset_name: cifar100 -------------------------------------------------------------------------------- /config/modelpool/ResNetForImageClassification/transformers/resnet50_eurosat.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - /dataset/image_classification/train@train_datasets: 3 | - eurosat 4 | - /dataset/image_classification/test@val_datasets: 5 | - eurosat 6 | - _self_ 7 | _target_: fusion_bench.modelpool.ResNetForImageClassificationPool 8 | _recursive_: False 9 | type: transformers 10 | models: 11 | _pretrained_: 12 | 
config_path: microsoft/resnet-50 13 | pretrained: true 14 | dataset_name: eurosat -------------------------------------------------------------------------------- /config/modelpool/ResNetForImageClassification/transformers/resnet50_fer2013.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - /dataset/image_classification/train@train_datasets: 3 | - fer2013 4 | - /dataset/image_classification/test@val_datasets: 5 | - fer2013 6 | - _self_ 7 | _target_: fusion_bench.modelpool.ResNetForImageClassificationPool 8 | _recursive_: False 9 | type: transformers 10 | models: 11 | _pretrained_: 12 | config_path: microsoft/resnet-50 13 | pretrained: true 14 | dataset_name: fer2013 -------------------------------------------------------------------------------- /config/modelpool/ResNetForImageClassification/transformers/resnet50_food101.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - /dataset/image_classification/train@train_datasets: 3 | - food101 4 | - /dataset/image_classification/test@val_datasets: 5 | - food101 6 | - _self_ 7 | _target_: fusion_bench.modelpool.ResNetForImageClassificationPool 8 | _recursive_: False 9 | type: transformers 10 | models: 11 | _pretrained_: 12 | config_path: microsoft/resnet-50 13 | pretrained: true 14 | dataset_name: food101 -------------------------------------------------------------------------------- /config/modelpool/ResNetForImageClassification/transformers/resnet50_resisc45.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - /dataset/image_classification/train@train_datasets: 3 | - resisc45 4 | - /dataset/image_classification/test@val_datasets: 5 | - resisc45 6 | - _self_ 7 | _target_: fusion_bench.modelpool.ResNetForImageClassificationPool 8 | _recursive_: False 9 | type: transformers 10 | models: 11 | _pretrained_: 12 | config_path: microsoft/resnet-50 13 | pretrained: true 14 | dataset_name: resisc45 -------------------------------------------------------------------------------- /config/taskpool/LMEvalHarnessTaskPool/lm_eval.yaml: -------------------------------------------------------------------------------- 1 | _target_: fusion_bench.taskpool.LMEvalHarnessTaskPool 2 | tasks: 3 | - truthfulqa 4 | batch_size: 1 5 | verbosity: null 6 | include_path: null 7 | apply_chat_template: false 8 | # if `output_path` is not given, the results will be saved to `log_dir/lm_eval_results`, where `log_dir` is the directory controlled by lightning Fabric. 9 | output_path: null 10 | # if `log_samples` is true, the samples will be saved to `output_path`. 
--------------------------------------------------------------------------------
/examples/mergebench/evaluate_Llama-3.2-3B-Instruct.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | SCRIPT_DIR=$(dirname "$(realpath "$0")")
4 |
5 | source "$SCRIPT_DIR/evaluate.sh"
6 |
7 | MODELS=(
8 |   "MergeBench/Llama-3.2-3B-Instruct_instruction"
9 |   "MergeBench/Llama-3.2-3B-Instruct_math"
10 |   "MergeBench/Llama-3.2-3B-Instruct_coding"
11 |   "MergeBench/Llama-3.2-3B-Instruct_multilingual"
12 |   "MergeBench/Llama-3.2-3B-Instruct_safety"
13 | )
14 |
15 | LM_EVAL_ARGS="--apply_chat_template"
16 |
17 | evaluate_all_models
18 |
--------------------------------------------------------------------------------
/fusion_bench/tasks/clip_classification/fashion_mnist.py:
--------------------------------------------------------------------------------
1 | classname_mapping = {
2 |     "0": "T - shirt / top",
3 |     "1": "Trouser",
4 |     "2": "Pullover",
5 |     "3": "Dress",
6 |     "4": "Coat",
7 |     "5": "Sandal",
8 |     "6": "Shirt",
9 |     "7": "Sneaker",
10 |     "8": "Bag",
11 |     "9": "Ankle boot",
12 | }
13 | classnames = [classname_mapping[str(i)] for i in range(10)]
14 |
15 | templates = [
16 |     lambda c: f"a photo of a {c}.",
17 |     lambda c: f"a photo of the {c}.",
18 | ]
19 |
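Classname modules like the one above drive zero-shot CLIP classification: every classname is expanded through every template, and the text embeddings of the resulting prompts form the classifier head. A minimal self-contained sketch of the expansion step (the classname subset is illustrative; CLIP encoding is elided):

```python
# Expand classnames through prompt templates, as the CLIP classification
# tasks do when building zero-shot classifier heads.
classnames = ["Trouser", "Dress", "Coat"]
templates = [
    lambda c: f"a photo of a {c}.",
    lambda c: f"a photo of the {c}.",
]

# One prompt list per class; the CLIP text embeddings of these prompts are
# typically averaged to form that class's classifier weight.
prompts = {c: [t(c) for t in templates] for c in classnames}
for classname, texts in prompts.items():
    print(classname, texts)
```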
--------------------------------------------------------------------------------
/config/modelpool/ResNetForImageClassification/transformers/resnet152_cifar100.yaml:
--------------------------------------------------------------------------------
1 | defaults:
2 |   - /dataset/image_classification/train@train_datasets:
3 |       - cifar100
4 |   - /dataset/image_classification/test@val_datasets:
5 |       - cifar100
6 |   - _self_
7 | _target_: fusion_bench.modelpool.ResNetForImageClassificationPool
8 | _recursive_: False
9 | type: transformers
10 | models:
11 |   _pretrained_:
12 |     config_path: microsoft/resnet-152
13 |     pretrained: true
14 | dataset_name: cifar100
--------------------------------------------------------------------------------
/config/modelpool/ResNetForImageClassification/transformers/resnet152_resisc45.yaml:
--------------------------------------------------------------------------------
1 | defaults:
2 |   - /dataset/image_classification/train@train_datasets:
3 |       - resisc45
4 |   - /dataset/image_classification/test@val_datasets:
5 |       - resisc45
6 |   - _self_
7 | _target_: fusion_bench.modelpool.ResNetForImageClassificationPool
8 | _recursive_: False
9 | type: transformers
10 | models:
11 |   _pretrained_:
12 |     config_path: microsoft/resnet-152
13 |     pretrained: true
14 | dataset_name: resisc45
--------------------------------------------------------------------------------
/config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_two_tasks_control_task.yaml:
--------------------------------------------------------------------------------
1 | defaults:
2 |   - /dataset/image_classification/train@train_datasets:
3 |       - tiny-imagenet
4 |   - _self_
5 | _target_: fusion_bench.modelpool.CLIPVisionModelPool
6 | _recursive_: False
7 | models:
8 |   _pretrained_: openai/clip-vit-base-patch32
9 |   model_1: tanganke/clip-vit-base-patch32_sun397
10 |   model_2: tanganke/clip-vit-base-patch32_stanford-cars
11 | processor: openai/clip-vit-base-patch32
12 | platform: hf
13 |
--------------------------------------------------------------------------------
/fusion_bench/method/we_moe/utils.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 |
4 | def get_memory_usage(desc):
5 |     """
6 |     Obtain the current GPU memory usage.
7 |
8 |     Returns:
9 |         str: A string containing the allocated and cached memory in MB.
10 |     """
11 |     allocated = torch.cuda.memory_allocated() / 1024**2  # convert to MB
12 |     cached = torch.cuda.memory_reserved() / 1024**2  # convert to MB
13 |     return (
14 |         f"{desc}\nAllocated Memory: {allocated:.2f} MB\nCached Memory: {cached:.2f} MB"
15 |     )
16 |
--------------------------------------------------------------------------------
/config/modelpool/CLIPVisionModelPool/clip-vit-large-patch14_individual.yaml:
--------------------------------------------------------------------------------
1 | # This is useful for evaluating the performance of a single CLIP vision model.
2 | #
3 | # fusion_bench \
4 | #   modelpool=CLIPVisionModelPool/clip-vit-large-patch14_individual \
5 | #   modelpool.models._pretrained_=${MODEL_PATH} \
6 | #   ...
7 | _target_: fusion_bench.modelpool.CLIPVisionModelPool
8 | _recursive_: False
9 | models:
10 |   _pretrained_: openai/clip-vit-large-patch14
11 | processor: openai/clip-vit-large-patch14
12 | platform: hf
13 |
--------------------------------------------------------------------------------
/fusion_bench/tasks/base_task.py:
--------------------------------------------------------------------------------
1 | from abc import ABC, abstractmethod
2 |
3 | from omegaconf import DictConfig
4 |
5 |
6 | class BaseTask(ABC):
7 |     _taskpool = None
8 |
9 |     def __init__(self, task_config: DictConfig):
10 |         self.config = task_config
11 |
12 |     @abstractmethod
13 |     def evaluate(self, model):
14 |         """
15 |         Evaluate the model on the task.
16 |         Returns a dictionary containing the evaluation metrics.
17 |         """
18 |         raise NotImplementedError
19 |
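`BaseTask` fixes only the constructor signature and the `evaluate` contract, so a concrete task just returns a metrics dictionary. A hypothetical subclass for illustration (the class, input shapes, and metric are made up, not a task from the repo):

```python
import torch
from omegaconf import DictConfig

from fusion_bench.tasks.base_task import BaseTask


class RandomBatchAccuracyTask(BaseTask):
    """Hypothetical task: accuracy of a classifier on one random batch."""

    def evaluate(self, model):
        inputs = torch.randn(8, 3, 224, 224)   # illustrative input shape
        labels = torch.randint(0, 10, (8,))
        with torch.no_grad():
            logits = model(inputs)
        accuracy = (logits.argmax(dim=-1) == labels).float().mean().item()
        return {"accuracy": accuracy}          # the contract: a metrics dict


# Tasks receive their config at construction time.
task = RandomBatchAccuracyTask(DictConfig({"name": "random_batch_accuracy"}))
```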
--------------------------------------------------------------------------------
/config/clip-vit-base-patch32_robustness_corrupted.yaml:
--------------------------------------------------------------------------------
1 | defaults:
2 |   - fabric_model_fusion
3 |   - override modelpool: CLIPVisionModelPool/clip-vit-base-patch32_robustness_corrupted
4 |   - override method: dummy # change this to the method you want to use
5 |   - override taskpool: CLIPVisionModelTaskPool/clip-vit-base-patch32_robustness_corrupted
6 |   - _self_
7 | # `corruption` can be one of:
8 | # contrast, gaussian_noise, impulse_noise, jpeg_compression, motion_blur, pixelate, spatter
9 | corruption: gaussian_noise
10 |
--------------------------------------------------------------------------------
/config/modelpool/ResNetForImageClassification/transformers/resnet18_fashion_mnist.yaml:
--------------------------------------------------------------------------------
1 | defaults:
2 |   - /dataset/image_classification/train@train_datasets:
3 |       - fashion_mnist
4 |   - /dataset/image_classification/test@val_datasets:
5 |       - fashion_mnist
6 |   - _self_
7 | _target_: fusion_bench.modelpool.ResNetForImageClassificationPool
8 | _recursive_: False
9 | type: transformers
10 | models:
11 |   _pretrained_:
12 |     config_path: microsoft/resnet-18
13 |     pretrained: true
14 | dataset_name: fashion_mnist
--------------------------------------------------------------------------------
/config/modelpool/ResNetForImageClassification/transformers/resnet18_rendered-sst2.yaml:
--------------------------------------------------------------------------------
1 | defaults:
2 |   - /dataset/image_classification/train@train_datasets:
3 |       - rendered-sst2
4 |   - /dataset/image_classification/test@val_datasets:
5 |       - rendered-sst2
6 |   - _self_
7 | _target_: fusion_bench.modelpool.ResNetForImageClassificationPool
8 | _recursive_: False
9 | type: transformers
10 | models:
11 |   _pretrained_:
12 |     config_path: microsoft/resnet-18
13 |     pretrained: true
14 | dataset_name: rendered-sst2
--------------------------------------------------------------------------------
/config/modelpool/ResNetForImageClassification/transformers/resnet18_stanford-cars.yaml:
--------------------------------------------------------------------------------
1 | defaults:
2 |   - /dataset/image_classification/train@train_datasets:
3 |       - stanford-cars
4 |   - /dataset/image_classification/test@val_datasets:
5 |       - stanford-cars
6 |   - _self_
7 | _target_: fusion_bench.modelpool.ResNetForImageClassificationPool
8 | _recursive_: False
9 | type: transformers
10 | models:
11 |   _pretrained_:
12 |     config_path: microsoft/resnet-18
13 |     pretrained: true
14 | dataset_name: stanford-cars
--------------------------------------------------------------------------------
/config/modelpool/ResNetForImageClassification/transformers/resnet50_fashion_mnist.yaml:
--------------------------------------------------------------------------------
1 | defaults:
2 |   - /dataset/image_classification/train@train_datasets:
3 |       - fashion_mnist
4 |   - /dataset/image_classification/test@val_datasets:
5 |       - fashion_mnist
6 |   - _self_
7 | _target_: fusion_bench.modelpool.ResNetForImageClassificationPool
8 | _recursive_: False
9 | type: transformers
10 | models:
11 |   _pretrained_:
12 |     config_path: microsoft/resnet-50
13 |     pretrained: true
14 | dataset_name: fashion_mnist
--------------------------------------------------------------------------------
/config/modelpool/ResNetForImageClassification/transformers/resnet50_rendered-sst2.yaml:
--------------------------------------------------------------------------------
1 | defaults:
2 |   - /dataset/image_classification/train@train_datasets:
3 |       - rendered-sst2
4 |   - /dataset/image_classification/test@val_datasets:
5 |       - rendered-sst2
6 |   - _self_
7 | _target_: fusion_bench.modelpool.ResNetForImageClassificationPool
8 | _recursive_: False
9 | type: transformers
10 | models:
11 |   _pretrained_:
12 |     config_path: microsoft/resnet-50
13 |     pretrained: true
14 | dataset_name: rendered-sst2
--------------------------------------------------------------------------------
/config/modelpool/ResNetForImageClassification/transformers/resnet50_stanford-cars.yaml:
--------------------------------------------------------------------------------
1 | defaults:
2 |   - /dataset/image_classification/train@train_datasets:
3 |       - stanford-cars
4 |   - /dataset/image_classification/test@val_datasets:
5 |       - stanford-cars
6 |   - _self_
7 | _target_: fusion_bench.modelpool.ResNetForImageClassificationPool
8 | _recursive_: False
9 | type: transformers
10 | models:
11 |   _pretrained_:
12 |     config_path: microsoft/resnet-50
13 |     pretrained: true
14 | dataset_name: stanford-cars
--------------------------------------------------------------------------------
/fusion_bench/method/task_singular_vector/TSVC.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch import Tensor, nn
3 |
4 | from fusion_bench import BaseAlgorithm
5 |
6 | from .utils import TSVC_utils, check_parameterNamesMatch
7 |
8 |
9 | class TaskSingularVectorCompression(BaseAlgorithm):
10 |     def __init__(self, **kwargs):
11 |         super().__init__(**kwargs)
12 |
13 |     def run(self, modelpool):
14 |         raise NotImplementedError(
15 |             "Task Singular Vector Compression is not implemented yet."
16 |         )
17 |
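`TaskSingularVectorCompression.run` is still a stub. The idea behind task-singular-vector methods is to treat each task vector (fine-tuned weights minus pretrained weights) as a matrix per layer and keep only its leading singular directions. A rough sketch of that compression step for a single 2-D weight, not the repo's eventual implementation:

```python
import torch

def compress_task_vector(w_finetuned: torch.Tensor, w_pretrained: torch.Tensor, k: int):
    """Low-rank compression of a 2-D task vector via truncated SVD (a sketch)."""
    delta = w_finetuned - w_pretrained           # the task vector
    u, s, vh = torch.linalg.svd(delta, full_matrices=False)
    # Keep the k largest singular triplets.
    return u[:, :k], s[:k], vh[:k, :]

def reconstruct(u_k, s_k, vh_k, w_pretrained):
    """Apply the compressed task vector back onto the pretrained weight."""
    return w_pretrained + u_k @ torch.diag(s_k) @ vh_k
```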
--------------------------------------------------------------------------------
/config/method/bitdelta/bitdelta.yaml:
--------------------------------------------------------------------------------
1 | # =============================================================================
2 | # FusionBench Method Configuration: BitDelta
3 | # =============================================================================
4 | _target_: fusion_bench.method.bitdelta.BitDeltaAlgorithm
5 | save_dir: null
6 | save_full_model: false
7 | # training arguments
8 | lr: 1e-4
9 | batch_size: 4
10 | num_steps: 100
11 | # dataset arguments
12 | dataset_name: c4
13 | subset: en
14 | split: train
15 | max_length: 128
16 |
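BitDelta compresses the difference between a fine-tuned and a base weight matrix to one bit per parameter plus a single scale, which the `lr` / `num_steps` / C4 settings above then calibrate by distillation. A minimal sketch of the encoding step, with the scale initialized to the mean absolute delta and the distillation loop omitted:

```python
import torch

def bitdelta_compress(w_finetuned: torch.Tensor, w_base: torch.Tensor):
    """1-bit delta compression: delta is approximated by scale * sign(delta).

    Sketch of the encoding step only; in BitDelta the scalar scale is
    further trained by distillation on calibration data (e.g. C4).
    """
    delta = w_finetuned - w_base
    sign = torch.sign(delta)        # one bit per parameter
    scale = delta.abs().mean()      # initial per-matrix scale
    return sign, scale

def bitdelta_decompress(w_base: torch.Tensor, sign: torch.Tensor, scale: torch.Tensor):
    return w_base + scale * sign
```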
--------------------------------------------------------------------------------
/config/method/smile_upscaling/singular_projection_merging.yaml:
--------------------------------------------------------------------------------
1 | # =============================================================================
2 | # FusionBench Method Configuration: SMILE Singular Projection Merging
3 | # =============================================================================
4 | name: singular_projection_merging
5 | # performing the merge on a CUDA device accelerates the SVD computation
6 | device: cuda
7 | k: 128
8 | rank: low # or high
9 | full_matrices: false
10 | # path to save/load the model
11 | model_path: null
12 |
--------------------------------------------------------------------------------
/config/modelpool/ResNetForImageClassification/transformers/resnet152_fashion_mnist.yaml:
--------------------------------------------------------------------------------
1 | defaults:
2 |   - /dataset/image_classification/train@train_datasets:
3 |       - fashion_mnist
4 |   - /dataset/image_classification/test@val_datasets:
5 |       - fashion_mnist
6 |   - _self_
7 | _target_: fusion_bench.modelpool.ResNetForImageClassificationPool
8 | _recursive_: False
9 | type: transformers
10 | models:
11 |   _pretrained_:
12 |     config_path: microsoft/resnet-152
13 |     pretrained: true
14 | dataset_name: fashion_mnist
--------------------------------------------------------------------------------
/config/modelpool/ResNetForImageClassification/transformers/resnet152_rendered-sst2.yaml:
--------------------------------------------------------------------------------
1 | defaults:
2 |   - /dataset/image_classification/train@train_datasets:
3 |       - rendered-sst2
4 |   - /dataset/image_classification/test@val_datasets:
5 |       - rendered-sst2
6 |   - _self_
7 | _target_: fusion_bench.modelpool.ResNetForImageClassificationPool
8 | _recursive_: False
9 | type: transformers
10 | models:
11 |   _pretrained_:
12 |     config_path: microsoft/resnet-152
13 |     pretrained: true
14 | dataset_name: rendered-sst2
--------------------------------------------------------------------------------
/config/modelpool/ResNetForImageClassification/transformers/resnet152_stanford-cars.yaml:
--------------------------------------------------------------------------------
1 | defaults:
2 |   - /dataset/image_classification/train@train_datasets:
3 |       - stanford-cars
4 |   - /dataset/image_classification/test@val_datasets:
5 |       - stanford-cars
6 |   - _self_
7 | _target_: fusion_bench.modelpool.ResNetForImageClassificationPool
8 | _recursive_: False
9 | type: transformers
10 | models:
11 |   _pretrained_:
12 |     config_path: microsoft/resnet-152
13 |     pretrained: true
14 | dataset_name: stanford-cars
--------------------------------------------------------------------------------
/config/modelpool/ResNetForImageClassification/transformers/resnet18_emnist_letters.yaml:
--------------------------------------------------------------------------------
1 | defaults:
2 |   - /dataset/image_classification/train@train_datasets:
3 |       - emnist_letters
4 |   - /dataset/image_classification/test@val_datasets:
5 |       - emnist_letters
6 |   - _self_
7 | _target_: fusion_bench.modelpool.ResNetForImageClassificationPool
8 | _recursive_: False
9 | type: transformers
10 | models:
11 |   _pretrained_:
12 |     config_path: microsoft/resnet-18
13 |     pretrained: true
14 | dataset_name: emnist_letters
--------------------------------------------------------------------------------
/config/modelpool/ResNetForImageClassification/transformers/resnet50_emnist_letters.yaml:
--------------------------------------------------------------------------------
1 | defaults:
2 |   - /dataset/image_classification/train@train_datasets:
3 |       - emnist_letters
4 |   - /dataset/image_classification/test@val_datasets:
5 |       - emnist_letters
6 |   - _self_
7 | _target_: fusion_bench.modelpool.ResNetForImageClassificationPool
8 | _recursive_: False
9 | type: transformers
10 | models:
11 |   _pretrained_:
12 |     config_path: microsoft/resnet-50
13 |     pretrained: true
14 | dataset_name: emnist_letters
--------------------------------------------------------------------------------
/config/method/ensemble/max_model_predictor.yaml:
--------------------------------------------------------------------------------
1 | # =============================================================================
2 | # FusionBench Method Configuration: Max Model Predictor
3 | # =============================================================================
4 | # Selects the model with maximum confidence or performance per example/task.
5 | # No additional hyperparameters are required.
6 | # =============================================================================
7 | _target_: fusion_bench.method.MaxModelPredictorAlgorithm
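The max-model predictor is an ensemble with no hyperparameters. One plausible reading, sketched below for illustration rather than as `MaxModelPredictorAlgorithm`'s actual semantics, takes the element-wise maximum of the member models' outputs so the most confident model wins per class:

```python
import torch

def max_model_predict(models, x):
    """Element-wise maximum over member model predictions (a sketch)."""
    with torch.no_grad():
        # Shape: (n_models, batch, n_classes)
        outputs = torch.stack([m(x) for m in models])
    # Per-example, per-class maximum across the ensemble members.
    return outputs.max(dim=0).values
```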
--------------------------------------------------------------------------------
/config/modelpool/ResNetForImageClassification/transformers/resnet152_emnist_letters.yaml:
--------------------------------------------------------------------------------
1 | defaults:
2 |   - /dataset/image_classification/train@train_datasets:
3 |       - emnist_letters
4 |   - /dataset/image_classification/test@val_datasets:
5 |       - emnist_letters
6 |   - _self_
7 | _target_: fusion_bench.modelpool.ResNetForImageClassificationPool
8 | _recursive_: False
9 | type: transformers
10 | models:
11 |   _pretrained_:
12 |     config_path: microsoft/resnet-152
13 |     pretrained: true
14 | dataset_name: emnist_letters
--------------------------------------------------------------------------------
/config/modelpool/ResNetForImageClassification/transformers/resnet152_oxford-iiit-pet.yaml:
--------------------------------------------------------------------------------
1 | defaults:
2 |   - /dataset/image_classification/train@train_datasets:
3 |       - oxford-iiit-pet
4 |   - /dataset/image_classification/test@val_datasets:
5 |       - oxford-iiit-pet
6 |   - _self_
7 | _target_: fusion_bench.modelpool.ResNetForImageClassificationPool
8 | _recursive_: False
9 | type: transformers
10 | models:
11 |   _pretrained_:
12 |     config_path: microsoft/resnet-152
13 |     pretrained: true
14 | dataset_name: oxford-iiit-pet
--------------------------------------------------------------------------------
/config/modelpool/ResNetForImageClassification/transformers/resnet18_oxford-iiit-pet.yaml:
--------------------------------------------------------------------------------
1 | defaults:
2 |   - /dataset/image_classification/train@train_datasets:
3 |       - oxford-iiit-pet
4 |   - /dataset/image_classification/test@val_datasets:
5 |       - oxford-iiit-pet
6 |   - _self_
7 | _target_: fusion_bench.modelpool.ResNetForImageClassificationPool
8 | _recursive_: False
9 | type: transformers
10 | models:
11 |   _pretrained_:
12 |     config_path: microsoft/resnet-18
13 |     pretrained: true
14 | dataset_name: oxford-iiit-pet
--------------------------------------------------------------------------------
/config/modelpool/ResNetForImageClassification/transformers/resnet50_oxford-iiit-pet.yaml:
--------------------------------------------------------------------------------
1 | defaults:
2 |   - /dataset/image_classification/train@train_datasets:
3 |       - oxford-iiit-pet
4 |   - /dataset/image_classification/test@val_datasets:
5 |       - oxford-iiit-pet
6 |   - _self_
7 | _target_: fusion_bench.modelpool.ResNetForImageClassificationPool
8 | _recursive_: False
9 | type: transformers
10 | models:
11 |   _pretrained_:
12 |     config_path: microsoft/resnet-50
13 |     pretrained: true
14 | dataset_name: oxford-iiit-pet
--------------------------------------------------------------------------------
/config/taskpool/reward_model_evaluation.yaml:
--------------------------------------------------------------------------------
1 | _target_: fusion_bench.taskpool.llama.reward_model.RewardModelEvaluationTaskPool
2 | test_datasets:
3 |   preference_700k:
4 |     _target_: fusion_bench.dataset.llama.preference_700k.load_tokenized_preference_700k_for_rlhf
5 |     tokenizer: ${...tokenizer}
6 |     path: hendrydong/preference_700K
7 |     split: train
8 |     cache_path: null
9 | dataloader_kwargs:
10 |   shuffle: False
11 |   batch_size: 16
12 | tokenizer: ${..modelpool.tokenizer}
13 | max_num_samples: 1000
14 | seed: 42
15 |
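The `${...tokenizer}` and `${..modelpool.tokenizer}` references are OmegaConf relative interpolations: the first dot anchors at the current node and every further dot climbs one level up the config tree. A self-contained demonstration with made-up keys:

```python
from omegaconf import OmegaConf

# Toy config mirroring the pattern above: each dot after the first in
# `${...x}` climbs one level up from the node holding the reference.
cfg = OmegaConf.create(
    {
        "tokenizer": "meta-llama/Llama-2-7b-hf",
        "test_datasets": {
            "preference_700k": {
                # Three dots: up from `preference_700k` to `test_datasets`
                # to the root, then read `tokenizer`.
                "tokenizer": "${...tokenizer}",
            }
        },
    }
)
assert cfg.test_datasets.preference_700k.tokenizer == "meta-llama/Llama-2-7b-hf"
```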
--------------------------------------------------------------------------------
/docs/api/fusion_bench.utils/data.md:
--------------------------------------------------------------------------------
1 | # Data Utilities
2 |
3 | ## Dataset Manipulation
4 |
5 | ::: fusion_bench.utils.data
6 |     options:
7 |       show_root_full_path: true
8 |       heading_level: 3
9 |
10 | ## JSON Import/Export
11 |
12 | ::: fusion_bench.utils.json
13 |     options:
14 |       show_root_full_path: true
15 |       heading_level: 3
16 |
17 | ## TensorBoard Data Import
18 |
19 | ::: fusion_bench.utils.tensorboard
20 |     options:
21 |       show_root_full_path: true
22 |       heading_level: 3
23 |
--------------------------------------------------------------------------------
/fusion_bench/tasks/clip_classification/fer2013.py:
--------------------------------------------------------------------------------
1 | classnames = [
2 |     "angry",
3 |     "disgusted",
4 |     "fearful",
5 |     "happy",
6 |     "neutral",
7 |     "sad",
8 |     "surprised",
9 | ]
10 |
11 | templates = [
12 |     lambda c: f"a photo of a {c} looking face.",
13 |     lambda c: f"a photo of a face showing the emotion: {c}.",
14 |     lambda c: f"a photo of a face looking {c}.",
15 |     lambda c: f"a face that looks {c}.",
16 |     lambda c: f"they look {c}.",
17 |     lambda c: f"look at how {c} they are.",
18 | ]
19 |
--------------------------------------------------------------------------------
/config/method/moe_pruner/moe_pruner.yaml:
--------------------------------------------------------------------------------
1 | _target_: fusion_bench.method.moe_pruner.MoEPruner
2 |
3 | nsamples: 100
4 | seed: 42
5 | device: cuda
6 | max_seqlen: 2048
7 | # `prune_type` can be either `unstructured` or `semistructured`
8 | prune_type: unstructured
9 | # === options for unstructured pruning ===
10 | # `sparsity_ratio` is the fraction of weights to be pruned (1.0 means all weights are pruned)
11 | sparsity_ratio: 0.5
12 | # === options for semistructured pruning ===
13 | # 2:4 means 2 out of 4 weights are pruned
14 | n: 2
15 | m: 4
16 |
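`n: 2, m: 4` configures n:m semi-structured sparsity: in every contiguous group of `m` weights, `n` are zeroed, and the 2:4 pattern can be accelerated on recent NVIDIA GPUs. A magnitude-based sketch of mask construction; the actual pruner scores weights with calibration data rather than raw magnitude:

```python
import torch

def nm_prune_mask(weight: torch.Tensor, n: int = 2, m: int = 4) -> torch.Tensor:
    """Build an n:m semi-structured mask: prune `n` of every `m` weights.

    Magnitude-based sketch; assumes weight.numel() is divisible by m.
    """
    flat = weight.reshape(-1, m).abs()
    # Indices of the n smallest-magnitude weights within each group of m.
    prune_idx = flat.topk(n, dim=1, largest=False).indices
    mask = torch.ones_like(flat, dtype=torch.bool)
    mask.scatter_(1, prune_idx, False)
    return mask.reshape(weight.shape)

w = torch.randn(8, 8)
pruned = w * nm_prune_mask(w)   # 2:4 pattern: half the weights survive
```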
--------------------------------------------------------------------------------
/config/method/slerp/slerp_lm.yaml:
--------------------------------------------------------------------------------
1 | # =============================================================================
2 | # FusionBench Method Configuration: SLERP for Causal LM
3 | # =============================================================================
4 | # Spherical linear interpolation between two causal language models.
5 | # =============================================================================
6 | _target_: fusion_bench.method.SlerpForCausalLM
7 | t: 0.5
8 | model_save_path: ${path.log_dir}/checkpoint
9 | show_pbar: True
10 |
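Given the factor `t`, SLERP follows the great-circle arc between the two weight vectors rather than the straight line, preserving the interpolant's norm. A minimal sketch of the formula for two flattened tensors, with a fallback to linear interpolation when the vectors are nearly parallel; this illustrates the math, not necessarily `SlerpForCausalLM`'s exact implementation:

```python
import torch

def slerp(a: torch.Tensor, b: torch.Tensor, t: float, eps: float = 1e-8):
    """Spherical linear interpolation between two flattened weight vectors.

    slerp(a, b; t) = sin((1-t)*omega)/sin(omega) * a + sin(t*omega)/sin(omega) * b,
    where omega is the angle between a and b.
    """
    a_n = a / (a.norm() + eps)
    b_n = b / (b.norm() + eps)
    cos_omega = torch.clamp(a_n.dot(b_n), -1.0, 1.0)
    omega = torch.acos(cos_omega)
    if omega.abs() < 1e-4:  # nearly parallel: fall back to linear interpolation
        return (1 - t) * a + t * b
    so = torch.sin(omega)
    return (torch.sin((1 - t) * omega) / so) * a + (torch.sin(t * omega) / so) * b
```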
--------------------------------------------------------------------------------
/examples/clip_finetune/config/modelpool/clip-finetune_TALL14.yaml:
--------------------------------------------------------------------------------
1 | defaults:
2 |   - /dataset/image_classification/train@train_datasets: TALL14
3 |   - _self_
4 | _target_: fusion_bench.modelpool.CLIPVisionModelPool
5 | base_model: openai/clip-vit-base-patch32
6 | models:
7 |   _pretrained_:
8 |     _target_: transformers.CLIPVisionModel.from_pretrained
9 |     pretrained_model_name_or_path: ${...base_model}
10 | processor:
11 |   _target_: transformers.CLIPProcessor.from_pretrained
12 |   pretrained_model_name_or_path: ${..base_model}
13 |
--------------------------------------------------------------------------------
/examples/clip_finetune/config/modelpool/clip-finetune_TALL20.yaml:
--------------------------------------------------------------------------------
1 | defaults:
2 |   - /dataset/image_classification/train@train_datasets: TALL20
3 |   - _self_
4 | _target_: fusion_bench.modelpool.CLIPVisionModelPool
5 | base_model: openai/clip-vit-base-patch32
6 | models:
7 |   _pretrained_:
8 |     _target_: transformers.CLIPVisionModel.from_pretrained
9 |     pretrained_model_name_or_path: ${...base_model}
10 | processor:
11 |   _target_: transformers.CLIPProcessor.from_pretrained
12 |   pretrained_model_name_or_path: ${..base_model}
13 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/config.yml:
--------------------------------------------------------------------------------
1 | blank_issues_enabled: true
2 | contact_links:
3 |   - name: 📚 Documentation
4 |     url: https://tanganke.github.io/fusion_bench/
5 |     about: Read the comprehensive FusionBench documentation
6 |   - name: 💬 GitHub Discussions
7 |     url: https://github.com/tanganke/fusion_bench/discussions
8 |     about: Ask questions and discuss ideas with the community
9 |   - name: 📖 Examples
10 |     url: https://github.com/tanganke/fusion_bench/tree/main/examples
11 |     about: Browse example scripts and notebooks
12 |
--------------------------------------------------------------------------------
/config/modelpool/ResNetForImageClassification/transformers/resnet152_oxford_flowers102.yaml:
--------------------------------------------------------------------------------
1 | defaults:
2 |   - /dataset/image_classification/train@train_datasets:
3 |       - oxford_flowers102
4 |   - /dataset/image_classification/test@val_datasets:
5 |       - oxford_flowers102
6 |   - _self_
7 | _target_: fusion_bench.modelpool.ResNetForImageClassificationPool
8 | _recursive_: False
9 | type: transformers
10 | models:
11 |   _pretrained_:
12 |     config_path: microsoft/resnet-152
13 |     pretrained: true
14 | dataset_name: oxford_flowers102
--------------------------------------------------------------------------------
/config/modelpool/ResNetForImageClassification/transformers/resnet18_oxford_flowers102.yaml:
--------------------------------------------------------------------------------
1 | defaults:
2 |   - /dataset/image_classification/train@train_datasets:
3 |       - oxford_flowers102
4 |   - /dataset/image_classification/test@val_datasets:
5 |       - oxford_flowers102
6 |   - _self_
7 | _target_: fusion_bench.modelpool.ResNetForImageClassificationPool
8 | _recursive_: False
9 | type: transformers
10 | models:
11 |   _pretrained_:
12 |     config_path: microsoft/resnet-18
13 |     pretrained: true
14 | dataset_name: oxford_flowers102
--------------------------------------------------------------------------------
/config/modelpool/ResNetForImageClassification/transformers/resnet50_oxford_flowers102.yaml:
--------------------------------------------------------------------------------
1 | defaults:
2 |   - /dataset/image_classification/train@train_datasets:
3 |       - oxford_flowers102
4 |   - /dataset/image_classification/test@val_datasets:
5 |       - oxford_flowers102
6 |   - _self_
7 | _target_: fusion_bench.modelpool.ResNetForImageClassificationPool
8 | _recursive_: False
9 | type: transformers
10 | models:
11 |   _pretrained_:
12 |     config_path: microsoft/resnet-50
13 |     pretrained: true
14 | dataset_name: oxford_flowers102
--------------------------------------------------------------------------------