├── .gitignore ├── README.md ├── code_data └── mbpp.test.jsonl ├── generate.py ├── gpt4_as_judge_gsm8k.py ├── lm_eval_task_config └── gsm8k_cot_zeroshot_alpaca.yaml ├── merge.py ├── mergekit ├── .github │ └── workflows │ │ └── pre-commit.yml ├── .gitignore ├── .pre-commit-config.yaml ├── LICENSE ├── README.md ├── docs │ └── moe.md ├── examples │ ├── gradient-slerp.yml │ ├── linear.yml │ ├── mega.yml │ ├── orcamini-platy-44layer.yml │ └── ties.yml ├── mergekit │ ├── __init__.py │ ├── _data │ │ ├── __init__.py │ │ └── architectures │ │ │ ├── __init__.py │ │ │ ├── baichuan.json │ │ │ ├── chatglm.json │ │ │ ├── falcon.json │ │ │ ├── gemma.json │ │ │ ├── gpt-neox.json │ │ │ ├── gpt2-sequence-classification.json │ │ │ ├── gpt2.json │ │ │ ├── jais.json │ │ │ ├── llama.json │ │ │ ├── mistral.json │ │ │ ├── phi-1.json │ │ │ ├── phi2-old.json │ │ │ ├── phi2.json │ │ │ ├── qwen.json │ │ │ ├── qwen2.json │ │ │ ├── stablelm.json │ │ │ └── starcoder2.json │ ├── architecture.py │ ├── card.py │ ├── common.py │ ├── config.py │ ├── graph.py │ ├── io │ │ ├── __init__.py │ │ ├── lazy_tensor_loader.py │ │ ├── lazy_unpickle.py │ │ ├── loader.py │ │ ├── tasks.py │ │ └── tensor_writer.py │ ├── merge.py │ ├── merge_methods │ │ ├── __init__.py │ │ ├── base.py │ │ ├── generalized_task_arithmetic.py │ │ ├── linear.py │ │ ├── passthrough.py │ │ ├── slerp.py │ │ └── tokenizer_permute.py │ ├── options.py │ ├── plan.py │ ├── scripts │ │ ├── __init__.py │ │ ├── bakllama.py │ │ ├── layershuffle.py │ │ ├── legacy.py │ │ ├── megamerge.py │ │ ├── mixtral_moe.py │ │ └── run_yaml.py │ ├── sparsify.py │ └── tokenizer.py ├── pyproject.toml └── tests │ ├── common.py │ ├── test_basic_merges.py │ ├── test_graph.py │ ├── test_io.py │ ├── test_sparsify.py │ └── test_tokenizer.py ├── requirements.txt └── utils └── evaluate_llms_utils.py /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/declare-lab/della/HEAD/.gitignore -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/declare-lab/della/HEAD/README.md -------------------------------------------------------------------------------- /code_data/mbpp.test.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/declare-lab/della/HEAD/code_data/mbpp.test.jsonl -------------------------------------------------------------------------------- /generate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/declare-lab/della/HEAD/generate.py -------------------------------------------------------------------------------- /gpt4_as_judge_gsm8k.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/declare-lab/della/HEAD/gpt4_as_judge_gsm8k.py -------------------------------------------------------------------------------- /lm_eval_task_config/gsm8k_cot_zeroshot_alpaca.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/declare-lab/della/HEAD/lm_eval_task_config/gsm8k_cot_zeroshot_alpaca.yaml -------------------------------------------------------------------------------- /merge.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/declare-lab/della/HEAD/merge.py -------------------------------------------------------------------------------- /mergekit/.github/workflows/pre-commit.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/declare-lab/della/HEAD/mergekit/.github/workflows/pre-commit.yml -------------------------------------------------------------------------------- /mergekit/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/declare-lab/della/HEAD/mergekit/.gitignore -------------------------------------------------------------------------------- /mergekit/.pre-commit-config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/declare-lab/della/HEAD/mergekit/.pre-commit-config.yaml -------------------------------------------------------------------------------- /mergekit/LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/declare-lab/della/HEAD/mergekit/LICENSE -------------------------------------------------------------------------------- /mergekit/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/declare-lab/della/HEAD/mergekit/README.md -------------------------------------------------------------------------------- /mergekit/docs/moe.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/declare-lab/della/HEAD/mergekit/docs/moe.md -------------------------------------------------------------------------------- /mergekit/examples/gradient-slerp.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/declare-lab/della/HEAD/mergekit/examples/gradient-slerp.yml -------------------------------------------------------------------------------- /mergekit/examples/linear.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/declare-lab/della/HEAD/mergekit/examples/linear.yml -------------------------------------------------------------------------------- /mergekit/examples/mega.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/declare-lab/della/HEAD/mergekit/examples/mega.yml -------------------------------------------------------------------------------- /mergekit/examples/orcamini-platy-44layer.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/declare-lab/della/HEAD/mergekit/examples/orcamini-platy-44layer.yml -------------------------------------------------------------------------------- /mergekit/examples/ties.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/declare-lab/della/HEAD/mergekit/examples/ties.yml -------------------------------------------------------------------------------- /mergekit/mergekit/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /mergekit/mergekit/_data/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /mergekit/mergekit/_data/architectures/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /mergekit/mergekit/_data/architectures/baichuan.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/declare-lab/della/HEAD/mergekit/mergekit/_data/architectures/baichuan.json -------------------------------------------------------------------------------- /mergekit/mergekit/_data/architectures/chatglm.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/declare-lab/della/HEAD/mergekit/mergekit/_data/architectures/chatglm.json -------------------------------------------------------------------------------- /mergekit/mergekit/_data/architectures/falcon.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/declare-lab/della/HEAD/mergekit/mergekit/_data/architectures/falcon.json -------------------------------------------------------------------------------- /mergekit/mergekit/_data/architectures/gemma.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/declare-lab/della/HEAD/mergekit/mergekit/_data/architectures/gemma.json -------------------------------------------------------------------------------- /mergekit/mergekit/_data/architectures/gpt-neox.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/declare-lab/della/HEAD/mergekit/mergekit/_data/architectures/gpt-neox.json -------------------------------------------------------------------------------- /mergekit/mergekit/_data/architectures/gpt2-sequence-classification.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/declare-lab/della/HEAD/mergekit/mergekit/_data/architectures/gpt2-sequence-classification.json -------------------------------------------------------------------------------- /mergekit/mergekit/_data/architectures/gpt2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/declare-lab/della/HEAD/mergekit/mergekit/_data/architectures/gpt2.json -------------------------------------------------------------------------------- /mergekit/mergekit/_data/architectures/jais.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/declare-lab/della/HEAD/mergekit/mergekit/_data/architectures/jais.json -------------------------------------------------------------------------------- /mergekit/mergekit/_data/architectures/llama.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/declare-lab/della/HEAD/mergekit/mergekit/_data/architectures/llama.json -------------------------------------------------------------------------------- /mergekit/mergekit/_data/architectures/mistral.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/declare-lab/della/HEAD/mergekit/mergekit/_data/architectures/mistral.json -------------------------------------------------------------------------------- /mergekit/mergekit/_data/architectures/phi-1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/declare-lab/della/HEAD/mergekit/mergekit/_data/architectures/phi-1.json -------------------------------------------------------------------------------- /mergekit/mergekit/_data/architectures/phi2-old.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/declare-lab/della/HEAD/mergekit/mergekit/_data/architectures/phi2-old.json -------------------------------------------------------------------------------- /mergekit/mergekit/_data/architectures/phi2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/declare-lab/della/HEAD/mergekit/mergekit/_data/architectures/phi2.json -------------------------------------------------------------------------------- /mergekit/mergekit/_data/architectures/qwen.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/declare-lab/della/HEAD/mergekit/mergekit/_data/architectures/qwen.json -------------------------------------------------------------------------------- /mergekit/mergekit/_data/architectures/qwen2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/declare-lab/della/HEAD/mergekit/mergekit/_data/architectures/qwen2.json -------------------------------------------------------------------------------- /mergekit/mergekit/_data/architectures/stablelm.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/declare-lab/della/HEAD/mergekit/mergekit/_data/architectures/stablelm.json -------------------------------------------------------------------------------- /mergekit/mergekit/_data/architectures/starcoder2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/declare-lab/della/HEAD/mergekit/mergekit/_data/architectures/starcoder2.json -------------------------------------------------------------------------------- /mergekit/mergekit/architecture.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/declare-lab/della/HEAD/mergekit/mergekit/architecture.py -------------------------------------------------------------------------------- /mergekit/mergekit/card.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/declare-lab/della/HEAD/mergekit/mergekit/card.py -------------------------------------------------------------------------------- /mergekit/mergekit/common.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/declare-lab/della/HEAD/mergekit/mergekit/common.py -------------------------------------------------------------------------------- /mergekit/mergekit/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/declare-lab/della/HEAD/mergekit/mergekit/config.py -------------------------------------------------------------------------------- /mergekit/mergekit/graph.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/declare-lab/della/HEAD/mergekit/mergekit/graph.py -------------------------------------------------------------------------------- /mergekit/mergekit/io/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/declare-lab/della/HEAD/mergekit/mergekit/io/__init__.py -------------------------------------------------------------------------------- /mergekit/mergekit/io/lazy_tensor_loader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/declare-lab/della/HEAD/mergekit/mergekit/io/lazy_tensor_loader.py -------------------------------------------------------------------------------- /mergekit/mergekit/io/lazy_unpickle.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/declare-lab/della/HEAD/mergekit/mergekit/io/lazy_unpickle.py -------------------------------------------------------------------------------- /mergekit/mergekit/io/loader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/declare-lab/della/HEAD/mergekit/mergekit/io/loader.py -------------------------------------------------------------------------------- /mergekit/mergekit/io/tasks.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/declare-lab/della/HEAD/mergekit/mergekit/io/tasks.py -------------------------------------------------------------------------------- /mergekit/mergekit/io/tensor_writer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/declare-lab/della/HEAD/mergekit/mergekit/io/tensor_writer.py -------------------------------------------------------------------------------- /mergekit/mergekit/merge.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/declare-lab/della/HEAD/mergekit/mergekit/merge.py -------------------------------------------------------------------------------- /mergekit/mergekit/merge_methods/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/declare-lab/della/HEAD/mergekit/mergekit/merge_methods/__init__.py -------------------------------------------------------------------------------- /mergekit/mergekit/merge_methods/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/declare-lab/della/HEAD/mergekit/mergekit/merge_methods/base.py -------------------------------------------------------------------------------- /mergekit/mergekit/merge_methods/generalized_task_arithmetic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/declare-lab/della/HEAD/mergekit/mergekit/merge_methods/generalized_task_arithmetic.py -------------------------------------------------------------------------------- /mergekit/mergekit/merge_methods/linear.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/declare-lab/della/HEAD/mergekit/mergekit/merge_methods/linear.py -------------------------------------------------------------------------------- /mergekit/mergekit/merge_methods/passthrough.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/declare-lab/della/HEAD/mergekit/mergekit/merge_methods/passthrough.py -------------------------------------------------------------------------------- /mergekit/mergekit/merge_methods/slerp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/declare-lab/della/HEAD/mergekit/mergekit/merge_methods/slerp.py -------------------------------------------------------------------------------- /mergekit/mergekit/merge_methods/tokenizer_permute.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/declare-lab/della/HEAD/mergekit/mergekit/merge_methods/tokenizer_permute.py -------------------------------------------------------------------------------- /mergekit/mergekit/options.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/declare-lab/della/HEAD/mergekit/mergekit/options.py -------------------------------------------------------------------------------- /mergekit/mergekit/plan.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/declare-lab/della/HEAD/mergekit/mergekit/plan.py -------------------------------------------------------------------------------- /mergekit/mergekit/scripts/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /mergekit/mergekit/scripts/bakllama.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/declare-lab/della/HEAD/mergekit/mergekit/scripts/bakllama.py -------------------------------------------------------------------------------- /mergekit/mergekit/scripts/layershuffle.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/declare-lab/della/HEAD/mergekit/mergekit/scripts/layershuffle.py -------------------------------------------------------------------------------- /mergekit/mergekit/scripts/legacy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/declare-lab/della/HEAD/mergekit/mergekit/scripts/legacy.py -------------------------------------------------------------------------------- /mergekit/mergekit/scripts/megamerge.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/declare-lab/della/HEAD/mergekit/mergekit/scripts/megamerge.py -------------------------------------------------------------------------------- /mergekit/mergekit/scripts/mixtral_moe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/declare-lab/della/HEAD/mergekit/mergekit/scripts/mixtral_moe.py -------------------------------------------------------------------------------- /mergekit/mergekit/scripts/run_yaml.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/declare-lab/della/HEAD/mergekit/mergekit/scripts/run_yaml.py -------------------------------------------------------------------------------- /mergekit/mergekit/sparsify.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/declare-lab/della/HEAD/mergekit/mergekit/sparsify.py -------------------------------------------------------------------------------- /mergekit/mergekit/tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/declare-lab/della/HEAD/mergekit/mergekit/tokenizer.py -------------------------------------------------------------------------------- /mergekit/pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/declare-lab/della/HEAD/mergekit/pyproject.toml -------------------------------------------------------------------------------- /mergekit/tests/common.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/declare-lab/della/HEAD/mergekit/tests/common.py -------------------------------------------------------------------------------- /mergekit/tests/test_basic_merges.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/declare-lab/della/HEAD/mergekit/tests/test_basic_merges.py -------------------------------------------------------------------------------- /mergekit/tests/test_graph.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/declare-lab/della/HEAD/mergekit/tests/test_graph.py -------------------------------------------------------------------------------- /mergekit/tests/test_io.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/declare-lab/della/HEAD/mergekit/tests/test_io.py -------------------------------------------------------------------------------- /mergekit/tests/test_sparsify.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/declare-lab/della/HEAD/mergekit/tests/test_sparsify.py -------------------------------------------------------------------------------- /mergekit/tests/test_tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/declare-lab/della/HEAD/mergekit/tests/test_tokenizer.py -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/declare-lab/della/HEAD/requirements.txt -------------------------------------------------------------------------------- /utils/evaluate_llms_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/declare-lab/della/HEAD/utils/evaluate_llms_utils.py --------------------------------------------------------------------------------