├── LICENSE ├── README.md ├── assets ├── Framework2.png ├── Wechat.JPG ├── dinfer_tps.png ├── logo.svg └── wechat.JPG ├── benchmarks ├── benchmark.py ├── benchmark_dataset.py ├── benchmark_dataset_fastdllm.py ├── benchmark_dataset_sglang.py └── benchmark_dataset_sorted.py ├── evaluations ├── eval_dinfer.py ├── eval_guide.md ├── eval_llada_moe.sh └── tasks │ ├── gsm8k │ ├── gsm8k-llada-moe.yaml │ └── gsm8k-llada1.5.yaml │ └── mbpp_sanitized │ ├── mbpp_sanitized_llada1.5.yaml │ ├── mbpp_sanitized_llada_moe.yaml │ └── utils.py ├── python └── dinfer │ ├── __init__.py │ ├── decoding │ ├── __init__.py │ ├── diffusion_runner.py │ ├── generate_cache.py │ ├── generate_dist.py │ ├── generate_fastdllm.py │ ├── generate_hierarchy.py │ ├── generate_merge.py │ ├── generate_uniform.py │ ├── parallel_strategy.py │ ├── serving.py │ └── utils.py │ └── model │ ├── __init__.py │ ├── configuration_bailing_moe_v2.py │ ├── configuration_llada.py │ ├── configuration_llada2_moe.py │ ├── configuration_olmoe.py │ ├── modeling_fused_olmoe.py │ ├── modeling_llada.py │ ├── modeling_llada2_moe.py │ ├── modeling_llada2_moe_sglang.py │ ├── modeling_llada_fastdllm.py │ └── tp_linear.py ├── setup.py ├── tests ├── test.py ├── test_bd.py ├── test_bd_sample.json ├── test_bd_serving.py ├── test_bd_serving_tpep.py ├── test_generate.py ├── test_llada.py ├── test_llada_moe.py ├── test_serving.py └── test_wo_model.py ├── tools ├── __init__.py ├── configuration_lladamoe.py ├── fuse_moe.py ├── modeling_fused_lladamoe.py └── transfer.py └── wechat.JPG /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inclusionAI/dInfer/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inclusionAI/dInfer/HEAD/README.md -------------------------------------------------------------------------------- /assets/Framework2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inclusionAI/dInfer/HEAD/assets/Framework2.png -------------------------------------------------------------------------------- /assets/Wechat.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inclusionAI/dInfer/HEAD/assets/Wechat.JPG -------------------------------------------------------------------------------- /assets/dinfer_tps.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inclusionAI/dInfer/HEAD/assets/dinfer_tps.png -------------------------------------------------------------------------------- /assets/logo.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inclusionAI/dInfer/HEAD/assets/logo.svg -------------------------------------------------------------------------------- /assets/wechat.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inclusionAI/dInfer/HEAD/assets/wechat.JPG -------------------------------------------------------------------------------- /benchmarks/benchmark.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inclusionAI/dInfer/HEAD/benchmarks/benchmark.py -------------------------------------------------------------------------------- /benchmarks/benchmark_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inclusionAI/dInfer/HEAD/benchmarks/benchmark_dataset.py -------------------------------------------------------------------------------- /benchmarks/benchmark_dataset_fastdllm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inclusionAI/dInfer/HEAD/benchmarks/benchmark_dataset_fastdllm.py -------------------------------------------------------------------------------- /benchmarks/benchmark_dataset_sglang.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inclusionAI/dInfer/HEAD/benchmarks/benchmark_dataset_sglang.py -------------------------------------------------------------------------------- /benchmarks/benchmark_dataset_sorted.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inclusionAI/dInfer/HEAD/benchmarks/benchmark_dataset_sorted.py -------------------------------------------------------------------------------- /evaluations/eval_dinfer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inclusionAI/dInfer/HEAD/evaluations/eval_dinfer.py -------------------------------------------------------------------------------- /evaluations/eval_guide.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inclusionAI/dInfer/HEAD/evaluations/eval_guide.md -------------------------------------------------------------------------------- /evaluations/eval_llada_moe.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inclusionAI/dInfer/HEAD/evaluations/eval_llada_moe.sh -------------------------------------------------------------------------------- /evaluations/tasks/gsm8k/gsm8k-llada-moe.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inclusionAI/dInfer/HEAD/evaluations/tasks/gsm8k/gsm8k-llada-moe.yaml -------------------------------------------------------------------------------- /evaluations/tasks/gsm8k/gsm8k-llada1.5.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inclusionAI/dInfer/HEAD/evaluations/tasks/gsm8k/gsm8k-llada1.5.yaml -------------------------------------------------------------------------------- /evaluations/tasks/mbpp_sanitized/mbpp_sanitized_llada1.5.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inclusionAI/dInfer/HEAD/evaluations/tasks/mbpp_sanitized/mbpp_sanitized_llada1.5.yaml -------------------------------------------------------------------------------- /evaluations/tasks/mbpp_sanitized/mbpp_sanitized_llada_moe.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inclusionAI/dInfer/HEAD/evaluations/tasks/mbpp_sanitized/mbpp_sanitized_llada_moe.yaml -------------------------------------------------------------------------------- /evaluations/tasks/mbpp_sanitized/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inclusionAI/dInfer/HEAD/evaluations/tasks/mbpp_sanitized/utils.py -------------------------------------------------------------------------------- /python/dinfer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inclusionAI/dInfer/HEAD/python/dinfer/__init__.py -------------------------------------------------------------------------------- /python/dinfer/decoding/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inclusionAI/dInfer/HEAD/python/dinfer/decoding/__init__.py -------------------------------------------------------------------------------- /python/dinfer/decoding/diffusion_runner.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inclusionAI/dInfer/HEAD/python/dinfer/decoding/diffusion_runner.py -------------------------------------------------------------------------------- /python/dinfer/decoding/generate_cache.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inclusionAI/dInfer/HEAD/python/dinfer/decoding/generate_cache.py -------------------------------------------------------------------------------- /python/dinfer/decoding/generate_dist.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inclusionAI/dInfer/HEAD/python/dinfer/decoding/generate_dist.py -------------------------------------------------------------------------------- /python/dinfer/decoding/generate_fastdllm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inclusionAI/dInfer/HEAD/python/dinfer/decoding/generate_fastdllm.py -------------------------------------------------------------------------------- /python/dinfer/decoding/generate_hierarchy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inclusionAI/dInfer/HEAD/python/dinfer/decoding/generate_hierarchy.py -------------------------------------------------------------------------------- /python/dinfer/decoding/generate_merge.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inclusionAI/dInfer/HEAD/python/dinfer/decoding/generate_merge.py -------------------------------------------------------------------------------- /python/dinfer/decoding/generate_uniform.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inclusionAI/dInfer/HEAD/python/dinfer/decoding/generate_uniform.py -------------------------------------------------------------------------------- /python/dinfer/decoding/parallel_strategy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inclusionAI/dInfer/HEAD/python/dinfer/decoding/parallel_strategy.py -------------------------------------------------------------------------------- /python/dinfer/decoding/serving.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inclusionAI/dInfer/HEAD/python/dinfer/decoding/serving.py -------------------------------------------------------------------------------- /python/dinfer/decoding/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inclusionAI/dInfer/HEAD/python/dinfer/decoding/utils.py -------------------------------------------------------------------------------- /python/dinfer/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inclusionAI/dInfer/HEAD/python/dinfer/model/__init__.py -------------------------------------------------------------------------------- /python/dinfer/model/configuration_bailing_moe_v2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inclusionAI/dInfer/HEAD/python/dinfer/model/configuration_bailing_moe_v2.py -------------------------------------------------------------------------------- /python/dinfer/model/configuration_llada.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inclusionAI/dInfer/HEAD/python/dinfer/model/configuration_llada.py -------------------------------------------------------------------------------- /python/dinfer/model/configuration_llada2_moe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inclusionAI/dInfer/HEAD/python/dinfer/model/configuration_llada2_moe.py -------------------------------------------------------------------------------- /python/dinfer/model/configuration_olmoe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inclusionAI/dInfer/HEAD/python/dinfer/model/configuration_olmoe.py -------------------------------------------------------------------------------- /python/dinfer/model/modeling_fused_olmoe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inclusionAI/dInfer/HEAD/python/dinfer/model/modeling_fused_olmoe.py -------------------------------------------------------------------------------- /python/dinfer/model/modeling_llada.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inclusionAI/dInfer/HEAD/python/dinfer/model/modeling_llada.py -------------------------------------------------------------------------------- /python/dinfer/model/modeling_llada2_moe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inclusionAI/dInfer/HEAD/python/dinfer/model/modeling_llada2_moe.py -------------------------------------------------------------------------------- /python/dinfer/model/modeling_llada2_moe_sglang.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inclusionAI/dInfer/HEAD/python/dinfer/model/modeling_llada2_moe_sglang.py -------------------------------------------------------------------------------- /python/dinfer/model/modeling_llada_fastdllm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inclusionAI/dInfer/HEAD/python/dinfer/model/modeling_llada_fastdllm.py -------------------------------------------------------------------------------- /python/dinfer/model/tp_linear.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inclusionAI/dInfer/HEAD/python/dinfer/model/tp_linear.py -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inclusionAI/dInfer/HEAD/setup.py -------------------------------------------------------------------------------- /tests/test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inclusionAI/dInfer/HEAD/tests/test.py -------------------------------------------------------------------------------- /tests/test_bd.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inclusionAI/dInfer/HEAD/tests/test_bd.py -------------------------------------------------------------------------------- /tests/test_bd_sample.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inclusionAI/dInfer/HEAD/tests/test_bd_sample.json -------------------------------------------------------------------------------- /tests/test_bd_serving.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inclusionAI/dInfer/HEAD/tests/test_bd_serving.py -------------------------------------------------------------------------------- /tests/test_bd_serving_tpep.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inclusionAI/dInfer/HEAD/tests/test_bd_serving_tpep.py -------------------------------------------------------------------------------- /tests/test_generate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inclusionAI/dInfer/HEAD/tests/test_generate.py -------------------------------------------------------------------------------- /tests/test_llada.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inclusionAI/dInfer/HEAD/tests/test_llada.py -------------------------------------------------------------------------------- /tests/test_llada_moe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inclusionAI/dInfer/HEAD/tests/test_llada_moe.py -------------------------------------------------------------------------------- /tests/test_serving.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inclusionAI/dInfer/HEAD/tests/test_serving.py -------------------------------------------------------------------------------- /tests/test_wo_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inclusionAI/dInfer/HEAD/tests/test_wo_model.py -------------------------------------------------------------------------------- /tools/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tools/configuration_lladamoe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inclusionAI/dInfer/HEAD/tools/configuration_lladamoe.py -------------------------------------------------------------------------------- /tools/fuse_moe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inclusionAI/dInfer/HEAD/tools/fuse_moe.py -------------------------------------------------------------------------------- /tools/modeling_fused_lladamoe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inclusionAI/dInfer/HEAD/tools/modeling_fused_lladamoe.py -------------------------------------------------------------------------------- /tools/transfer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inclusionAI/dInfer/HEAD/tools/transfer.py -------------------------------------------------------------------------------- /wechat.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inclusionAI/dInfer/HEAD/wechat.JPG --------------------------------------------------------------------------------