├── .gitignore ├── PPT ├── Introduction.pptx └── distillation.pptx ├── README.md └── docs ├── .nojekyll ├── README.md ├── _sidebar.md ├── chapter0 └── chapter0_1.md ├── chapter1 ├── chapter1_0.md ├── chapter1_1.md ├── chapter1_2.md ├── chapter1_3.md ├── chapter1_4.md ├── image-1.png ├── image-2.png ├── image-3.png ├── image-4.png ├── image-5.png ├── image-6.png ├── image-7.png └── image.png ├── chapter10 ├── chapter10_1.md ├── chapter10_2.md ├── chapter10_3.md └── images │ ├── FA_algorithm.png │ ├── FA_memory.png │ ├── HBM_access_is_the_primary_factor_affecting_runtime.png │ ├── LLM.drawio.png │ ├── Parallel_sampling_example.png │ ├── Redundancy.png │ ├── block_translation.png │ ├── pagedattention.png │ ├── 分页.png │ ├── 对比.png │ └── 虚拟内存.png ├── chapter2 ├── README.md ├── chapter2_1.md ├── chapter2_2.md ├── chapter2_3.md ├── chapter2_4.md ├── code │ ├── BabyLlama │ │ ├── 1.clean_and_tokenize.ipynb │ │ ├── 2.teacher_train.ipynb │ │ ├── 3.distill.ipynb │ │ ├── babylm_dataset.py │ │ └── mrclean.py │ ├── CoT │ │ └── CoT_distill.ipynb │ ├── ICL │ │ └── ICL_distill.ipynb │ ├── InstructFollowing │ │ └── InstructFollowing_distill.ipynb │ └── env.yml ├── images │ ├── Figure 1.png │ ├── Figure 2.png │ ├── Figure 3.png │ ├── Figure 4.png │ ├── Figure 5.png │ ├── Figure 6.png │ ├── image-1.png │ ├── image-3.png │ ├── image-4.png │ ├── image-5.png │ ├── image-6.png │ ├── image-7.png │ ├── image-8.png │ ├── image-9.png │ └── v2-543575cc0a0efdaccbd1d24570b8e9e4_b.png └── models │ └── models_download.sh ├── chapter3 ├── chapter3_1.md ├── chapter3_2.md ├── chapter3_2_1.md ├── chapter3_2_2.md ├── chapter3_3.md ├── chapter3_4.md ├── images │ ├── Illustration.png │ ├── dependency.png │ ├── depgraph.png │ ├── llmpruner-flow.png │ ├── llmpruner.png │ ├── lora.png │ ├── network.png │ ├── prune_flow.png │ ├── sparsegpt.png │ ├── sparsity.png │ └── wanda.png └── jupyter │ ├── LLMPruner │ ├── __init__.py │ ├── datasets │ │ ├── dialogue.py │ │ ├── example_samples.py │ │ └── ppl_dataset.py │ ├── models │ │ ├── __init__.py │ │ ├── helper.py │ │ ├── hf_llama │ │ │ └── modeling_llama.py │ │ └── llama │ │ │ ├── __init__.py │ │ │ ├── generation.py │ │ │ ├── model.py │ │ │ └── tokenizer.py │ ├── pruner │ │ ├── __init__.py │ │ ├── hf_llama_pruner.py │ │ └── llama_pruner.py │ ├── templates │ │ └── prompts.py │ ├── torch_pruning │ │ ├── __init__.py │ │ ├── _helpers.py │ │ ├── dependency.py │ │ ├── importance.py │ │ ├── ops.py │ │ ├── pruner │ │ │ ├── __init__.py │ │ │ ├── algorithms │ │ │ │ ├── __init__.py │ │ │ │ ├── batchnorm_scale_pruner.py │ │ │ │ ├── group_norm_pruner.py │ │ │ │ ├── magnitude_based_pruner.py │ │ │ │ ├── metapruner.py │ │ │ │ └── scheduler.py │ │ │ └── function.py │ │ └── utils │ │ │ ├── __init__.py │ │ │ ├── op_counter.py │ │ │ └── utils.py │ └── utils │ │ ├── logger.py │ │ └── prompter.py │ ├── llm-pruner.ipynb │ ├── requirement.txt │ └── wanda.ipynb ├── chapter4 ├── chapter4_1.md ├── chapter4_2.md ├── chapter4_3.md └── chapter4_4.md ├── chapter5 ├── chapter5_1.md ├── chapter5_2.md ├── chapter5_3.md ├── chapter5_4.md ├── experiments │ ├── detect.py │ ├── export.py │ ├── requirements.txt │ ├── yolov5s.onnx │ └── yolov5s.pt └── images │ ├── fig5-1.png │ ├── fig5-2.png │ ├── fig5-3.png │ ├── fig5-4.png │ ├── fig5-5.webp │ ├── fig5-6.png │ ├── fig5-7.png │ ├── fig5-8.png │ └── fig5-9.webp ├── chapter6 ├── chapter6_0.md ├── chapter6_1.md ├── chapter6_2.md └── chapter6_3.md ├── chapter7 ├── appendix.md ├── chapter7_1.md ├── chapter7_2.md ├── chapter7_3.md ├── chapter7_4.md ├── chapter7_5.md └── images │ ├── figure-1.png │ ├── figure-2.png │ ├── figure-3.png │ ├── figure-4.png │ ├── figure-5.png │ ├── figure-6.png │ └── figure-7.png ├── chapter8 ├── chapter8_1.md ├── chapter8_2.md ├── chapter8_3.md ├── chapter8_4.md ├── chapter8_5.md ├── image-1.png ├── image-10.png ├── image-2.png ├── image-3.png ├── image-4.png ├── image-5.png ├── image-6.png ├── image-7.png ├── image-8.png ├── image-9.png └── image.png ├── chapter9 ├── chapter9_1.md ├── chapter9_2.md ├── chapter9_3.md ├── chapter9_4.md └── images │ ├── async-output.png │ ├── beam-search.png │ ├── bentoml_llama3_8b.png │ ├── block-allocation.gif │ ├── chunked-prefill.png │ ├── continuous-batching.png │ ├── distrifusion.png │ ├── distrufuser.png │ ├── kv-cache.png │ ├── kv.png │ ├── latency.png │ ├── llm-inference.png │ ├── lmdeploy.png │ ├── memory-waste.png │ ├── naive_batching.png │ ├── paging.png │ ├── parallel-sampling.png │ ├── request_batching.png │ ├── shared_prefix.png │ ├── speculate.png │ ├── vllm-benchmark.png │ ├── vllm-hf.png │ ├── vllm-output.png │ └── vllm-zmq.png └── index.html /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/.gitignore -------------------------------------------------------------------------------- /PPT/Introduction.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/PPT/Introduction.pptx -------------------------------------------------------------------------------- /PPT/distillation.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/PPT/distillation.pptx -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/README.md -------------------------------------------------------------------------------- /docs/.nojekyll: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | 本页介绍整体项目。 2 | -------------------------------------------------------------------------------- /docs/_sidebar.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/_sidebar.md -------------------------------------------------------------------------------- /docs/chapter0/chapter0_1.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter0/chapter0_1.md -------------------------------------------------------------------------------- /docs/chapter1/chapter1_0.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter1/chapter1_0.md -------------------------------------------------------------------------------- /docs/chapter1/chapter1_1.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter1/chapter1_1.md -------------------------------------------------------------------------------- /docs/chapter1/chapter1_2.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter1/chapter1_2.md -------------------------------------------------------------------------------- /docs/chapter1/chapter1_3.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter1/chapter1_3.md -------------------------------------------------------------------------------- /docs/chapter1/chapter1_4.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter1/chapter1_4.md -------------------------------------------------------------------------------- /docs/chapter1/image-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter1/image-1.png -------------------------------------------------------------------------------- /docs/chapter1/image-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter1/image-2.png -------------------------------------------------------------------------------- /docs/chapter1/image-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter1/image-3.png -------------------------------------------------------------------------------- /docs/chapter1/image-4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter1/image-4.png -------------------------------------------------------------------------------- /docs/chapter1/image-5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter1/image-5.png -------------------------------------------------------------------------------- /docs/chapter1/image-6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter1/image-6.png -------------------------------------------------------------------------------- /docs/chapter1/image-7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter1/image-7.png -------------------------------------------------------------------------------- /docs/chapter1/image.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter1/image.png -------------------------------------------------------------------------------- /docs/chapter10/chapter10_1.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter10/chapter10_1.md -------------------------------------------------------------------------------- /docs/chapter10/chapter10_2.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter10/chapter10_2.md -------------------------------------------------------------------------------- /docs/chapter10/chapter10_3.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter10/chapter10_3.md -------------------------------------------------------------------------------- /docs/chapter10/images/FA_algorithm.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter10/images/FA_algorithm.png -------------------------------------------------------------------------------- /docs/chapter10/images/FA_memory.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter10/images/FA_memory.png -------------------------------------------------------------------------------- /docs/chapter10/images/HBM_access_is_the_primary_factor_affecting_runtime.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter10/images/HBM_access_is_the_primary_factor_affecting_runtime.png -------------------------------------------------------------------------------- /docs/chapter10/images/LLM.drawio.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter10/images/LLM.drawio.png -------------------------------------------------------------------------------- /docs/chapter10/images/Parallel_sampling_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter10/images/Parallel_sampling_example.png -------------------------------------------------------------------------------- /docs/chapter10/images/Redundancy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter10/images/Redundancy.png -------------------------------------------------------------------------------- /docs/chapter10/images/block_translation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter10/images/block_translation.png -------------------------------------------------------------------------------- /docs/chapter10/images/pagedattention.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter10/images/pagedattention.png -------------------------------------------------------------------------------- /docs/chapter10/images/分页.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter10/images/分页.png -------------------------------------------------------------------------------- /docs/chapter10/images/对比.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter10/images/对比.png -------------------------------------------------------------------------------- /docs/chapter10/images/虚拟内存.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter10/images/虚拟内存.png -------------------------------------------------------------------------------- /docs/chapter2/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter2/README.md -------------------------------------------------------------------------------- /docs/chapter2/chapter2_1.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter2/chapter2_1.md -------------------------------------------------------------------------------- /docs/chapter2/chapter2_2.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter2/chapter2_2.md -------------------------------------------------------------------------------- /docs/chapter2/chapter2_3.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter2/chapter2_3.md -------------------------------------------------------------------------------- /docs/chapter2/chapter2_4.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter2/chapter2_4.md -------------------------------------------------------------------------------- /docs/chapter2/code/BabyLlama/1.clean_and_tokenize.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter2/code/BabyLlama/1.clean_and_tokenize.ipynb -------------------------------------------------------------------------------- /docs/chapter2/code/BabyLlama/2.teacher_train.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter2/code/BabyLlama/2.teacher_train.ipynb -------------------------------------------------------------------------------- /docs/chapter2/code/BabyLlama/3.distill.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter2/code/BabyLlama/3.distill.ipynb -------------------------------------------------------------------------------- /docs/chapter2/code/BabyLlama/babylm_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter2/code/BabyLlama/babylm_dataset.py -------------------------------------------------------------------------------- /docs/chapter2/code/BabyLlama/mrclean.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter2/code/BabyLlama/mrclean.py -------------------------------------------------------------------------------- /docs/chapter2/code/CoT/CoT_distill.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter2/code/CoT/CoT_distill.ipynb -------------------------------------------------------------------------------- /docs/chapter2/code/ICL/ICL_distill.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter2/code/ICL/ICL_distill.ipynb -------------------------------------------------------------------------------- /docs/chapter2/code/InstructFollowing/InstructFollowing_distill.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter2/code/InstructFollowing/InstructFollowing_distill.ipynb -------------------------------------------------------------------------------- /docs/chapter2/code/env.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter2/code/env.yml -------------------------------------------------------------------------------- /docs/chapter2/images/Figure 1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter2/images/Figure 1.png -------------------------------------------------------------------------------- /docs/chapter2/images/Figure 2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter2/images/Figure 2.png -------------------------------------------------------------------------------- /docs/chapter2/images/Figure 3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter2/images/Figure 3.png -------------------------------------------------------------------------------- /docs/chapter2/images/Figure 4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter2/images/Figure 4.png -------------------------------------------------------------------------------- /docs/chapter2/images/Figure 5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter2/images/Figure 5.png -------------------------------------------------------------------------------- /docs/chapter2/images/Figure 6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter2/images/Figure 6.png -------------------------------------------------------------------------------- /docs/chapter2/images/image-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter2/images/image-1.png -------------------------------------------------------------------------------- /docs/chapter2/images/image-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter2/images/image-3.png -------------------------------------------------------------------------------- /docs/chapter2/images/image-4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter2/images/image-4.png -------------------------------------------------------------------------------- /docs/chapter2/images/image-5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter2/images/image-5.png -------------------------------------------------------------------------------- /docs/chapter2/images/image-6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter2/images/image-6.png -------------------------------------------------------------------------------- /docs/chapter2/images/image-7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter2/images/image-7.png -------------------------------------------------------------------------------- /docs/chapter2/images/image-8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter2/images/image-8.png -------------------------------------------------------------------------------- /docs/chapter2/images/image-9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter2/images/image-9.png -------------------------------------------------------------------------------- /docs/chapter2/images/v2-543575cc0a0efdaccbd1d24570b8e9e4_b.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter2/images/v2-543575cc0a0efdaccbd1d24570b8e9e4_b.png -------------------------------------------------------------------------------- /docs/chapter2/models/models_download.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter2/models/models_download.sh -------------------------------------------------------------------------------- /docs/chapter3/chapter3_1.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter3/chapter3_1.md -------------------------------------------------------------------------------- /docs/chapter3/chapter3_2.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter3/chapter3_2.md -------------------------------------------------------------------------------- /docs/chapter3/chapter3_2_1.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter3/chapter3_2_1.md -------------------------------------------------------------------------------- /docs/chapter3/chapter3_2_2.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter3/chapter3_2_2.md -------------------------------------------------------------------------------- /docs/chapter3/chapter3_3.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter3/chapter3_3.md -------------------------------------------------------------------------------- /docs/chapter3/chapter3_4.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter3/chapter3_4.md -------------------------------------------------------------------------------- /docs/chapter3/images/Illustration.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter3/images/Illustration.png -------------------------------------------------------------------------------- /docs/chapter3/images/dependency.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter3/images/dependency.png -------------------------------------------------------------------------------- /docs/chapter3/images/depgraph.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter3/images/depgraph.png -------------------------------------------------------------------------------- /docs/chapter3/images/llmpruner-flow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter3/images/llmpruner-flow.png -------------------------------------------------------------------------------- /docs/chapter3/images/llmpruner.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter3/images/llmpruner.png -------------------------------------------------------------------------------- /docs/chapter3/images/lora.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter3/images/lora.png -------------------------------------------------------------------------------- /docs/chapter3/images/network.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter3/images/network.png -------------------------------------------------------------------------------- /docs/chapter3/images/prune_flow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter3/images/prune_flow.png -------------------------------------------------------------------------------- /docs/chapter3/images/sparsegpt.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter3/images/sparsegpt.png -------------------------------------------------------------------------------- /docs/chapter3/images/sparsity.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter3/images/sparsity.png -------------------------------------------------------------------------------- /docs/chapter3/images/wanda.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter3/images/wanda.png -------------------------------------------------------------------------------- /docs/chapter3/jupyter/LLMPruner/__init__.py: -------------------------------------------------------------------------------- 1 | from . import models -------------------------------------------------------------------------------- /docs/chapter3/jupyter/LLMPruner/datasets/dialogue.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter3/jupyter/LLMPruner/datasets/dialogue.py -------------------------------------------------------------------------------- /docs/chapter3/jupyter/LLMPruner/datasets/example_samples.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter3/jupyter/LLMPruner/datasets/example_samples.py -------------------------------------------------------------------------------- /docs/chapter3/jupyter/LLMPruner/datasets/ppl_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter3/jupyter/LLMPruner/datasets/ppl_dataset.py -------------------------------------------------------------------------------- /docs/chapter3/jupyter/LLMPruner/models/__init__.py: -------------------------------------------------------------------------------- 1 | from . import hf_llama -------------------------------------------------------------------------------- /docs/chapter3/jupyter/LLMPruner/models/helper.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter3/jupyter/LLMPruner/models/helper.py -------------------------------------------------------------------------------- /docs/chapter3/jupyter/LLMPruner/models/hf_llama/modeling_llama.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter3/jupyter/LLMPruner/models/hf_llama/modeling_llama.py -------------------------------------------------------------------------------- /docs/chapter3/jupyter/LLMPruner/models/llama/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter3/jupyter/LLMPruner/models/llama/__init__.py -------------------------------------------------------------------------------- /docs/chapter3/jupyter/LLMPruner/models/llama/generation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter3/jupyter/LLMPruner/models/llama/generation.py -------------------------------------------------------------------------------- /docs/chapter3/jupyter/LLMPruner/models/llama/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter3/jupyter/LLMPruner/models/llama/model.py -------------------------------------------------------------------------------- /docs/chapter3/jupyter/LLMPruner/models/llama/tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter3/jupyter/LLMPruner/models/llama/tokenizer.py -------------------------------------------------------------------------------- /docs/chapter3/jupyter/LLMPruner/pruner/__init__.py: -------------------------------------------------------------------------------- 1 | from .hf_llama_pruner import * 2 | 3 | -------------------------------------------------------------------------------- /docs/chapter3/jupyter/LLMPruner/pruner/hf_llama_pruner.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter3/jupyter/LLMPruner/pruner/hf_llama_pruner.py -------------------------------------------------------------------------------- /docs/chapter3/jupyter/LLMPruner/pruner/llama_pruner.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter3/jupyter/LLMPruner/pruner/llama_pruner.py -------------------------------------------------------------------------------- /docs/chapter3/jupyter/LLMPruner/templates/prompts.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter3/jupyter/LLMPruner/templates/prompts.py -------------------------------------------------------------------------------- /docs/chapter3/jupyter/LLMPruner/torch_pruning/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter3/jupyter/LLMPruner/torch_pruning/__init__.py -------------------------------------------------------------------------------- /docs/chapter3/jupyter/LLMPruner/torch_pruning/_helpers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter3/jupyter/LLMPruner/torch_pruning/_helpers.py -------------------------------------------------------------------------------- /docs/chapter3/jupyter/LLMPruner/torch_pruning/dependency.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter3/jupyter/LLMPruner/torch_pruning/dependency.py -------------------------------------------------------------------------------- /docs/chapter3/jupyter/LLMPruner/torch_pruning/importance.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter3/jupyter/LLMPruner/torch_pruning/importance.py -------------------------------------------------------------------------------- /docs/chapter3/jupyter/LLMPruner/torch_pruning/ops.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter3/jupyter/LLMPruner/torch_pruning/ops.py -------------------------------------------------------------------------------- /docs/chapter3/jupyter/LLMPruner/torch_pruning/pruner/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter3/jupyter/LLMPruner/torch_pruning/pruner/__init__.py -------------------------------------------------------------------------------- /docs/chapter3/jupyter/LLMPruner/torch_pruning/pruner/algorithms/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter3/jupyter/LLMPruner/torch_pruning/pruner/algorithms/__init__.py -------------------------------------------------------------------------------- /docs/chapter3/jupyter/LLMPruner/torch_pruning/pruner/algorithms/batchnorm_scale_pruner.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter3/jupyter/LLMPruner/torch_pruning/pruner/algorithms/batchnorm_scale_pruner.py -------------------------------------------------------------------------------- /docs/chapter3/jupyter/LLMPruner/torch_pruning/pruner/algorithms/group_norm_pruner.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter3/jupyter/LLMPruner/torch_pruning/pruner/algorithms/group_norm_pruner.py -------------------------------------------------------------------------------- /docs/chapter3/jupyter/LLMPruner/torch_pruning/pruner/algorithms/magnitude_based_pruner.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter3/jupyter/LLMPruner/torch_pruning/pruner/algorithms/magnitude_based_pruner.py -------------------------------------------------------------------------------- /docs/chapter3/jupyter/LLMPruner/torch_pruning/pruner/algorithms/metapruner.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter3/jupyter/LLMPruner/torch_pruning/pruner/algorithms/metapruner.py -------------------------------------------------------------------------------- /docs/chapter3/jupyter/LLMPruner/torch_pruning/pruner/algorithms/scheduler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter3/jupyter/LLMPruner/torch_pruning/pruner/algorithms/scheduler.py -------------------------------------------------------------------------------- /docs/chapter3/jupyter/LLMPruner/torch_pruning/pruner/function.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter3/jupyter/LLMPruner/torch_pruning/pruner/function.py -------------------------------------------------------------------------------- /docs/chapter3/jupyter/LLMPruner/torch_pruning/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter3/jupyter/LLMPruner/torch_pruning/utils/__init__.py -------------------------------------------------------------------------------- /docs/chapter3/jupyter/LLMPruner/torch_pruning/utils/op_counter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter3/jupyter/LLMPruner/torch_pruning/utils/op_counter.py -------------------------------------------------------------------------------- /docs/chapter3/jupyter/LLMPruner/torch_pruning/utils/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter3/jupyter/LLMPruner/torch_pruning/utils/utils.py -------------------------------------------------------------------------------- /docs/chapter3/jupyter/LLMPruner/utils/logger.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter3/jupyter/LLMPruner/utils/logger.py -------------------------------------------------------------------------------- /docs/chapter3/jupyter/LLMPruner/utils/prompter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter3/jupyter/LLMPruner/utils/prompter.py -------------------------------------------------------------------------------- /docs/chapter3/jupyter/llm-pruner.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter3/jupyter/llm-pruner.ipynb -------------------------------------------------------------------------------- /docs/chapter3/jupyter/requirement.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter3/jupyter/requirement.txt -------------------------------------------------------------------------------- /docs/chapter3/jupyter/wanda.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter3/jupyter/wanda.ipynb -------------------------------------------------------------------------------- /docs/chapter4/chapter4_1.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter4/chapter4_1.md -------------------------------------------------------------------------------- /docs/chapter4/chapter4_2.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter4/chapter4_2.md -------------------------------------------------------------------------------- /docs/chapter4/chapter4_3.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter4/chapter4_3.md -------------------------------------------------------------------------------- /docs/chapter4/chapter4_4.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter4/chapter4_4.md -------------------------------------------------------------------------------- /docs/chapter5/chapter5_1.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter5/chapter5_1.md -------------------------------------------------------------------------------- /docs/chapter5/chapter5_2.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter5/chapter5_2.md -------------------------------------------------------------------------------- /docs/chapter5/chapter5_3.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter5/chapter5_3.md -------------------------------------------------------------------------------- /docs/chapter5/chapter5_4.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter5/chapter5_4.md -------------------------------------------------------------------------------- /docs/chapter5/experiments/detect.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter5/experiments/detect.py -------------------------------------------------------------------------------- /docs/chapter5/experiments/export.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter5/experiments/export.py -------------------------------------------------------------------------------- /docs/chapter5/experiments/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter5/experiments/requirements.txt -------------------------------------------------------------------------------- /docs/chapter5/experiments/yolov5s.onnx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter5/experiments/yolov5s.onnx -------------------------------------------------------------------------------- /docs/chapter5/experiments/yolov5s.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter5/experiments/yolov5s.pt -------------------------------------------------------------------------------- /docs/chapter5/images/fig5-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter5/images/fig5-1.png -------------------------------------------------------------------------------- /docs/chapter5/images/fig5-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter5/images/fig5-2.png -------------------------------------------------------------------------------- /docs/chapter5/images/fig5-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter5/images/fig5-3.png -------------------------------------------------------------------------------- /docs/chapter5/images/fig5-4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter5/images/fig5-4.png -------------------------------------------------------------------------------- /docs/chapter5/images/fig5-5.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter5/images/fig5-5.webp -------------------------------------------------------------------------------- /docs/chapter5/images/fig5-6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter5/images/fig5-6.png -------------------------------------------------------------------------------- /docs/chapter5/images/fig5-7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter5/images/fig5-7.png -------------------------------------------------------------------------------- /docs/chapter5/images/fig5-8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter5/images/fig5-8.png -------------------------------------------------------------------------------- /docs/chapter5/images/fig5-9.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter5/images/fig5-9.webp -------------------------------------------------------------------------------- /docs/chapter6/chapter6_0.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter6/chapter6_0.md -------------------------------------------------------------------------------- /docs/chapter6/chapter6_1.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter6/chapter6_1.md -------------------------------------------------------------------------------- /docs/chapter6/chapter6_2.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter6/chapter6_2.md -------------------------------------------------------------------------------- /docs/chapter6/chapter6_3.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter6/chapter6_3.md -------------------------------------------------------------------------------- /docs/chapter7/appendix.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter7/appendix.md -------------------------------------------------------------------------------- /docs/chapter7/chapter7_1.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter7/chapter7_1.md -------------------------------------------------------------------------------- /docs/chapter7/chapter7_2.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter7/chapter7_2.md -------------------------------------------------------------------------------- /docs/chapter7/chapter7_3.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter7/chapter7_3.md -------------------------------------------------------------------------------- /docs/chapter7/chapter7_4.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter7/chapter7_4.md -------------------------------------------------------------------------------- /docs/chapter7/chapter7_5.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter7/chapter7_5.md -------------------------------------------------------------------------------- /docs/chapter7/images/figure-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter7/images/figure-1.png -------------------------------------------------------------------------------- /docs/chapter7/images/figure-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter7/images/figure-2.png -------------------------------------------------------------------------------- /docs/chapter7/images/figure-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter7/images/figure-3.png -------------------------------------------------------------------------------- /docs/chapter7/images/figure-4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter7/images/figure-4.png -------------------------------------------------------------------------------- /docs/chapter7/images/figure-5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter7/images/figure-5.png -------------------------------------------------------------------------------- /docs/chapter7/images/figure-6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter7/images/figure-6.png -------------------------------------------------------------------------------- /docs/chapter7/images/figure-7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter7/images/figure-7.png -------------------------------------------------------------------------------- /docs/chapter8/chapter8_1.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter8/chapter8_1.md -------------------------------------------------------------------------------- /docs/chapter8/chapter8_2.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter8/chapter8_2.md -------------------------------------------------------------------------------- /docs/chapter8/chapter8_3.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter8/chapter8_3.md -------------------------------------------------------------------------------- /docs/chapter8/chapter8_4.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter8/chapter8_4.md -------------------------------------------------------------------------------- /docs/chapter8/chapter8_5.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter8/chapter8_5.md -------------------------------------------------------------------------------- /docs/chapter8/image-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter8/image-1.png -------------------------------------------------------------------------------- /docs/chapter8/image-10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter8/image-10.png -------------------------------------------------------------------------------- /docs/chapter8/image-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter8/image-2.png -------------------------------------------------------------------------------- /docs/chapter8/image-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter8/image-3.png -------------------------------------------------------------------------------- /docs/chapter8/image-4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter8/image-4.png -------------------------------------------------------------------------------- /docs/chapter8/image-5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter8/image-5.png -------------------------------------------------------------------------------- /docs/chapter8/image-6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter8/image-6.png -------------------------------------------------------------------------------- /docs/chapter8/image-7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter8/image-7.png -------------------------------------------------------------------------------- /docs/chapter8/image-8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter8/image-8.png -------------------------------------------------------------------------------- /docs/chapter8/image-9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter8/image-9.png -------------------------------------------------------------------------------- /docs/chapter8/image.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter8/image.png -------------------------------------------------------------------------------- /docs/chapter9/chapter9_1.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter9/chapter9_1.md -------------------------------------------------------------------------------- /docs/chapter9/chapter9_2.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter9/chapter9_2.md -------------------------------------------------------------------------------- /docs/chapter9/chapter9_3.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter9/chapter9_3.md -------------------------------------------------------------------------------- /docs/chapter9/chapter9_4.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter9/chapter9_4.md -------------------------------------------------------------------------------- /docs/chapter9/images/async-output.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter9/images/async-output.png -------------------------------------------------------------------------------- /docs/chapter9/images/beam-search.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter9/images/beam-search.png -------------------------------------------------------------------------------- /docs/chapter9/images/bentoml_llama3_8b.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter9/images/bentoml_llama3_8b.png -------------------------------------------------------------------------------- /docs/chapter9/images/block-allocation.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter9/images/block-allocation.gif -------------------------------------------------------------------------------- /docs/chapter9/images/chunked-prefill.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter9/images/chunked-prefill.png -------------------------------------------------------------------------------- /docs/chapter9/images/continuous-batching.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter9/images/continuous-batching.png -------------------------------------------------------------------------------- /docs/chapter9/images/distrifusion.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter9/images/distrifusion.png -------------------------------------------------------------------------------- /docs/chapter9/images/distrufuser.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter9/images/distrufuser.png -------------------------------------------------------------------------------- /docs/chapter9/images/kv-cache.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter9/images/kv-cache.png -------------------------------------------------------------------------------- /docs/chapter9/images/kv.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter9/images/kv.png -------------------------------------------------------------------------------- /docs/chapter9/images/latency.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter9/images/latency.png -------------------------------------------------------------------------------- /docs/chapter9/images/llm-inference.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter9/images/llm-inference.png -------------------------------------------------------------------------------- /docs/chapter9/images/lmdeploy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter9/images/lmdeploy.png -------------------------------------------------------------------------------- /docs/chapter9/images/memory-waste.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter9/images/memory-waste.png -------------------------------------------------------------------------------- /docs/chapter9/images/naive_batching.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter9/images/naive_batching.png -------------------------------------------------------------------------------- /docs/chapter9/images/paging.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter9/images/paging.png -------------------------------------------------------------------------------- /docs/chapter9/images/parallel-sampling.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter9/images/parallel-sampling.png -------------------------------------------------------------------------------- /docs/chapter9/images/request_batching.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter9/images/request_batching.png -------------------------------------------------------------------------------- /docs/chapter9/images/shared_prefix.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter9/images/shared_prefix.png -------------------------------------------------------------------------------- /docs/chapter9/images/speculate.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter9/images/speculate.png -------------------------------------------------------------------------------- /docs/chapter9/images/vllm-benchmark.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter9/images/vllm-benchmark.png -------------------------------------------------------------------------------- /docs/chapter9/images/vllm-hf.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter9/images/vllm-hf.png -------------------------------------------------------------------------------- /docs/chapter9/images/vllm-output.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter9/images/vllm-output.png -------------------------------------------------------------------------------- /docs/chapter9/images/vllm-zmq.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/chapter9/images/vllm-zmq.png -------------------------------------------------------------------------------- /docs/index.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datawhalechina/llm-deploy/HEAD/docs/index.html --------------------------------------------------------------------------------