├── .github ├── CODE_OF_CONDUCT.md └── CONTRIBUTING.md ├── .gitignore ├── .pre-commit-config.yaml ├── LICENSE ├── README.md ├── fastgen ├── __init__.py ├── cache.py ├── forward.py ├── generate.py ├── kernels │ ├── __init__.py │ ├── paged_memcpy.py │ ├── rmsnorm.py │ └── rope.py ├── model.py ├── tools │ ├── __init__.py │ ├── chat.py │ └── serve.py └── utils │ ├── __init__.py │ ├── iset.py │ ├── loading.py │ ├── misc.py │ ├── sampling.py │ ├── tokenizer.py │ ├── tune.py │ └── weights.py ├── mypy.ini ├── pyproject.toml ├── requirements.txt └── scripts ├── attn_overhead.py ├── bench.py ├── data ├── mistral7-fastgen.json ├── mistral7-perf.png └── mistral7-vllm.json ├── paged_memcpy.py ├── rmsnorm.py ├── rope.py └── rope_test.py /.github/CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/fastgen/HEAD/.github/CODE_OF_CONDUCT.md -------------------------------------------------------------------------------- /.github/CONTRIBUTING.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/fastgen/HEAD/.github/CONTRIBUTING.md -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.sw[po] 2 | __pycache__ 3 | *.egg-info 4 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/fastgen/HEAD/.pre-commit-config.yaml -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/fastgen/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/fastgen/HEAD/README.md -------------------------------------------------------------------------------- /fastgen/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | -------------------------------------------------------------------------------- /fastgen/cache.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/fastgen/HEAD/fastgen/cache.py -------------------------------------------------------------------------------- /fastgen/forward.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/fastgen/HEAD/fastgen/forward.py -------------------------------------------------------------------------------- /fastgen/generate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/fastgen/HEAD/fastgen/generate.py -------------------------------------------------------------------------------- /fastgen/kernels/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/fastgen/HEAD/fastgen/kernels/__init__.py -------------------------------------------------------------------------------- /fastgen/kernels/paged_memcpy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/fastgen/HEAD/fastgen/kernels/paged_memcpy.py -------------------------------------------------------------------------------- /fastgen/kernels/rmsnorm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/fastgen/HEAD/fastgen/kernels/rmsnorm.py -------------------------------------------------------------------------------- /fastgen/kernels/rope.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/fastgen/HEAD/fastgen/kernels/rope.py -------------------------------------------------------------------------------- /fastgen/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/fastgen/HEAD/fastgen/model.py -------------------------------------------------------------------------------- /fastgen/tools/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | -------------------------------------------------------------------------------- /fastgen/tools/chat.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/fastgen/HEAD/fastgen/tools/chat.py -------------------------------------------------------------------------------- /fastgen/tools/serve.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/fastgen/HEAD/fastgen/tools/serve.py -------------------------------------------------------------------------------- /fastgen/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | -------------------------------------------------------------------------------- /fastgen/utils/iset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/fastgen/HEAD/fastgen/utils/iset.py -------------------------------------------------------------------------------- /fastgen/utils/loading.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/fastgen/HEAD/fastgen/utils/loading.py -------------------------------------------------------------------------------- /fastgen/utils/misc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/fastgen/HEAD/fastgen/utils/misc.py -------------------------------------------------------------------------------- /fastgen/utils/sampling.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/fastgen/HEAD/fastgen/utils/sampling.py -------------------------------------------------------------------------------- /fastgen/utils/tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/fastgen/HEAD/fastgen/utils/tokenizer.py -------------------------------------------------------------------------------- /fastgen/utils/tune.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/fastgen/HEAD/fastgen/utils/tune.py -------------------------------------------------------------------------------- /fastgen/utils/weights.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/fastgen/HEAD/fastgen/utils/weights.py -------------------------------------------------------------------------------- /mypy.ini: -------------------------------------------------------------------------------- 1 | [mypy] 2 | ignore_missing_imports = true 3 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/fastgen/HEAD/pyproject.toml -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/fastgen/HEAD/requirements.txt -------------------------------------------------------------------------------- /scripts/attn_overhead.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/fastgen/HEAD/scripts/attn_overhead.py -------------------------------------------------------------------------------- /scripts/bench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/fastgen/HEAD/scripts/bench.py -------------------------------------------------------------------------------- /scripts/data/mistral7-fastgen.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/fastgen/HEAD/scripts/data/mistral7-fastgen.json -------------------------------------------------------------------------------- /scripts/data/mistral7-perf.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/fastgen/HEAD/scripts/data/mistral7-perf.png -------------------------------------------------------------------------------- /scripts/data/mistral7-vllm.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/fastgen/HEAD/scripts/data/mistral7-vllm.json -------------------------------------------------------------------------------- /scripts/paged_memcpy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/fastgen/HEAD/scripts/paged_memcpy.py -------------------------------------------------------------------------------- /scripts/rmsnorm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/fastgen/HEAD/scripts/rmsnorm.py -------------------------------------------------------------------------------- /scripts/rope.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/fastgen/HEAD/scripts/rope.py -------------------------------------------------------------------------------- /scripts/rope_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/fastgen/HEAD/scripts/rope_test.py --------------------------------------------------------------------------------