├── .gitignore ├── LICENSE ├── README.md ├── asset ├── key-idea.png └── results.png ├── benchmarks ├── README.md ├── eval-baseline.py ├── latency.py ├── microbench.py └── mixtral_offloading │ ├── LICENSE │ ├── README-old.md │ ├── README.md │ ├── demo.ipynb │ ├── download.sh │ ├── hyak_run.slurm │ ├── requirements.txt │ ├── run.sh │ └── src │ ├── build_model.py │ ├── custom_layers.py │ ├── expert_cache.py │ ├── expert_wrapper.py │ ├── packing.py │ ├── triton_kernels.py │ └── utils.py ├── requirements.txt └── src └── fiddler ├── __init__.py ├── infer.py └── mixtral.py /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/efeslab/fiddler/HEAD/.gitignore -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/efeslab/fiddler/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/efeslab/fiddler/HEAD/README.md -------------------------------------------------------------------------------- /asset/key-idea.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/efeslab/fiddler/HEAD/asset/key-idea.png -------------------------------------------------------------------------------- /asset/results.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/efeslab/fiddler/HEAD/asset/results.png -------------------------------------------------------------------------------- /benchmarks/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/efeslab/fiddler/HEAD/benchmarks/README.md -------------------------------------------------------------------------------- /benchmarks/eval-baseline.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/efeslab/fiddler/HEAD/benchmarks/eval-baseline.py -------------------------------------------------------------------------------- /benchmarks/latency.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/efeslab/fiddler/HEAD/benchmarks/latency.py -------------------------------------------------------------------------------- /benchmarks/microbench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/efeslab/fiddler/HEAD/benchmarks/microbench.py -------------------------------------------------------------------------------- /benchmarks/mixtral_offloading/LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/efeslab/fiddler/HEAD/benchmarks/mixtral_offloading/LICENSE -------------------------------------------------------------------------------- /benchmarks/mixtral_offloading/README-old.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/efeslab/fiddler/HEAD/benchmarks/mixtral_offloading/README-old.md -------------------------------------------------------------------------------- /benchmarks/mixtral_offloading/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/efeslab/fiddler/HEAD/benchmarks/mixtral_offloading/README.md -------------------------------------------------------------------------------- /benchmarks/mixtral_offloading/demo.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/efeslab/fiddler/HEAD/benchmarks/mixtral_offloading/demo.ipynb -------------------------------------------------------------------------------- /benchmarks/mixtral_offloading/download.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/efeslab/fiddler/HEAD/benchmarks/mixtral_offloading/download.sh -------------------------------------------------------------------------------- /benchmarks/mixtral_offloading/hyak_run.slurm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/efeslab/fiddler/HEAD/benchmarks/mixtral_offloading/hyak_run.slurm -------------------------------------------------------------------------------- /benchmarks/mixtral_offloading/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/efeslab/fiddler/HEAD/benchmarks/mixtral_offloading/requirements.txt -------------------------------------------------------------------------------- /benchmarks/mixtral_offloading/run.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/efeslab/fiddler/HEAD/benchmarks/mixtral_offloading/run.sh -------------------------------------------------------------------------------- /benchmarks/mixtral_offloading/src/build_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/efeslab/fiddler/HEAD/benchmarks/mixtral_offloading/src/build_model.py -------------------------------------------------------------------------------- /benchmarks/mixtral_offloading/src/custom_layers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/efeslab/fiddler/HEAD/benchmarks/mixtral_offloading/src/custom_layers.py -------------------------------------------------------------------------------- /benchmarks/mixtral_offloading/src/expert_cache.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/efeslab/fiddler/HEAD/benchmarks/mixtral_offloading/src/expert_cache.py -------------------------------------------------------------------------------- /benchmarks/mixtral_offloading/src/expert_wrapper.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/efeslab/fiddler/HEAD/benchmarks/mixtral_offloading/src/expert_wrapper.py -------------------------------------------------------------------------------- /benchmarks/mixtral_offloading/src/packing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/efeslab/fiddler/HEAD/benchmarks/mixtral_offloading/src/packing.py -------------------------------------------------------------------------------- /benchmarks/mixtral_offloading/src/triton_kernels.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/efeslab/fiddler/HEAD/benchmarks/mixtral_offloading/src/triton_kernels.py -------------------------------------------------------------------------------- /benchmarks/mixtral_offloading/src/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/efeslab/fiddler/HEAD/benchmarks/mixtral_offloading/src/utils.py -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | accelerate==0.26.1 2 | torch==2.1.2 3 | transformers==4.36.2 -------------------------------------------------------------------------------- /src/fiddler/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/efeslab/fiddler/HEAD/src/fiddler/__init__.py -------------------------------------------------------------------------------- /src/fiddler/infer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/efeslab/fiddler/HEAD/src/fiddler/infer.py -------------------------------------------------------------------------------- /src/fiddler/mixtral.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/efeslab/fiddler/HEAD/src/fiddler/mixtral.py --------------------------------------------------------------------------------