├── .dockerignore ├── .env ├── .github ├── FUNDING.yml └── workflows │ ├── build-wheels-fix.yml │ ├── build-wheels-release-rocm.yml │ ├── build-wheels-release.yml │ ├── build-wheels-rocm.yml │ └── build-wheels.yml ├── .gitignore ├── Dockerfile ├── LICENSE ├── README.md ├── datasets ├── download_datasets.py └── wikitext2_val_sample.jsonl ├── doc ├── TODO.md ├── _screenshot.jpg └── model_compatibility.md ├── docker-compose.yml ├── entrypoint.sh ├── example_alt_generator.py ├── example_basic.py ├── example_batch.py ├── example_cfg.py ├── example_chatbot.py ├── example_flask.py ├── example_lora.py ├── example_ws.py ├── exllama ├── __init__.py ├── alt_generator.py ├── cuda_ext.py ├── generator.py ├── lora.py ├── model.py └── tokenizer.py ├── exllama_ext ├── cpu_func │ ├── rep_penalty.cpp │ └── rep_penalty.h ├── cuda_buffers.cu ├── cuda_buffers.cuh ├── cuda_compat.cuh ├── cuda_func │ ├── column_remap.cu │ ├── column_remap.cuh │ ├── half_matmul.cu │ ├── half_matmul.cuh │ ├── q4_attn.cu │ ├── q4_attn.cuh │ ├── q4_matmul.cu │ ├── q4_matmul.cuh │ ├── q4_matrix.cu │ ├── q4_matrix.cuh │ ├── q4_mlp.cu │ ├── q4_mlp.cuh │ ├── rms_norm.cu │ ├── rms_norm.cuh │ ├── rope.cu │ └── rope.cuh ├── exllama_ext.cpp ├── hip_compat.cuh ├── matrix.cuh ├── tuning.h └── util.cuh ├── globals.py ├── model_init.py ├── perplexity.py ├── prompt_chatbort.txt ├── requirements-web.txt ├── requirements.txt ├── setup.py ├── sh ├── test_benchmark_perf.sh ├── test_benchmark_perf2.sh ├── test_benchmark_ppl.sh └── test_compat.sh ├── test_benchmark_inference.py ├── util └── shard.py └── webui ├── app.py ├── session.py ├── static ├── main.js └── style.css └── templates └── index.html /.dockerignore: -------------------------------------------------------------------------------- 1 | exllama_sessions 2 | models 3 | -------------------------------------------------------------------------------- /.env: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jllllll/exllama/HEAD/.env -------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | ko_fi: turboderp 2 | -------------------------------------------------------------------------------- /.github/workflows/build-wheels-fix.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jllllll/exllama/HEAD/.github/workflows/build-wheels-fix.yml -------------------------------------------------------------------------------- /.github/workflows/build-wheels-release-rocm.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jllllll/exllama/HEAD/.github/workflows/build-wheels-release-rocm.yml -------------------------------------------------------------------------------- /.github/workflows/build-wheels-release.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jllllll/exllama/HEAD/.github/workflows/build-wheels-release.yml -------------------------------------------------------------------------------- /.github/workflows/build-wheels-rocm.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jllllll/exllama/HEAD/.github/workflows/build-wheels-rocm.yml -------------------------------------------------------------------------------- /.github/workflows/build-wheels.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jllllll/exllama/HEAD/.github/workflows/build-wheels.yml -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jllllll/exllama/HEAD/.gitignore -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jllllll/exllama/HEAD/Dockerfile -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jllllll/exllama/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jllllll/exllama/HEAD/README.md -------------------------------------------------------------------------------- /datasets/download_datasets.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jllllll/exllama/HEAD/datasets/download_datasets.py -------------------------------------------------------------------------------- /datasets/wikitext2_val_sample.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jllllll/exllama/HEAD/datasets/wikitext2_val_sample.jsonl -------------------------------------------------------------------------------- /doc/TODO.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jllllll/exllama/HEAD/doc/TODO.md -------------------------------------------------------------------------------- /doc/_screenshot.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jllllll/exllama/HEAD/doc/_screenshot.jpg -------------------------------------------------------------------------------- /doc/model_compatibility.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jllllll/exllama/HEAD/doc/model_compatibility.md -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jllllll/exllama/HEAD/docker-compose.yml -------------------------------------------------------------------------------- /entrypoint.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jllllll/exllama/HEAD/entrypoint.sh -------------------------------------------------------------------------------- /example_alt_generator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jllllll/exllama/HEAD/example_alt_generator.py -------------------------------------------------------------------------------- /example_basic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jllllll/exllama/HEAD/example_basic.py -------------------------------------------------------------------------------- /example_batch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jllllll/exllama/HEAD/example_batch.py -------------------------------------------------------------------------------- /example_cfg.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jllllll/exllama/HEAD/example_cfg.py -------------------------------------------------------------------------------- /example_chatbot.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jllllll/exllama/HEAD/example_chatbot.py -------------------------------------------------------------------------------- /example_flask.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jllllll/exllama/HEAD/example_flask.py -------------------------------------------------------------------------------- /example_lora.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jllllll/exllama/HEAD/example_lora.py -------------------------------------------------------------------------------- /example_ws.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jllllll/exllama/HEAD/example_ws.py -------------------------------------------------------------------------------- /exllama/__init__.py: -------------------------------------------------------------------------------- 1 | from . import cuda_ext, generator, model, tokenizer 2 | -------------------------------------------------------------------------------- /exllama/alt_generator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jllllll/exllama/HEAD/exllama/alt_generator.py -------------------------------------------------------------------------------- /exllama/cuda_ext.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jllllll/exllama/HEAD/exllama/cuda_ext.py -------------------------------------------------------------------------------- /exllama/generator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jllllll/exllama/HEAD/exllama/generator.py -------------------------------------------------------------------------------- /exllama/lora.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jllllll/exllama/HEAD/exllama/lora.py -------------------------------------------------------------------------------- /exllama/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jllllll/exllama/HEAD/exllama/model.py -------------------------------------------------------------------------------- /exllama/tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jllllll/exllama/HEAD/exllama/tokenizer.py -------------------------------------------------------------------------------- /exllama_ext/cpu_func/rep_penalty.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jllllll/exllama/HEAD/exllama_ext/cpu_func/rep_penalty.cpp -------------------------------------------------------------------------------- /exllama_ext/cpu_func/rep_penalty.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jllllll/exllama/HEAD/exllama_ext/cpu_func/rep_penalty.h -------------------------------------------------------------------------------- /exllama_ext/cuda_buffers.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jllllll/exllama/HEAD/exllama_ext/cuda_buffers.cu -------------------------------------------------------------------------------- /exllama_ext/cuda_buffers.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jllllll/exllama/HEAD/exllama_ext/cuda_buffers.cuh -------------------------------------------------------------------------------- /exllama_ext/cuda_compat.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jllllll/exllama/HEAD/exllama_ext/cuda_compat.cuh -------------------------------------------------------------------------------- /exllama_ext/cuda_func/column_remap.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jllllll/exllama/HEAD/exllama_ext/cuda_func/column_remap.cu -------------------------------------------------------------------------------- /exllama_ext/cuda_func/column_remap.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jllllll/exllama/HEAD/exllama_ext/cuda_func/column_remap.cuh -------------------------------------------------------------------------------- /exllama_ext/cuda_func/half_matmul.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jllllll/exllama/HEAD/exllama_ext/cuda_func/half_matmul.cu -------------------------------------------------------------------------------- /exllama_ext/cuda_func/half_matmul.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jllllll/exllama/HEAD/exllama_ext/cuda_func/half_matmul.cuh -------------------------------------------------------------------------------- /exllama_ext/cuda_func/q4_attn.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jllllll/exllama/HEAD/exllama_ext/cuda_func/q4_attn.cu -------------------------------------------------------------------------------- /exllama_ext/cuda_func/q4_attn.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jllllll/exllama/HEAD/exllama_ext/cuda_func/q4_attn.cuh -------------------------------------------------------------------------------- /exllama_ext/cuda_func/q4_matmul.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jllllll/exllama/HEAD/exllama_ext/cuda_func/q4_matmul.cu -------------------------------------------------------------------------------- /exllama_ext/cuda_func/q4_matmul.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jllllll/exllama/HEAD/exllama_ext/cuda_func/q4_matmul.cuh -------------------------------------------------------------------------------- /exllama_ext/cuda_func/q4_matrix.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jllllll/exllama/HEAD/exllama_ext/cuda_func/q4_matrix.cu -------------------------------------------------------------------------------- /exllama_ext/cuda_func/q4_matrix.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jllllll/exllama/HEAD/exllama_ext/cuda_func/q4_matrix.cuh -------------------------------------------------------------------------------- /exllama_ext/cuda_func/q4_mlp.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jllllll/exllama/HEAD/exllama_ext/cuda_func/q4_mlp.cu -------------------------------------------------------------------------------- /exllama_ext/cuda_func/q4_mlp.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jllllll/exllama/HEAD/exllama_ext/cuda_func/q4_mlp.cuh -------------------------------------------------------------------------------- /exllama_ext/cuda_func/rms_norm.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jllllll/exllama/HEAD/exllama_ext/cuda_func/rms_norm.cu -------------------------------------------------------------------------------- /exllama_ext/cuda_func/rms_norm.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jllllll/exllama/HEAD/exllama_ext/cuda_func/rms_norm.cuh -------------------------------------------------------------------------------- /exllama_ext/cuda_func/rope.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jllllll/exllama/HEAD/exllama_ext/cuda_func/rope.cu -------------------------------------------------------------------------------- /exllama_ext/cuda_func/rope.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jllllll/exllama/HEAD/exllama_ext/cuda_func/rope.cuh -------------------------------------------------------------------------------- /exllama_ext/exllama_ext.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jllllll/exllama/HEAD/exllama_ext/exllama_ext.cpp -------------------------------------------------------------------------------- /exllama_ext/hip_compat.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jllllll/exllama/HEAD/exllama_ext/hip_compat.cuh -------------------------------------------------------------------------------- /exllama_ext/matrix.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jllllll/exllama/HEAD/exllama_ext/matrix.cuh -------------------------------------------------------------------------------- /exllama_ext/tuning.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jllllll/exllama/HEAD/exllama_ext/tuning.h -------------------------------------------------------------------------------- /exllama_ext/util.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jllllll/exllama/HEAD/exllama_ext/util.cuh -------------------------------------------------------------------------------- /globals.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jllllll/exllama/HEAD/globals.py -------------------------------------------------------------------------------- /model_init.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jllllll/exllama/HEAD/model_init.py -------------------------------------------------------------------------------- /perplexity.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jllllll/exllama/HEAD/perplexity.py -------------------------------------------------------------------------------- /prompt_chatbort.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jllllll/exllama/HEAD/prompt_chatbort.txt -------------------------------------------------------------------------------- /requirements-web.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jllllll/exllama/HEAD/requirements-web.txt -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jllllll/exllama/HEAD/requirements.txt -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jllllll/exllama/HEAD/setup.py -------------------------------------------------------------------------------- /sh/test_benchmark_perf.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jllllll/exllama/HEAD/sh/test_benchmark_perf.sh -------------------------------------------------------------------------------- /sh/test_benchmark_perf2.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jllllll/exllama/HEAD/sh/test_benchmark_perf2.sh -------------------------------------------------------------------------------- /sh/test_benchmark_ppl.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jllllll/exllama/HEAD/sh/test_benchmark_ppl.sh -------------------------------------------------------------------------------- /sh/test_compat.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jllllll/exllama/HEAD/sh/test_compat.sh -------------------------------------------------------------------------------- /test_benchmark_inference.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jllllll/exllama/HEAD/test_benchmark_inference.py -------------------------------------------------------------------------------- /util/shard.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jllllll/exllama/HEAD/util/shard.py -------------------------------------------------------------------------------- /webui/app.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jllllll/exllama/HEAD/webui/app.py -------------------------------------------------------------------------------- /webui/session.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jllllll/exllama/HEAD/webui/session.py -------------------------------------------------------------------------------- /webui/static/main.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jllllll/exllama/HEAD/webui/static/main.js -------------------------------------------------------------------------------- /webui/static/style.css: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jllllll/exllama/HEAD/webui/static/style.css -------------------------------------------------------------------------------- /webui/templates/index.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jllllll/exllama/HEAD/webui/templates/index.html --------------------------------------------------------------------------------