├── .gitignore ├── LICENSE ├── NOTICE.md ├── README.md ├── dev ├── llminference ├── __init__.py ├── eval_adapter.py ├── experiments.py ├── methods │ ├── __init__.py │ ├── ann_attention.py │ ├── eviction_attention.py │ ├── quantisation.py │ └── sparse_attention.py ├── models │ ├── __init__.py │ ├── gemma_attention.py │ ├── llama_attention.py │ ├── mistral_attention.py │ └── pipelined_models.py ├── tasks │ ├── __init__.py │ ├── bpc.py │ ├── needle.py │ ├── outcompare.py │ ├── qa.py │ ├── repetition.py │ └── summarisation.py └── utility.py ├── requirements.txt ├── scripts ├── Eval.ipynb ├── Quantisation.ipynb ├── generate_outcompare_datasets.py └── job_runner.sh ├── setup.cfg ├── setup.py └── tests ├── __init__.py ├── methods ├── __init__.py ├── test_ann_attention.py ├── test_eviction_attention.py ├── test_quantisation.py └── test_sparse_attention.py ├── models └── __init__.py ├── tasks ├── __init__.py ├── test_bpc.py ├── test_needle.py ├── test_outcompare.py ├── test_qa.py ├── test_repetition.py └── test_summarisation.py ├── test_eval_adapter.py ├── test_experiments.py └── test_utility.py /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/graphcore-research/llm-inference-research/HEAD/.gitignore -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/graphcore-research/llm-inference-research/HEAD/LICENSE -------------------------------------------------------------------------------- /NOTICE.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/graphcore-research/llm-inference-research/HEAD/NOTICE.md -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/graphcore-research/llm-inference-research/HEAD/README.md -------------------------------------------------------------------------------- /dev: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/graphcore-research/llm-inference-research/HEAD/dev -------------------------------------------------------------------------------- /llminference/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/graphcore-research/llm-inference-research/HEAD/llminference/__init__.py -------------------------------------------------------------------------------- /llminference/eval_adapter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/graphcore-research/llm-inference-research/HEAD/llminference/eval_adapter.py -------------------------------------------------------------------------------- /llminference/experiments.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/graphcore-research/llm-inference-research/HEAD/llminference/experiments.py -------------------------------------------------------------------------------- /llminference/methods/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/graphcore-research/llm-inference-research/HEAD/llminference/methods/__init__.py -------------------------------------------------------------------------------- /llminference/methods/ann_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/graphcore-research/llm-inference-research/HEAD/llminference/methods/ann_attention.py -------------------------------------------------------------------------------- /llminference/methods/eviction_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/graphcore-research/llm-inference-research/HEAD/llminference/methods/eviction_attention.py -------------------------------------------------------------------------------- /llminference/methods/quantisation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/graphcore-research/llm-inference-research/HEAD/llminference/methods/quantisation.py -------------------------------------------------------------------------------- /llminference/methods/sparse_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/graphcore-research/llm-inference-research/HEAD/llminference/methods/sparse_attention.py -------------------------------------------------------------------------------- /llminference/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/graphcore-research/llm-inference-research/HEAD/llminference/models/__init__.py -------------------------------------------------------------------------------- /llminference/models/gemma_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/graphcore-research/llm-inference-research/HEAD/llminference/models/gemma_attention.py -------------------------------------------------------------------------------- /llminference/models/llama_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/graphcore-research/llm-inference-research/HEAD/llminference/models/llama_attention.py -------------------------------------------------------------------------------- /llminference/models/mistral_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/graphcore-research/llm-inference-research/HEAD/llminference/models/mistral_attention.py -------------------------------------------------------------------------------- /llminference/models/pipelined_models.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/graphcore-research/llm-inference-research/HEAD/llminference/models/pipelined_models.py -------------------------------------------------------------------------------- /llminference/tasks/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/graphcore-research/llm-inference-research/HEAD/llminference/tasks/__init__.py -------------------------------------------------------------------------------- /llminference/tasks/bpc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/graphcore-research/llm-inference-research/HEAD/llminference/tasks/bpc.py -------------------------------------------------------------------------------- /llminference/tasks/needle.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/graphcore-research/llm-inference-research/HEAD/llminference/tasks/needle.py -------------------------------------------------------------------------------- /llminference/tasks/outcompare.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/graphcore-research/llm-inference-research/HEAD/llminference/tasks/outcompare.py -------------------------------------------------------------------------------- /llminference/tasks/qa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/graphcore-research/llm-inference-research/HEAD/llminference/tasks/qa.py -------------------------------------------------------------------------------- /llminference/tasks/repetition.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/graphcore-research/llm-inference-research/HEAD/llminference/tasks/repetition.py -------------------------------------------------------------------------------- /llminference/tasks/summarisation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/graphcore-research/llm-inference-research/HEAD/llminference/tasks/summarisation.py -------------------------------------------------------------------------------- /llminference/utility.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/graphcore-research/llm-inference-research/HEAD/llminference/utility.py -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/graphcore-research/llm-inference-research/HEAD/requirements.txt -------------------------------------------------------------------------------- /scripts/Eval.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/graphcore-research/llm-inference-research/HEAD/scripts/Eval.ipynb -------------------------------------------------------------------------------- /scripts/Quantisation.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/graphcore-research/llm-inference-research/HEAD/scripts/Quantisation.ipynb -------------------------------------------------------------------------------- /scripts/generate_outcompare_datasets.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/graphcore-research/llm-inference-research/HEAD/scripts/generate_outcompare_datasets.py -------------------------------------------------------------------------------- /scripts/job_runner.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/graphcore-research/llm-inference-research/HEAD/scripts/job_runner.sh -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/graphcore-research/llm-inference-research/HEAD/setup.cfg -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/graphcore-research/llm-inference-research/HEAD/setup.py -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/graphcore-research/llm-inference-research/HEAD/tests/__init__.py -------------------------------------------------------------------------------- /tests/methods/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/graphcore-research/llm-inference-research/HEAD/tests/methods/__init__.py -------------------------------------------------------------------------------- /tests/methods/test_ann_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/graphcore-research/llm-inference-research/HEAD/tests/methods/test_ann_attention.py -------------------------------------------------------------------------------- /tests/methods/test_eviction_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/graphcore-research/llm-inference-research/HEAD/tests/methods/test_eviction_attention.py -------------------------------------------------------------------------------- /tests/methods/test_quantisation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/graphcore-research/llm-inference-research/HEAD/tests/methods/test_quantisation.py -------------------------------------------------------------------------------- /tests/methods/test_sparse_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/graphcore-research/llm-inference-research/HEAD/tests/methods/test_sparse_attention.py -------------------------------------------------------------------------------- /tests/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/graphcore-research/llm-inference-research/HEAD/tests/models/__init__.py -------------------------------------------------------------------------------- /tests/tasks/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/graphcore-research/llm-inference-research/HEAD/tests/tasks/__init__.py -------------------------------------------------------------------------------- /tests/tasks/test_bpc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/graphcore-research/llm-inference-research/HEAD/tests/tasks/test_bpc.py -------------------------------------------------------------------------------- /tests/tasks/test_needle.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/graphcore-research/llm-inference-research/HEAD/tests/tasks/test_needle.py -------------------------------------------------------------------------------- /tests/tasks/test_outcompare.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/graphcore-research/llm-inference-research/HEAD/tests/tasks/test_outcompare.py -------------------------------------------------------------------------------- /tests/tasks/test_qa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/graphcore-research/llm-inference-research/HEAD/tests/tasks/test_qa.py -------------------------------------------------------------------------------- /tests/tasks/test_repetition.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/graphcore-research/llm-inference-research/HEAD/tests/tasks/test_repetition.py -------------------------------------------------------------------------------- /tests/tasks/test_summarisation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/graphcore-research/llm-inference-research/HEAD/tests/tasks/test_summarisation.py -------------------------------------------------------------------------------- /tests/test_eval_adapter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/graphcore-research/llm-inference-research/HEAD/tests/test_eval_adapter.py -------------------------------------------------------------------------------- /tests/test_experiments.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/graphcore-research/llm-inference-research/HEAD/tests/test_experiments.py -------------------------------------------------------------------------------- /tests/test_utility.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/graphcore-research/llm-inference-research/HEAD/tests/test_utility.py --------------------------------------------------------------------------------