├── .gitignore
├── .pre-commit-config.yaml
├── CHANGELOG.md
├── LICENSE
├── README.md
├── attention_sinks
│   ├── __init__.py
│   ├── attention_sink_kv_cache.py
│   ├── generation
│   │   └── utils.py
│   ├── inject_mixin.py
│   └── models
│       ├── __init__.py
│       ├── auto
│       │   ├── __init__.py
│       │   └── modeling_auto.py
│       ├── falcon
│       │   ├── __init__.py
│       │   ├── modeling_falcon.py
│       │   └── pos_shift.py
│       ├── gpt_neox
│       │   ├── __init__.py
│       │   ├── modeling_gpt_neox.py
│       │   └── pos_shift.py
│       ├── gptj
│       │   ├── __init__.py
│       │   ├── modeling_gptj.py
│       │   └── pos_shift.py
│       ├── llama
│       │   ├── __init__.py
│       │   ├── modeling_llama.py
│       │   └── pos_shift.py
│       ├── mistral
│       │   ├── __init__.py
│       │   ├── modeling_mistral.py
│       │   └── pos_shift.py
│       ├── mpt
│       │   ├── __init__.py
│       │   └── modeling_mpt.py
│       ├── qwen
│       │   ├── __init__.py
│       │   └── pos_shift.py
│       ├── stablelm_epoch
│       │   ├── __init__.py
│       │   └── pos_shift.py
│       └── yi
│           ├── __init__.py
│           └── pos_shift.py
├── benchmark
│   ├── outputs_btlm_3b_8k_base_2k_win
│   │   ├── attention_sinks.csv
│   │   ├── transformers.csv
│   │   └── windowed.csv
│   ├── outputs_falcon_7b
│   │   ├── attention_sinks.csv
│   │   ├── transformers.csv
│   │   └── windowed.csv
│   ├── outputs_gptj_6b
│   │   ├── attention_sinks.csv
│   │   ├── transformers.csv
│   │   └── windowed.csv
│   ├── outputs_llama_2_7b
│   │   ├── attention_sinks.csv
│   │   ├── transformers.csv
│   │   └── windowed.csv
│   ├── outputs_mistral_7b
│   │   ├── attention_sinks.csv
│   │   ├── transformers.csv
│   │   └── windowed.csv
│   ├── outputs_mpt_7b
│   │   ├── attention_sinks.csv
│   │   ├── transformers.csv
│   │   └── windowed.csv
│   ├── outputs_pythia_6.9b
│   │   ├── attention_sinks.csv
│   │   ├── transformers.csv
│   │   └── windowed.csv
│   ├── outputs_qwen_14b
│   │   ├── attention_sinks.csv
│   │   └── transformers.csv
│   ├── outputs_qwen_7b
│   │   ├── attention_sinks.csv
│   │   ├── transformers.csv
│   │   └── windowed.csv
│   ├── outputs_stablelm_3b_4e1t
│   │   ├── attention_sinks.csv
│   │   ├── transformers.csv
│   │   └── windowed.csv
│   ├── outputs_yi_6b
│   │   ├── attention_sinks.csv
│   │   ├── transformers.csv
│   │   └── windowed.csv
│   ├── perplexity.py
│   ├── plot_perplexity.py
│   └── scripts
│       ├── benchmark_btlm.sh
│       ├── benchmark_falcon.sh
│       ├── benchmark_gptj.sh
│       ├── benchmark_llama.sh
│       ├── benchmark_mistral.sh
│       ├── benchmark_mpt.sh
│       ├── benchmark_pythia.sh
│       ├── benchmark_qwen.sh
│       ├── benchmark_stablelm_epoch.sh
│       └── benchmark_yi.sh
├── demo
│   ├── endless_generation.py
│   ├── endless_logs
│   │   ├── attention_sinks
│   │   │   └── meta-llama
│   │   │       └── Llama-2-7b-hf.txt
│   │   ├── transformers
│   │   │   └── meta-llama
│   │   │       └── Llama-2-7b-hf.txt
│   │   └── windowed
│   │       └── meta-llama
│   │           └── Llama-2-7b-hf.txt
│   ├── streaming.py
│   ├── streaming_logs
│   │   ├── attention_sinks
│   │   │   ├── HuggingFaceH4
│   │   │   │   └── zephyr-7b-alpha.txt
│   │   │   ├── meta-llama
│   │   │   │   └── Llama-2-7b-chat-hf.txt
│   │   │   ├── mistralai
│   │   │   │   └── Mistral-7B-Instruct-v0.1.txt
│   │   │   └── mosaicml
│   │   │       └── mpt-7b-chat.txt
│   │   ├── transformers
│   │   │   ├── HuggingFaceH4
│   │   │   │   └── zephyr-7b-alpha.txt
│   │   │   ├── meta-llama
│   │   │   │   └── Llama-2-7b-chat-hf.txt
│   │   │   ├── mistralai
│   │   │   │   └── Mistral-7B-Instruct-v0.1.txt
│   │   │   └── mosaicml
│   │   │       └── mpt-7b-chat.txt
│   │   └── windowed
│   │       ├── HuggingFaceH4
│   │       │   └── zephyr-7b-alpha.txt
│   │       ├── meta-llama
│   │       │   └── Llama-2-7b-chat-hf.txt
│   │       ├── mistralai
│   │       │   └── Mistral-7B-Instruct-v0.1.txt
│   │       └── mosaicml
│   │           └── mpt-7b-chat.txt
│   └── utils.py
├── pyproject.toml
└── setup.py
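This is the layout of tomaarsen/attention_sinks, a drop-in extension of Hugging Face transformers that gives supported decoder models an "attention sink" KV cache (the StreamingLLM recipe), so generation can run indefinitely at constant memory. A minimal usage sketch, assuming the entry point documented in the repository's README (the keyword names attention_sink_size and attention_sink_window_size come from that README and may differ between versions):

# Sketch: swap the transformers import for attention_sinks and the model gains
# a fixed-size KV cache: a few "sink" tokens plus a sliding window of recent tokens.
from attention_sinks import AutoModelForCausalLM
from transformers import AutoTokenizer

model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-2-7b-hf",
    device_map="auto",
    attention_sink_size=4,            # initial tokens that are never evicted
    attention_sink_window_size=1020,  # recent tokens kept; cache = 4 + 1020
)
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-hf")

The remainder of this document is the file index: each path in the tree above, mapped to its raw contents at HEAD.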
/.gitignore: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/.gitignore
/.pre-commit-config.yaml: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/.pre-commit-config.yaml
/CHANGELOG.md: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/CHANGELOG.md
/LICENSE: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/LICENSE
/README.md: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/README.md
/attention_sinks/__init__.py: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/attention_sinks/__init__.py
/attention_sinks/attention_sink_kv_cache.py: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/attention_sinks/attention_sink_kv_cache.py
/attention_sinks/generation/utils.py: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/attention_sinks/generation/utils.py
/attention_sinks/inject_mixin.py: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/attention_sinks/inject_mixin.py
/attention_sinks/models/__init__.py: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/attention_sinks/models/__init__.py
/attention_sinks/models/auto/__init__.py: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/attention_sinks/models/auto/__init__.py
/attention_sinks/models/auto/modeling_auto.py: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/attention_sinks/models/auto/modeling_auto.py
/attention_sinks/models/falcon/__init__.py: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/attention_sinks/models/falcon/__init__.py
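attention_sink_kv_cache.py (indexed just above) holds the cache policy, and inject_mixin.py patches it into the stock transformers model classes listed under models/. The eviction rule is the heart of the method: keep the first few "sink" positions plus the most recent window, and drop everything in between. A self-contained sketch of that rule (function and argument names are illustrative, not the module's actual API):

import torch

def evict_kv(key: torch.Tensor, value: torch.Tensor,
             sink_size: int = 4, window_size: int = 1020):
    # key/value: [batch, heads, seq_len, head_dim]. Keep the first sink_size
    # and the last window_size positions along the sequence axis.
    seq_len = key.size(2)
    if seq_len <= sink_size + window_size:
        return key, value  # cache not full yet, nothing to evict
    keep = lambda t: torch.cat((t[:, :, :sink_size],
                                t[:, :, seq_len - window_size:]), dim=2)
    return keep(key), keep(value)

Run after every decoding step, this keeps the cache at a fixed sink_size + window_size entries no matter how much text has been generated.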
/attention_sinks/models/falcon/modeling_falcon.py: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/attention_sinks/models/falcon/modeling_falcon.py
/attention_sinks/models/falcon/pos_shift.py: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/attention_sinks/models/falcon/pos_shift.py
/attention_sinks/models/gpt_neox/__init__.py: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/attention_sinks/models/gpt_neox/__init__.py
/attention_sinks/models/gpt_neox/modeling_gpt_neox.py: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/attention_sinks/models/gpt_neox/modeling_gpt_neox.py
/attention_sinks/models/gpt_neox/pos_shift.py: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/attention_sinks/models/gpt_neox/pos_shift.py
/attention_sinks/models/gptj/__init__.py: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/attention_sinks/models/gptj/__init__.py
/attention_sinks/models/gptj/modeling_gptj.py: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/attention_sinks/models/gptj/modeling_gptj.py
/attention_sinks/models/gptj/pos_shift.py: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/attention_sinks/models/gptj/pos_shift.py
/attention_sinks/models/llama/__init__.py: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/attention_sinks/models/llama/__init__.py
/attention_sinks/models/llama/modeling_llama.py: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/attention_sinks/models/llama/modeling_llama.py
/attention_sinks/models/llama/pos_shift.py: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/attention_sinks/models/llama/pos_shift.py
/attention_sinks/models/mistral/__init__.py: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/attention_sinks/models/mistral/__init__.py
/attention_sinks/models/mistral/modeling_mistral.py: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/attention_sinks/models/mistral/modeling_mistral.py
/attention_sinks/models/mistral/pos_shift.py: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/attention_sinks/models/mistral/pos_shift.py
/attention_sinks/models/mpt/__init__.py: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/attention_sinks/models/mpt/__init__.py
/attention_sinks/models/mpt/modeling_mpt.py: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/attention_sinks/models/mpt/modeling_mpt.py
/attention_sinks/models/qwen/__init__.py: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/attention_sinks/models/qwen/__init__.py
/attention_sinks/models/qwen/pos_shift.py: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/attention_sinks/models/qwen/pos_shift.py
/attention_sinks/models/stablelm_epoch/__init__.py: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/attention_sinks/models/stablelm_epoch/__init__.py
/attention_sinks/models/stablelm_epoch/pos_shift.py: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/attention_sinks/models/stablelm_epoch/pos_shift.py
/attention_sinks/models/yi/__init__.py: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/attention_sinks/models/yi/__init__.py
/attention_sinks/models/yi/pos_shift.py: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/attention_sinks/models/yi/pos_shift.py
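Every architecture directory above ships a pos_shift.py. Evicting middle tokens leaves gaps in the absolute positions of the cached keys, which breaks rotary embeddings; the fix used by StreamingLLM-style caches is to assign positions within the cache (0..cache_len-1) rather than within the full text, re-rotating the surviving keys. A simplified single-head sketch of rotary application at cache-relative positions (illustrative, not the repository's code):

import torch

def rope(x: torch.Tensor, positions: torch.Tensor) -> torch.Tensor:
    # Apply rotary embeddings to x: [seq, head_dim] at the given positions.
    head_dim = x.size(-1)
    inv_freq = 1.0 / (10000 ** (torch.arange(0, head_dim, 2) / head_dim))
    angles = positions[:, None] * inv_freq[None, :]  # [seq, head_dim // 2]
    cos, sin = angles.cos(), angles.sin()
    x1, x2 = x[..., 0::2], x[..., 1::2]
    return torch.stack((x1 * cos - x2 * sin,
                        x1 * sin + x2 * cos), dim=-1).flatten(-2)

# Position-shifted attention: positions are cache-relative, so after eviction
# the surviving keys are rotated to contiguous slots 0..n-1 instead of keeping
# their original (now gapped) absolute positions in the text.
cache_len = 8
keys = rope(torch.randn(cache_len, 64), torch.arange(cache_len))
query = rope(torch.randn(1, 64), torch.tensor([cache_len - 1]))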
/benchmark/outputs_btlm_3b_8k_base_2k_win/attention_sinks.csv: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/benchmark/outputs_btlm_3b_8k_base_2k_win/attention_sinks.csv
/benchmark/outputs_btlm_3b_8k_base_2k_win/transformers.csv: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/benchmark/outputs_btlm_3b_8k_base_2k_win/transformers.csv
/benchmark/outputs_btlm_3b_8k_base_2k_win/windowed.csv: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/benchmark/outputs_btlm_3b_8k_base_2k_win/windowed.csv
/benchmark/outputs_falcon_7b/attention_sinks.csv: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/benchmark/outputs_falcon_7b/attention_sinks.csv
/benchmark/outputs_falcon_7b/transformers.csv: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/benchmark/outputs_falcon_7b/transformers.csv
/benchmark/outputs_falcon_7b/windowed.csv: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/benchmark/outputs_falcon_7b/windowed.csv
/benchmark/outputs_gptj_6b/attention_sinks.csv: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/benchmark/outputs_gptj_6b/attention_sinks.csv
/benchmark/outputs_gptj_6b/transformers.csv: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/benchmark/outputs_gptj_6b/transformers.csv
/benchmark/outputs_gptj_6b/windowed.csv: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/benchmark/outputs_gptj_6b/windowed.csv
/benchmark/outputs_llama_2_7b/attention_sinks.csv: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/benchmark/outputs_llama_2_7b/attention_sinks.csv
/benchmark/outputs_llama_2_7b/transformers.csv: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/benchmark/outputs_llama_2_7b/transformers.csv
/benchmark/outputs_llama_2_7b/windowed.csv: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/benchmark/outputs_llama_2_7b/windowed.csv
/benchmark/outputs_mistral_7b/attention_sinks.csv: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/benchmark/outputs_mistral_7b/attention_sinks.csv
/benchmark/outputs_mistral_7b/transformers.csv: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/benchmark/outputs_mistral_7b/transformers.csv
/benchmark/outputs_mistral_7b/windowed.csv: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/benchmark/outputs_mistral_7b/windowed.csv
/benchmark/outputs_mpt_7b/attention_sinks.csv: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/benchmark/outputs_mpt_7b/attention_sinks.csv
/benchmark/outputs_mpt_7b/transformers.csv: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/benchmark/outputs_mpt_7b/transformers.csv
/benchmark/outputs_mpt_7b/windowed.csv: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/benchmark/outputs_mpt_7b/windowed.csv
/benchmark/outputs_pythia_6.9b/attention_sinks.csv: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/benchmark/outputs_pythia_6.9b/attention_sinks.csv
/benchmark/outputs_pythia_6.9b/transformers.csv: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/benchmark/outputs_pythia_6.9b/transformers.csv
/benchmark/outputs_pythia_6.9b/windowed.csv: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/benchmark/outputs_pythia_6.9b/windowed.csv
/benchmark/outputs_qwen_14b/attention_sinks.csv: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/benchmark/outputs_qwen_14b/attention_sinks.csv
/benchmark/outputs_qwen_14b/transformers.csv: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/benchmark/outputs_qwen_14b/transformers.csv
/benchmark/outputs_qwen_7b/attention_sinks.csv: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/benchmark/outputs_qwen_7b/attention_sinks.csv
/benchmark/outputs_qwen_7b/transformers.csv: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/benchmark/outputs_qwen_7b/transformers.csv
/benchmark/outputs_qwen_7b/windowed.csv: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/benchmark/outputs_qwen_7b/windowed.csv
/benchmark/outputs_stablelm_3b_4e1t/attention_sinks.csv: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/benchmark/outputs_stablelm_3b_4e1t/attention_sinks.csv
/benchmark/outputs_stablelm_3b_4e1t/transformers.csv: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/benchmark/outputs_stablelm_3b_4e1t/transformers.csv
/benchmark/outputs_stablelm_3b_4e1t/windowed.csv: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/benchmark/outputs_stablelm_3b_4e1t/windowed.csv
/benchmark/outputs_yi_6b/attention_sinks.csv: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/benchmark/outputs_yi_6b/attention_sinks.csv
/benchmark/outputs_yi_6b/transformers.csv: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/benchmark/outputs_yi_6b/transformers.csv
/benchmark/outputs_yi_6b/windowed.csv: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/benchmark/outputs_yi_6b/windowed.csv
/benchmark/perplexity.py: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/benchmark/perplexity.py
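benchmark/perplexity.py produces the CSVs above: per-token negative log-likelihoods for three setups per model, transformers (full, ever-growing cache), windowed (plain sliding window), and attention_sinks (sliding window plus sinks). plot_perplexity.py, indexed below, turns those CSVs into the plots shown in the README. A hedged sketch of how per-token NLL can be measured under an incremental cache (a generic loop, not the script's actual interface):

import torch

@torch.no_grad()
def streaming_nlls(model, input_ids: torch.Tensor) -> list[float]:
    # Feed tokens one at a time and record the NLL of each next token,
    # so the cache policy (sinks, window, or full) is exercised as in decoding.
    past, nlls = None, []
    for i in range(input_ids.size(1) - 1):
        out = model(input_ids[:, i : i + 1], past_key_values=past, use_cache=True)
        past = out.past_key_values  # stays bounded under attention_sinks
        logits = out.logits[:, -1, :]
        target = input_ids[:, i + 1]
        nlls.append(torch.nn.functional.cross_entropy(logits, target).item())
    return nlls  # perplexity = exp(mean(nlls))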
/benchmark/plot_perplexity.py: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/benchmark/plot_perplexity.py
/benchmark/scripts/benchmark_btlm.sh: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/benchmark/scripts/benchmark_btlm.sh
/benchmark/scripts/benchmark_falcon.sh: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/benchmark/scripts/benchmark_falcon.sh
/benchmark/scripts/benchmark_gptj.sh: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/benchmark/scripts/benchmark_gptj.sh
/benchmark/scripts/benchmark_llama.sh: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/benchmark/scripts/benchmark_llama.sh
/benchmark/scripts/benchmark_mistral.sh: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/benchmark/scripts/benchmark_mistral.sh
/benchmark/scripts/benchmark_mpt.sh: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/benchmark/scripts/benchmark_mpt.sh
/benchmark/scripts/benchmark_pythia.sh: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/benchmark/scripts/benchmark_pythia.sh
/benchmark/scripts/benchmark_qwen.sh: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/benchmark/scripts/benchmark_qwen.sh
/benchmark/scripts/benchmark_stablelm_epoch.sh: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/benchmark/scripts/benchmark_stablelm_epoch.sh
/benchmark/scripts/benchmark_yi.sh: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/benchmark/scripts/benchmark_yi.sh
/demo/endless_generation.py: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/demo/endless_generation.py
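demo/endless_generation.py generates far past the model's pretraining context length and logs the text under endless_logs/ for the three cache variants; the point of the demo is that only the attention_sinks log stays fluent, at constant memory. A greedy-decoding sketch of such a loop (simplified; the demo's actual options differ):

import torch

@torch.no_grad()
def generate_endlessly(model, tokenizer, prompt: str, num_tokens: int = 10_000):
    input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(model.device)
    past = None
    for _ in range(num_tokens):
        out = model(input_ids, past_key_values=past, use_cache=True)
        past = out.past_key_values                 # bounded with attention_sinks
        input_ids = out.logits[:, -1:].argmax(-1)  # greedy next token, shape [1, 1]
        print(tokenizer.decode(input_ids[0]), end="", flush=True)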
/demo/endless_logs/attention_sinks/meta-llama/Llama-2-7b-hf.txt: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/demo/endless_logs/attention_sinks/meta-llama/Llama-2-7b-hf.txt
/demo/endless_logs/transformers/meta-llama/Llama-2-7b-hf.txt: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/demo/endless_logs/transformers/meta-llama/Llama-2-7b-hf.txt
/demo/endless_logs/windowed/meta-llama/Llama-2-7b-hf.txt: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/demo/endless_logs/windowed/meta-llama/Llama-2-7b-hf.txt
/demo/streaming.py: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/demo/streaming.py
/demo/streaming_logs/attention_sinks/HuggingFaceH4/zephyr-7b-alpha.txt: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/demo/streaming_logs/attention_sinks/HuggingFaceH4/zephyr-7b-alpha.txt
/demo/streaming_logs/attention_sinks/meta-llama/Llama-2-7b-chat-hf.txt: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/demo/streaming_logs/attention_sinks/meta-llama/Llama-2-7b-chat-hf.txt
/demo/streaming_logs/attention_sinks/mistralai/Mistral-7B-Instruct-v0.1.txt: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/demo/streaming_logs/attention_sinks/mistralai/Mistral-7B-Instruct-v0.1.txt
/demo/streaming_logs/attention_sinks/mosaicml/mpt-7b-chat.txt: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/demo/streaming_logs/attention_sinks/mosaicml/mpt-7b-chat.txt
/demo/streaming_logs/transformers/HuggingFaceH4/zephyr-7b-alpha.txt: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/demo/streaming_logs/transformers/HuggingFaceH4/zephyr-7b-alpha.txt
/demo/streaming_logs/transformers/meta-llama/Llama-2-7b-chat-hf.txt: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/demo/streaming_logs/transformers/meta-llama/Llama-2-7b-chat-hf.txt
/demo/streaming_logs/transformers/mistralai/Mistral-7B-Instruct-v0.1.txt: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/demo/streaming_logs/transformers/mistralai/Mistral-7B-Instruct-v0.1.txt
/demo/streaming_logs/transformers/mosaicml/mpt-7b-chat.txt: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/demo/streaming_logs/transformers/mosaicml/mpt-7b-chat.txt
/demo/streaming_logs/windowed/HuggingFaceH4/zephyr-7b-alpha.txt: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/demo/streaming_logs/windowed/HuggingFaceH4/zephyr-7b-alpha.txt
/demo/streaming_logs/windowed/meta-llama/Llama-2-7b-chat-hf.txt: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/demo/streaming_logs/windowed/meta-llama/Llama-2-7b-chat-hf.txt
/demo/streaming_logs/windowed/mistralai/Mistral-7B-Instruct-v0.1.txt: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/demo/streaming_logs/windowed/mistralai/Mistral-7B-Instruct-v0.1.txt
/demo/streaming_logs/windowed/mosaicml/mpt-7b-chat.txt: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/demo/streaming_logs/windowed/mosaicml/mpt-7b-chat.txt
/demo/utils.py: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/demo/utils.py
/pyproject.toml: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/pyproject.toml
/setup.py: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/setup.py