├── .gitignore
├── .pre-commit-config.yaml
├── CHANGELOG.md
├── LICENSE
├── README.md
├── attention_sinks
│   ├── __init__.py
│   ├── attention_sink_kv_cache.py
│   ├── generation
│   │   └── utils.py
│   ├── inject_mixin.py
│   └── models
│       ├── __init__.py
│       ├── auto
│       │   ├── __init__.py
│       │   └── modeling_auto.py
│       ├── falcon
│       │   ├── __init__.py
│       │   ├── modeling_falcon.py
│       │   └── pos_shift.py
│       ├── gpt_neox
│       │   ├── __init__.py
│       │   ├── modeling_gpt_neox.py
│       │   └── pos_shift.py
│       ├── gptj
│       │   ├── __init__.py
│       │   ├── modeling_gptj.py
│       │   └── pos_shift.py
│       ├── llama
│       │   ├── __init__.py
│       │   ├── modeling_llama.py
│       │   └── pos_shift.py
│       ├── mistral
│       │   ├── __init__.py
│       │   ├── modeling_mistral.py
│       │   └── pos_shift.py
│       ├── mpt
│       │   ├── __init__.py
│       │   └── modeling_mpt.py
│       ├── qwen
│       │   ├── __init__.py
│       │   └── pos_shift.py
│       ├── stablelm_epoch
│       │   ├── __init__.py
│       │   └── pos_shift.py
│       └── yi
│           ├── __init__.py
│           └── pos_shift.py
├── benchmark
│   ├── outputs_btlm_3b_8k_base_2k_win
│   │   ├── attention_sinks.csv
│   │   ├── transformers.csv
│   │   └── windowed.csv
│   ├── outputs_falcon_7b
│   │   ├── attention_sinks.csv
│   │   ├── transformers.csv
│   │   └── windowed.csv
│   ├── outputs_gptj_6b
│   │   ├── attention_sinks.csv
│   │   ├── transformers.csv
│   │   └── windowed.csv
│   ├── outputs_llama_2_7b
│   │   ├── attention_sinks.csv
│   │   ├── transformers.csv
│   │   └── windowed.csv
│   ├── outputs_mistral_7b
│   │   ├── attention_sinks.csv
│   │   ├── transformers.csv
│   │   └── windowed.csv
│   ├── outputs_mpt_7b
│   │   ├── attention_sinks.csv
│   │   ├── transformers.csv
│   │   └── windowed.csv
│   ├── outputs_pythia_6.9b
│   │   ├── attention_sinks.csv
│   │   ├── transformers.csv
│   │   └── windowed.csv
│   ├── outputs_qwen_14b
│   │   ├── attention_sinks.csv
│   │   └── transformers.csv
│   ├── outputs_qwen_7b
│   │   ├── attention_sinks.csv
│   │   ├── transformers.csv
│   │   └── windowed.csv
│   ├── outputs_stablelm_3b_4e1t
│   │   ├── attention_sinks.csv
│   │   ├── transformers.csv
│   │   └── windowed.csv
│   ├── outputs_yi_6b
│   │   ├── attention_sinks.csv
│   │   ├── transformers.csv
│   │   └── windowed.csv
│   ├── perplexity.py
│   ├── plot_perplexity.py
│   └── scripts
│       ├── benchmark_btlm.sh
│       ├── benchmark_falcon.sh
│       ├── benchmark_gptj.sh
│       ├── benchmark_llama.sh
│       ├── benchmark_mistral.sh
│       ├── benchmark_mpt.sh
│       ├── benchmark_pythia.sh
│       ├── benchmark_qwen.sh
│       ├── benchmark_stablelm_epoch.sh
│       └── benchmark_yi.sh
├── demo
│   ├── endless_generation.py
│   ├── endless_logs
│   │   ├── attention_sinks
│   │   │   └── meta-llama
│   │   │       └── Llama-2-7b-hf.txt
│   │   ├── transformers
│   │   │   └── meta-llama
│   │   │       └── Llama-2-7b-hf.txt
│   │   └── windowed
│   │       └── meta-llama
│   │           └── Llama-2-7b-hf.txt
│   ├── streaming.py
│   ├── streaming_logs
│   │   ├── attention_sinks
│   │   │   ├── HuggingFaceH4
│   │   │   │   └── zephyr-7b-alpha.txt
│   │   │   ├── meta-llama
│   │   │   │   └── Llama-2-7b-chat-hf.txt
│   │   │   ├── mistralai
│   │   │   │   └── Mistral-7B-Instruct-v0.1.txt
│   │   │   └── mosaicml
│   │   │       └── mpt-7b-chat.txt
│   │   ├── transformers
│   │   │   ├── HuggingFaceH4
│   │   │   │   └── zephyr-7b-alpha.txt
│   │   │   ├── meta-llama
│   │   │   │   └── Llama-2-7b-chat-hf.txt
│   │   │   ├── mistralai
│   │   │   │   └── Mistral-7B-Instruct-v0.1.txt
│   │   │   └── mosaicml
│   │   │       └── mpt-7b-chat.txt
│   │   └── windowed
│   │       ├── HuggingFaceH4
│   │       │   └── zephyr-7b-alpha.txt
│   │       ├── meta-llama
│   │       │   └── Llama-2-7b-chat-hf.txt
│   │       ├── mistralai
│   │       │   └── Mistral-7B-Instruct-v0.1.txt
│   │       └── mosaicml
│   │           └── mpt-7b-chat.txt
│   └── utils.py
├── pyproject.toml
└── setup.py
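This is the layout of tomaarsen/attention_sinks, a drop-in extension of Hugging Face transformers that gives supported decoder models an "attention sink" KV cache (the StreamingLLM recipe), so generation can run indefinitely at constant memory. A minimal usage sketch, assuming the entry point documented in the repository's README (the keyword names attention_sink_size and attention_sink_window_size come from that README and may differ between versions):

# Sketch: swap the transformers import for attention_sinks and the model gains
# a fixed-size KV cache: a few "sink" tokens plus a sliding window of recent tokens.
from attention_sinks import AutoModelForCausalLM
from transformers import AutoTokenizer

model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-2-7b-hf",
    device_map="auto",
    attention_sink_size=4,            # initial tokens that are never evicted
    attention_sink_window_size=1020,  # recent tokens kept; cache = 4 + 1020
)
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-hf")

The remainder of this document is the file index: each path in the tree above, mapped to its raw contents at HEAD.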
/.gitignore: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/.gitignore
/.pre-commit-config.yaml: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/.pre-commit-config.yaml
/CHANGELOG.md: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/CHANGELOG.md
/LICENSE: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/LICENSE
/README.md: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/README.md
/attention_sinks/__init__.py: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/attention_sinks/__init__.py
/attention_sinks/attention_sink_kv_cache.py: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/attention_sinks/attention_sink_kv_cache.py
/attention_sinks/generation/utils.py: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/attention_sinks/generation/utils.py
/attention_sinks/inject_mixin.py: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/attention_sinks/inject_mixin.py
/attention_sinks/models/__init__.py: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/attention_sinks/models/__init__.py
/attention_sinks/models/auto/__init__.py: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/attention_sinks/models/auto/__init__.py
/attention_sinks/models/auto/modeling_auto.py: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/attention_sinks/models/auto/modeling_auto.py
/attention_sinks/models/falcon/__init__.py: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/attention_sinks/models/falcon/__init__.py
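attention_sink_kv_cache.py (indexed just above) holds the cache policy, and inject_mixin.py patches it into the stock transformers model classes listed under models/. The eviction rule is the heart of the method: keep the first few "sink" positions plus the most recent window, and drop everything in between. A self-contained sketch of that rule (function and argument names are illustrative, not the module's actual API):

import torch

def evict_kv(key: torch.Tensor, value: torch.Tensor,
             sink_size: int = 4, window_size: int = 1020):
    # key/value: [batch, heads, seq_len, head_dim]. Keep the first sink_size
    # and the last window_size positions along the sequence axis.
    seq_len = key.size(2)
    if seq_len <= sink_size + window_size:
        return key, value  # cache not full yet, nothing to evict
    keep = lambda t: torch.cat((t[:, :, :sink_size],
                                t[:, :, seq_len - window_size:]), dim=2)
    return keep(key), keep(value)

Run after every decoding step, this keeps the cache at a fixed sink_size + window_size entries no matter how much text has been generated.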
/attention_sinks/models/falcon/modeling_falcon.py: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/attention_sinks/models/falcon/modeling_falcon.py
/attention_sinks/models/falcon/pos_shift.py: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/attention_sinks/models/falcon/pos_shift.py
/attention_sinks/models/gpt_neox/__init__.py: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/attention_sinks/models/gpt_neox/__init__.py
/attention_sinks/models/gpt_neox/modeling_gpt_neox.py: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/attention_sinks/models/gpt_neox/modeling_gpt_neox.py
/attention_sinks/models/gpt_neox/pos_shift.py: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/attention_sinks/models/gpt_neox/pos_shift.py
/attention_sinks/models/gptj/__init__.py: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/attention_sinks/models/gptj/__init__.py
/attention_sinks/models/gptj/modeling_gptj.py: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/attention_sinks/models/gptj/modeling_gptj.py
/attention_sinks/models/gptj/pos_shift.py: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/attention_sinks/models/gptj/pos_shift.py
/attention_sinks/models/llama/__init__.py: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/attention_sinks/models/llama/__init__.py
/attention_sinks/models/llama/modeling_llama.py: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/attention_sinks/models/llama/modeling_llama.py
/attention_sinks/models/llama/pos_shift.py: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/attention_sinks/models/llama/pos_shift.py
/attention_sinks/models/mistral/__init__.py: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/attention_sinks/models/mistral/__init__.py
/attention_sinks/models/mistral/modeling_mistral.py: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/attention_sinks/models/mistral/modeling_mistral.py
/attention_sinks/models/mistral/pos_shift.py: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/attention_sinks/models/mistral/pos_shift.py
/attention_sinks/models/mpt/__init__.py: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/attention_sinks/models/mpt/__init__.py
/attention_sinks/models/mpt/modeling_mpt.py: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/attention_sinks/models/mpt/modeling_mpt.py
/attention_sinks/models/qwen/__init__.py: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/attention_sinks/models/qwen/__init__.py
/attention_sinks/models/qwen/pos_shift.py: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/attention_sinks/models/qwen/pos_shift.py
/attention_sinks/models/stablelm_epoch/__init__.py: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/attention_sinks/models/stablelm_epoch/__init__.py
/attention_sinks/models/stablelm_epoch/pos_shift.py: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/attention_sinks/models/stablelm_epoch/pos_shift.py
/attention_sinks/models/yi/__init__.py: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/attention_sinks/models/yi/__init__.py
/attention_sinks/models/yi/pos_shift.py: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/attention_sinks/models/yi/pos_shift.py
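Every architecture directory above ships a pos_shift.py. Evicting middle tokens leaves gaps in the absolute positions of the cached keys, which breaks rotary embeddings; the fix used by StreamingLLM-style caches is to assign positions within the cache (0..cache_len-1) rather than within the full text, re-rotating the surviving keys. A simplified single-head sketch of rotary application at cache-relative positions (illustrative, not the repository's code):

import torch

def rope(x: torch.Tensor, positions: torch.Tensor) -> torch.Tensor:
    # Apply rotary embeddings to x: [seq, head_dim] at the given positions.
    head_dim = x.size(-1)
    inv_freq = 1.0 / (10000 ** (torch.arange(0, head_dim, 2) / head_dim))
    angles = positions[:, None] * inv_freq[None, :]  # [seq, head_dim // 2]
    cos, sin = angles.cos(), angles.sin()
    x1, x2 = x[..., 0::2], x[..., 1::2]
    return torch.stack((x1 * cos - x2 * sin,
                        x1 * sin + x2 * cos), dim=-1).flatten(-2)

# Position-shifted attention: positions are cache-relative, so after eviction
# the surviving keys are rotated to contiguous slots 0..n-1 instead of keeping
# their original (now gapped) absolute positions in the text.
cache_len = 8
keys = rope(torch.randn(cache_len, 64), torch.arange(cache_len))
query = rope(torch.randn(1, 64), torch.tensor([cache_len - 1]))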
/benchmark/outputs_btlm_3b_8k_base_2k_win/attention_sinks.csv: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/benchmark/outputs_btlm_3b_8k_base_2k_win/attention_sinks.csv
/benchmark/outputs_btlm_3b_8k_base_2k_win/transformers.csv: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/benchmark/outputs_btlm_3b_8k_base_2k_win/transformers.csv
/benchmark/outputs_btlm_3b_8k_base_2k_win/windowed.csv: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/benchmark/outputs_btlm_3b_8k_base_2k_win/windowed.csv
/benchmark/outputs_falcon_7b/attention_sinks.csv: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/benchmark/outputs_falcon_7b/attention_sinks.csv
/benchmark/outputs_falcon_7b/transformers.csv: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/benchmark/outputs_falcon_7b/transformers.csv
/benchmark/outputs_falcon_7b/windowed.csv: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/benchmark/outputs_falcon_7b/windowed.csv
/benchmark/outputs_gptj_6b/attention_sinks.csv: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/benchmark/outputs_gptj_6b/attention_sinks.csv
/benchmark/outputs_gptj_6b/transformers.csv: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/benchmark/outputs_gptj_6b/transformers.csv
/benchmark/outputs_gptj_6b/windowed.csv: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/benchmark/outputs_gptj_6b/windowed.csv
/benchmark/outputs_llama_2_7b/attention_sinks.csv: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/benchmark/outputs_llama_2_7b/attention_sinks.csv
/benchmark/outputs_llama_2_7b/transformers.csv: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/benchmark/outputs_llama_2_7b/transformers.csv
/benchmark/outputs_llama_2_7b/windowed.csv: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/benchmark/outputs_llama_2_7b/windowed.csv
/benchmark/outputs_mistral_7b/attention_sinks.csv: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/benchmark/outputs_mistral_7b/attention_sinks.csv
/benchmark/outputs_mistral_7b/transformers.csv: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/benchmark/outputs_mistral_7b/transformers.csv
/benchmark/outputs_mistral_7b/windowed.csv: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/benchmark/outputs_mistral_7b/windowed.csv
/benchmark/outputs_mpt_7b/attention_sinks.csv: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/benchmark/outputs_mpt_7b/attention_sinks.csv
/benchmark/outputs_mpt_7b/transformers.csv: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/benchmark/outputs_mpt_7b/transformers.csv
/benchmark/outputs_mpt_7b/windowed.csv: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/benchmark/outputs_mpt_7b/windowed.csv
/benchmark/outputs_pythia_6.9b/attention_sinks.csv: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/benchmark/outputs_pythia_6.9b/attention_sinks.csv
/benchmark/outputs_pythia_6.9b/transformers.csv: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/benchmark/outputs_pythia_6.9b/transformers.csv
/benchmark/outputs_pythia_6.9b/windowed.csv: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/benchmark/outputs_pythia_6.9b/windowed.csv
/benchmark/outputs_qwen_14b/attention_sinks.csv: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/benchmark/outputs_qwen_14b/attention_sinks.csv
/benchmark/outputs_qwen_14b/transformers.csv: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/benchmark/outputs_qwen_14b/transformers.csv
/benchmark/outputs_qwen_7b/attention_sinks.csv: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/benchmark/outputs_qwen_7b/attention_sinks.csv
/benchmark/outputs_qwen_7b/transformers.csv: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/benchmark/outputs_qwen_7b/transformers.csv
/benchmark/outputs_qwen_7b/windowed.csv: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/benchmark/outputs_qwen_7b/windowed.csv
/benchmark/outputs_stablelm_3b_4e1t/attention_sinks.csv: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/benchmark/outputs_stablelm_3b_4e1t/attention_sinks.csv
/benchmark/outputs_stablelm_3b_4e1t/transformers.csv: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/benchmark/outputs_stablelm_3b_4e1t/transformers.csv
/benchmark/outputs_stablelm_3b_4e1t/windowed.csv: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/benchmark/outputs_stablelm_3b_4e1t/windowed.csv
/benchmark/outputs_yi_6b/attention_sinks.csv: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/benchmark/outputs_yi_6b/attention_sinks.csv
/benchmark/outputs_yi_6b/transformers.csv: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/benchmark/outputs_yi_6b/transformers.csv
/benchmark/outputs_yi_6b/windowed.csv: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/benchmark/outputs_yi_6b/windowed.csv
/benchmark/perplexity.py: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/benchmark/perplexity.py
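benchmark/perplexity.py produces the CSVs above: per-token negative log-likelihoods for three setups per model, transformers (full, ever-growing cache), windowed (plain sliding window), and attention_sinks (sliding window plus sinks). plot_perplexity.py, indexed below, turns those CSVs into the plots shown in the README. A hedged sketch of how per-token NLL can be measured under an incremental cache (a generic loop, not the script's actual interface):

import torch

@torch.no_grad()
def streaming_nlls(model, input_ids: torch.Tensor) -> list[float]:
    # Feed tokens one at a time and record the NLL of each next token,
    # so the cache policy (sinks, window, or full) is exercised as in decoding.
    past, nlls = None, []
    for i in range(input_ids.size(1) - 1):
        out = model(input_ids[:, i : i + 1], past_key_values=past, use_cache=True)
        past = out.past_key_values  # stays bounded under attention_sinks
        logits = out.logits[:, -1, :]
        target = input_ids[:, i + 1]
        nlls.append(torch.nn.functional.cross_entropy(logits, target).item())
    return nlls  # perplexity = exp(mean(nlls))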
/benchmark/plot_perplexity.py: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/benchmark/plot_perplexity.py
/benchmark/scripts/benchmark_btlm.sh: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/benchmark/scripts/benchmark_btlm.sh
/benchmark/scripts/benchmark_falcon.sh: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/benchmark/scripts/benchmark_falcon.sh
/benchmark/scripts/benchmark_gptj.sh: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/benchmark/scripts/benchmark_gptj.sh
/benchmark/scripts/benchmark_llama.sh: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/benchmark/scripts/benchmark_llama.sh
/benchmark/scripts/benchmark_mistral.sh: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/benchmark/scripts/benchmark_mistral.sh
/benchmark/scripts/benchmark_mpt.sh: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/benchmark/scripts/benchmark_mpt.sh
/benchmark/scripts/benchmark_pythia.sh: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/benchmark/scripts/benchmark_pythia.sh
/benchmark/scripts/benchmark_qwen.sh: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/benchmark/scripts/benchmark_qwen.sh
/benchmark/scripts/benchmark_stablelm_epoch.sh: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/benchmark/scripts/benchmark_stablelm_epoch.sh
/benchmark/scripts/benchmark_yi.sh: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/benchmark/scripts/benchmark_yi.sh
/demo/endless_generation.py: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/demo/endless_generation.py
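demo/endless_generation.py generates far past the model's pretraining context length and logs the text under endless_logs/ for the three cache variants; the point of the demo is that only the attention_sinks log stays fluent, at constant memory. A greedy-decoding sketch of such a loop (simplified; the demo's actual options differ):

import torch

@torch.no_grad()
def generate_endlessly(model, tokenizer, prompt: str, num_tokens: int = 10_000):
    input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(model.device)
    past = None
    for _ in range(num_tokens):
        out = model(input_ids, past_key_values=past, use_cache=True)
        past = out.past_key_values                 # bounded with attention_sinks
        input_ids = out.logits[:, -1:].argmax(-1)  # greedy next token, shape [1, 1]
        print(tokenizer.decode(input_ids[0]), end="", flush=True)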
/demo/endless_logs/attention_sinks/meta-llama/Llama-2-7b-hf.txt: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/demo/endless_logs/attention_sinks/meta-llama/Llama-2-7b-hf.txt
/demo/endless_logs/transformers/meta-llama/Llama-2-7b-hf.txt: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/demo/endless_logs/transformers/meta-llama/Llama-2-7b-hf.txt
/demo/endless_logs/windowed/meta-llama/Llama-2-7b-hf.txt: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/demo/endless_logs/windowed/meta-llama/Llama-2-7b-hf.txt
/demo/streaming.py: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/demo/streaming.py
/demo/streaming_logs/attention_sinks/HuggingFaceH4/zephyr-7b-alpha.txt: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/demo/streaming_logs/attention_sinks/HuggingFaceH4/zephyr-7b-alpha.txt
/demo/streaming_logs/attention_sinks/meta-llama/Llama-2-7b-chat-hf.txt: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/demo/streaming_logs/attention_sinks/meta-llama/Llama-2-7b-chat-hf.txt
/demo/streaming_logs/attention_sinks/mistralai/Mistral-7B-Instruct-v0.1.txt: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/demo/streaming_logs/attention_sinks/mistralai/Mistral-7B-Instruct-v0.1.txt
/demo/streaming_logs/attention_sinks/mosaicml/mpt-7b-chat.txt: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/demo/streaming_logs/attention_sinks/mosaicml/mpt-7b-chat.txt
/demo/streaming_logs/transformers/HuggingFaceH4/zephyr-7b-alpha.txt: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/demo/streaming_logs/transformers/HuggingFaceH4/zephyr-7b-alpha.txt
/demo/streaming_logs/transformers/meta-llama/Llama-2-7b-chat-hf.txt: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/demo/streaming_logs/transformers/meta-llama/Llama-2-7b-chat-hf.txt
/demo/streaming_logs/transformers/mistralai/Mistral-7B-Instruct-v0.1.txt: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/demo/streaming_logs/transformers/mistralai/Mistral-7B-Instruct-v0.1.txt
/demo/streaming_logs/transformers/mosaicml/mpt-7b-chat.txt: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/demo/streaming_logs/transformers/mosaicml/mpt-7b-chat.txt
/demo/streaming_logs/windowed/HuggingFaceH4/zephyr-7b-alpha.txt: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/demo/streaming_logs/windowed/HuggingFaceH4/zephyr-7b-alpha.txt
/demo/streaming_logs/windowed/meta-llama/Llama-2-7b-chat-hf.txt: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/demo/streaming_logs/windowed/meta-llama/Llama-2-7b-chat-hf.txt
/demo/streaming_logs/windowed/mistralai/Mistral-7B-Instruct-v0.1.txt: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/demo/streaming_logs/windowed/mistralai/Mistral-7B-Instruct-v0.1.txt
/demo/streaming_logs/windowed/mosaicml/mpt-7b-chat.txt: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/demo/streaming_logs/windowed/mosaicml/mpt-7b-chat.txt
/demo/utils.py: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/demo/utils.py
/pyproject.toml: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/pyproject.toml
/setup.py: https://raw.githubusercontent.com/tomaarsen/attention_sinks/HEAD/setup.py