├── .dockerignore ├── .github └── workflows │ └── ci.yml ├── .gitignore ├── .pre-commit-config.yaml ├── AVERAGE.md ├── LICENSE ├── MANIFEST.in ├── MOE.md ├── Makefile ├── README.md ├── environment-tests.yml ├── environment.yml ├── eval ├── eval_openlm_ckpt.py ├── in_memory_hf_eval.yaml └── local_data │ ├── .gitignore │ ├── arc_challenge.jsonl │ ├── arc_easy.jsonl │ ├── boolq.jsonl │ ├── copa.jsonl │ ├── hellaswag.jsonl │ ├── jeopardy_all.jsonl │ ├── lambada_openai.jsonl │ ├── mmlu.jsonl │ ├── piqa.jsonl │ ├── triviaqa.jsonl │ ├── winograd_wsc.jsonl │ └── winogrande.jsonl ├── open_lm ├── __init__.py ├── attention.py ├── data.py ├── datapreprocess │ ├── __init__.py │ ├── docs │ │ └── ray_cluster_setup.md │ ├── make_2048.py │ ├── make_assistant_data.py │ ├── metadata │ │ └── rpj_lm_data.yaml │ ├── ray │ │ ├── __init__.py │ │ ├── ray_cluster_configs │ │ │ └── cluster_west.yaml │ │ ├── readme.md │ │ ├── token_counter.py │ │ └── tokenize_shuffle.py │ └── wiki_download.py ├── distributed.py ├── evaluate.py ├── file_utils.py ├── logger.py ├── losses.py ├── main.py ├── meters.py ├── model.py ├── model_configs │ ├── __init__.py │ ├── ant_neox.json │ ├── aphid_neox.json │ ├── atom_neox.json │ ├── g3b_neox.json │ ├── l7b_neox.json │ ├── linear_1b.json │ ├── linear_7b.json │ ├── linear_tiny.json │ ├── llama2_7b.json │ ├── m1b_neox.json │ ├── m1b_tiktoken.json │ ├── mamba_130m.json │ ├── mamba_1b.json │ ├── mamba_7b.json │ ├── marmot_neox.json │ ├── mistral_7b.json │ ├── mistral_7b_linear.json │ ├── open_lm_11m.json │ ├── open_lm_11m_v2.json │ ├── open_lm_154m_v2.json │ ├── open_lm_160m.json │ ├── open_lm_1b.json │ ├── open_lm_1b_old.json │ ├── open_lm_25m.json │ ├── open_lm_3b.json │ ├── open_lm_410m.json │ ├── open_lm_411m_v2.json │ ├── open_lm_41m.json │ ├── open_lm_79m_v2.json │ ├── open_lm_7b.json │ ├── open_lm_830m.json │ ├── open_lm_87m.json │ ├── open_lm_test_tiny.json │ ├── potato_neox.json │ └── quark_neox.json ├── norms.py ├── open_lm_hf │ ├── __init__.py │ ├── configuration_openlm.py │ ├── modeling_openlm.py │ └── tokenization_openlm.py ├── params.py ├── positional_embedding │ ├── __init__.py │ ├── head_rotary.py │ ├── llama_rotary.py │ ├── none.py │ └── rotary.py ├── precision.py ├── run_bench.sh ├── scheduler.py ├── tests │ └── test_accumulation.py ├── train.py └── utils │ ├── __init__.py │ ├── averaging_utils.py │ ├── convert_llama.py │ ├── llm_foundry_wrapper.py │ ├── make_wds_manifest.py │ ├── transformers │ ├── __init__.py │ ├── convert_to_hf.py │ ├── generation.py │ ├── hf_config.py │ ├── hf_model.py │ └── hf_wrapper.py │ ├── update_manifest.py │ └── verify_converted_llama.ipynb ├── plots ├── fig1.png ├── interpolation.png ├── interpolation.py └── logo.png ├── pyproject.toml ├── requirements.txt ├── requirements_test.txt ├── sagemaker_train ├── .dockerignore ├── Dockerfile ├── Dockerfile_update ├── cfg_sample.yaml └── launch_sagemaker_train.py ├── scripts ├── generate.py ├── generate_without_hf.py └── train_example.sh ├── setup.py └── tests ├── __init__.py ├── assets └── 2049_span_pad.json ├── shared.py ├── test_attention_masking.py ├── test_custom_attention.py ├── test_dataset_basic.py ├── test_dataset_deterministic.py ├── test_dataset_no_resample.py ├── test_file_utils.py ├── test_generate_kv_cache_time.py ├── test_generate_load_kv_cache_equal.py ├── test_grad_accum.py ├── test_loss_masking.py ├── test_make_wds_manifest.py ├── test_param_parsing.py ├── test_save_load.py ├── test_save_load_from_main.py ├── test_tiny_generate_kv_cache_equal.py ├── test_tokenize_shuffle.py ├── test_training_simple.py ├── test_training_tokens.py └── utils.py /.dockerignore: -------------------------------------------------------------------------------- 1 | venv 2 | wandb 3 | logs 4 | checkpoints 5 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/.github/workflows/ci.yml -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/.gitignore -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/.pre-commit-config.yaml -------------------------------------------------------------------------------- /AVERAGE.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/AVERAGE.md -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/LICENSE -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include open_lm/model_configs/*.json -------------------------------------------------------------------------------- /MOE.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/MOE.md -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/Makefile -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/README.md -------------------------------------------------------------------------------- /environment-tests.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/environment-tests.yml -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/environment.yml -------------------------------------------------------------------------------- /eval/eval_openlm_ckpt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/eval/eval_openlm_ckpt.py -------------------------------------------------------------------------------- /eval/in_memory_hf_eval.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/eval/in_memory_hf_eval.yaml -------------------------------------------------------------------------------- /eval/local_data/.gitignore: -------------------------------------------------------------------------------- 1 | !* 2 | -------------------------------------------------------------------------------- /eval/local_data/arc_challenge.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/eval/local_data/arc_challenge.jsonl -------------------------------------------------------------------------------- /eval/local_data/arc_easy.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/eval/local_data/arc_easy.jsonl -------------------------------------------------------------------------------- /eval/local_data/boolq.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/eval/local_data/boolq.jsonl -------------------------------------------------------------------------------- /eval/local_data/copa.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/eval/local_data/copa.jsonl -------------------------------------------------------------------------------- /eval/local_data/hellaswag.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/eval/local_data/hellaswag.jsonl -------------------------------------------------------------------------------- /eval/local_data/jeopardy_all.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/eval/local_data/jeopardy_all.jsonl -------------------------------------------------------------------------------- /eval/local_data/lambada_openai.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/eval/local_data/lambada_openai.jsonl -------------------------------------------------------------------------------- /eval/local_data/mmlu.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/eval/local_data/mmlu.jsonl -------------------------------------------------------------------------------- /eval/local_data/piqa.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/eval/local_data/piqa.jsonl -------------------------------------------------------------------------------- /eval/local_data/triviaqa.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/eval/local_data/triviaqa.jsonl -------------------------------------------------------------------------------- /eval/local_data/winograd_wsc.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/eval/local_data/winograd_wsc.jsonl -------------------------------------------------------------------------------- /eval/local_data/winogrande.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/eval/local_data/winogrande.jsonl -------------------------------------------------------------------------------- /open_lm/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /open_lm/attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/open_lm/attention.py -------------------------------------------------------------------------------- /open_lm/data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/open_lm/data.py -------------------------------------------------------------------------------- /open_lm/datapreprocess/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /open_lm/datapreprocess/docs/ray_cluster_setup.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/open_lm/datapreprocess/docs/ray_cluster_setup.md -------------------------------------------------------------------------------- /open_lm/datapreprocess/make_2048.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/open_lm/datapreprocess/make_2048.py -------------------------------------------------------------------------------- /open_lm/datapreprocess/make_assistant_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/open_lm/datapreprocess/make_assistant_data.py -------------------------------------------------------------------------------- /open_lm/datapreprocess/metadata/rpj_lm_data.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/open_lm/datapreprocess/metadata/rpj_lm_data.yaml -------------------------------------------------------------------------------- /open_lm/datapreprocess/ray/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /open_lm/datapreprocess/ray/ray_cluster_configs/cluster_west.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/open_lm/datapreprocess/ray/ray_cluster_configs/cluster_west.yaml -------------------------------------------------------------------------------- /open_lm/datapreprocess/ray/readme.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/open_lm/datapreprocess/ray/readme.md -------------------------------------------------------------------------------- /open_lm/datapreprocess/ray/token_counter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/open_lm/datapreprocess/ray/token_counter.py -------------------------------------------------------------------------------- /open_lm/datapreprocess/ray/tokenize_shuffle.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/open_lm/datapreprocess/ray/tokenize_shuffle.py -------------------------------------------------------------------------------- /open_lm/datapreprocess/wiki_download.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/open_lm/datapreprocess/wiki_download.py -------------------------------------------------------------------------------- /open_lm/distributed.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/open_lm/distributed.py -------------------------------------------------------------------------------- /open_lm/evaluate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/open_lm/evaluate.py -------------------------------------------------------------------------------- /open_lm/file_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/open_lm/file_utils.py -------------------------------------------------------------------------------- /open_lm/logger.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/open_lm/logger.py -------------------------------------------------------------------------------- /open_lm/losses.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/open_lm/losses.py -------------------------------------------------------------------------------- /open_lm/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/open_lm/main.py -------------------------------------------------------------------------------- /open_lm/meters.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/open_lm/meters.py -------------------------------------------------------------------------------- /open_lm/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/open_lm/model.py -------------------------------------------------------------------------------- /open_lm/model_configs/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /open_lm/model_configs/ant_neox.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/open_lm/model_configs/ant_neox.json -------------------------------------------------------------------------------- /open_lm/model_configs/aphid_neox.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/open_lm/model_configs/aphid_neox.json -------------------------------------------------------------------------------- /open_lm/model_configs/atom_neox.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/open_lm/model_configs/atom_neox.json -------------------------------------------------------------------------------- /open_lm/model_configs/g3b_neox.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/open_lm/model_configs/g3b_neox.json -------------------------------------------------------------------------------- /open_lm/model_configs/l7b_neox.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/open_lm/model_configs/l7b_neox.json -------------------------------------------------------------------------------- /open_lm/model_configs/linear_1b.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/open_lm/model_configs/linear_1b.json -------------------------------------------------------------------------------- /open_lm/model_configs/linear_7b.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/open_lm/model_configs/linear_7b.json -------------------------------------------------------------------------------- /open_lm/model_configs/linear_tiny.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/open_lm/model_configs/linear_tiny.json -------------------------------------------------------------------------------- /open_lm/model_configs/llama2_7b.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/open_lm/model_configs/llama2_7b.json -------------------------------------------------------------------------------- /open_lm/model_configs/m1b_neox.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/open_lm/model_configs/m1b_neox.json -------------------------------------------------------------------------------- /open_lm/model_configs/m1b_tiktoken.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/open_lm/model_configs/m1b_tiktoken.json -------------------------------------------------------------------------------- /open_lm/model_configs/mamba_130m.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/open_lm/model_configs/mamba_130m.json -------------------------------------------------------------------------------- /open_lm/model_configs/mamba_1b.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/open_lm/model_configs/mamba_1b.json -------------------------------------------------------------------------------- /open_lm/model_configs/mamba_7b.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/open_lm/model_configs/mamba_7b.json -------------------------------------------------------------------------------- /open_lm/model_configs/marmot_neox.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/open_lm/model_configs/marmot_neox.json -------------------------------------------------------------------------------- /open_lm/model_configs/mistral_7b.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/open_lm/model_configs/mistral_7b.json -------------------------------------------------------------------------------- /open_lm/model_configs/mistral_7b_linear.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/open_lm/model_configs/mistral_7b_linear.json -------------------------------------------------------------------------------- /open_lm/model_configs/open_lm_11m.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/open_lm/model_configs/open_lm_11m.json -------------------------------------------------------------------------------- /open_lm/model_configs/open_lm_11m_v2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/open_lm/model_configs/open_lm_11m_v2.json -------------------------------------------------------------------------------- /open_lm/model_configs/open_lm_154m_v2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/open_lm/model_configs/open_lm_154m_v2.json -------------------------------------------------------------------------------- /open_lm/model_configs/open_lm_160m.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/open_lm/model_configs/open_lm_160m.json -------------------------------------------------------------------------------- /open_lm/model_configs/open_lm_1b.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/open_lm/model_configs/open_lm_1b.json -------------------------------------------------------------------------------- /open_lm/model_configs/open_lm_1b_old.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/open_lm/model_configs/open_lm_1b_old.json -------------------------------------------------------------------------------- /open_lm/model_configs/open_lm_25m.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/open_lm/model_configs/open_lm_25m.json -------------------------------------------------------------------------------- /open_lm/model_configs/open_lm_3b.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/open_lm/model_configs/open_lm_3b.json -------------------------------------------------------------------------------- /open_lm/model_configs/open_lm_410m.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/open_lm/model_configs/open_lm_410m.json -------------------------------------------------------------------------------- /open_lm/model_configs/open_lm_411m_v2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/open_lm/model_configs/open_lm_411m_v2.json -------------------------------------------------------------------------------- /open_lm/model_configs/open_lm_41m.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/open_lm/model_configs/open_lm_41m.json -------------------------------------------------------------------------------- /open_lm/model_configs/open_lm_79m_v2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/open_lm/model_configs/open_lm_79m_v2.json -------------------------------------------------------------------------------- /open_lm/model_configs/open_lm_7b.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/open_lm/model_configs/open_lm_7b.json -------------------------------------------------------------------------------- /open_lm/model_configs/open_lm_830m.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/open_lm/model_configs/open_lm_830m.json -------------------------------------------------------------------------------- /open_lm/model_configs/open_lm_87m.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/open_lm/model_configs/open_lm_87m.json -------------------------------------------------------------------------------- /open_lm/model_configs/open_lm_test_tiny.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/open_lm/model_configs/open_lm_test_tiny.json -------------------------------------------------------------------------------- /open_lm/model_configs/potato_neox.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/open_lm/model_configs/potato_neox.json -------------------------------------------------------------------------------- /open_lm/model_configs/quark_neox.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/open_lm/model_configs/quark_neox.json -------------------------------------------------------------------------------- /open_lm/norms.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/open_lm/norms.py -------------------------------------------------------------------------------- /open_lm/open_lm_hf/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/open_lm/open_lm_hf/__init__.py -------------------------------------------------------------------------------- /open_lm/open_lm_hf/configuration_openlm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/open_lm/open_lm_hf/configuration_openlm.py -------------------------------------------------------------------------------- /open_lm/open_lm_hf/modeling_openlm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/open_lm/open_lm_hf/modeling_openlm.py -------------------------------------------------------------------------------- /open_lm/open_lm_hf/tokenization_openlm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/open_lm/open_lm_hf/tokenization_openlm.py -------------------------------------------------------------------------------- /open_lm/params.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/open_lm/params.py -------------------------------------------------------------------------------- /open_lm/positional_embedding/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /open_lm/positional_embedding/head_rotary.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/open_lm/positional_embedding/head_rotary.py -------------------------------------------------------------------------------- /open_lm/positional_embedding/llama_rotary.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/open_lm/positional_embedding/llama_rotary.py -------------------------------------------------------------------------------- /open_lm/positional_embedding/none.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/open_lm/positional_embedding/none.py -------------------------------------------------------------------------------- /open_lm/positional_embedding/rotary.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/open_lm/positional_embedding/rotary.py -------------------------------------------------------------------------------- /open_lm/precision.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/open_lm/precision.py -------------------------------------------------------------------------------- /open_lm/run_bench.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/open_lm/run_bench.sh -------------------------------------------------------------------------------- /open_lm/scheduler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/open_lm/scheduler.py -------------------------------------------------------------------------------- /open_lm/tests/test_accumulation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/open_lm/tests/test_accumulation.py -------------------------------------------------------------------------------- /open_lm/train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/open_lm/train.py -------------------------------------------------------------------------------- /open_lm/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /open_lm/utils/averaging_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/open_lm/utils/averaging_utils.py -------------------------------------------------------------------------------- /open_lm/utils/convert_llama.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/open_lm/utils/convert_llama.py -------------------------------------------------------------------------------- /open_lm/utils/llm_foundry_wrapper.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/open_lm/utils/llm_foundry_wrapper.py -------------------------------------------------------------------------------- /open_lm/utils/make_wds_manifest.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/open_lm/utils/make_wds_manifest.py -------------------------------------------------------------------------------- /open_lm/utils/transformers/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /open_lm/utils/transformers/convert_to_hf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/open_lm/utils/transformers/convert_to_hf.py -------------------------------------------------------------------------------- /open_lm/utils/transformers/generation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/open_lm/utils/transformers/generation.py -------------------------------------------------------------------------------- /open_lm/utils/transformers/hf_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/open_lm/utils/transformers/hf_config.py -------------------------------------------------------------------------------- /open_lm/utils/transformers/hf_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/open_lm/utils/transformers/hf_model.py -------------------------------------------------------------------------------- /open_lm/utils/transformers/hf_wrapper.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/open_lm/utils/transformers/hf_wrapper.py -------------------------------------------------------------------------------- /open_lm/utils/update_manifest.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/open_lm/utils/update_manifest.py -------------------------------------------------------------------------------- /open_lm/utils/verify_converted_llama.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/open_lm/utils/verify_converted_llama.ipynb -------------------------------------------------------------------------------- /plots/fig1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/plots/fig1.png -------------------------------------------------------------------------------- /plots/interpolation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/plots/interpolation.png -------------------------------------------------------------------------------- /plots/interpolation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/plots/interpolation.py -------------------------------------------------------------------------------- /plots/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/plots/logo.png -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/pyproject.toml -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/requirements.txt -------------------------------------------------------------------------------- /requirements_test.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/requirements_test.txt -------------------------------------------------------------------------------- /sagemaker_train/.dockerignore: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /sagemaker_train/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/sagemaker_train/Dockerfile -------------------------------------------------------------------------------- /sagemaker_train/Dockerfile_update: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/sagemaker_train/Dockerfile_update -------------------------------------------------------------------------------- /sagemaker_train/cfg_sample.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/sagemaker_train/cfg_sample.yaml -------------------------------------------------------------------------------- /sagemaker_train/launch_sagemaker_train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/sagemaker_train/launch_sagemaker_train.py -------------------------------------------------------------------------------- /scripts/generate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/scripts/generate.py -------------------------------------------------------------------------------- /scripts/generate_without_hf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/scripts/generate_without_hf.py -------------------------------------------------------------------------------- /scripts/train_example.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/scripts/train_example.sh -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/setup.py -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/assets/2049_span_pad.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/tests/assets/2049_span_pad.json -------------------------------------------------------------------------------- /tests/shared.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/tests/shared.py -------------------------------------------------------------------------------- /tests/test_attention_masking.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/tests/test_attention_masking.py -------------------------------------------------------------------------------- /tests/test_custom_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/tests/test_custom_attention.py -------------------------------------------------------------------------------- /tests/test_dataset_basic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/tests/test_dataset_basic.py -------------------------------------------------------------------------------- /tests/test_dataset_deterministic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/tests/test_dataset_deterministic.py -------------------------------------------------------------------------------- /tests/test_dataset_no_resample.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/tests/test_dataset_no_resample.py -------------------------------------------------------------------------------- /tests/test_file_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/tests/test_file_utils.py -------------------------------------------------------------------------------- /tests/test_generate_kv_cache_time.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/tests/test_generate_kv_cache_time.py -------------------------------------------------------------------------------- /tests/test_generate_load_kv_cache_equal.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/tests/test_generate_load_kv_cache_equal.py -------------------------------------------------------------------------------- /tests/test_grad_accum.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/tests/test_grad_accum.py -------------------------------------------------------------------------------- /tests/test_loss_masking.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/tests/test_loss_masking.py -------------------------------------------------------------------------------- /tests/test_make_wds_manifest.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/tests/test_make_wds_manifest.py -------------------------------------------------------------------------------- /tests/test_param_parsing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/tests/test_param_parsing.py -------------------------------------------------------------------------------- /tests/test_save_load.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/tests/test_save_load.py -------------------------------------------------------------------------------- /tests/test_save_load_from_main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/tests/test_save_load_from_main.py -------------------------------------------------------------------------------- /tests/test_tiny_generate_kv_cache_equal.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/tests/test_tiny_generate_kv_cache_equal.py -------------------------------------------------------------------------------- /tests/test_tokenize_shuffle.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/tests/test_tokenize_shuffle.py -------------------------------------------------------------------------------- /tests/test_training_simple.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/tests/test_training_simple.py -------------------------------------------------------------------------------- /tests/test_training_tokens.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/tests/test_training_tokens.py -------------------------------------------------------------------------------- /tests/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/linear_open_lm/HEAD/tests/utils.py --------------------------------------------------------------------------------