├── CHANGELOG.md ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── huggingface ├── README.md ├── canon_helper.py ├── configuration_llama_canon.py ├── demo_newmodel.py ├── demo_pretrained.py └── modeling_llama_canon.py ├── lingua_modified ├── .gitignore ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── apps │ ├── __init__.py │ ├── aunet │ │ ├── README.md │ │ ├── __init__.py │ │ ├── assets │ │ │ └── AUNet.jpg │ │ ├── configs │ │ │ ├── 2B_1level.yaml │ │ │ └── 2B_2level.yaml │ │ ├── data │ │ │ ├── __init__.py │ │ │ ├── data.py │ │ │ └── regex_cutting.py │ │ ├── eval.py │ │ ├── generate.py │ │ ├── hierarchical.py │ │ ├── index_matmul.py │ │ └── train.py │ ├── fastRNN │ │ ├── component │ │ │ ├── compilable_scan.py │ │ │ └── rnn_common.py │ │ ├── eval.py │ │ ├── generate.py │ │ ├── hawk │ │ │ ├── configs │ │ │ │ ├── debug.yaml │ │ │ │ ├── hawk_1b.yaml │ │ │ │ └── hawk_7b.yaml │ │ │ ├── core_hawk.py │ │ │ └── hawk.py │ │ ├── minGRU │ │ │ ├── configs │ │ │ │ ├── debug.yaml │ │ │ │ └── minGRU_1b.yaml │ │ │ ├── core_gru.py │ │ │ └── mingru.py │ │ ├── minLSTM │ │ │ ├── configs │ │ │ │ ├── debug.yaml │ │ │ │ └── minLSTM_1b.yaml │ │ │ ├── core_lstm.py │ │ │ └── minlstm.py │ │ ├── requirements.txt │ │ └── train.py │ ├── main │ │ ├── __init__.py │ │ ├── configs │ │ │ ├── canon_1B.yaml │ │ │ ├── debug.yaml │ │ │ ├── eval.yaml │ │ │ ├── llama_1B.yaml │ │ │ └── llama_7B.yaml │ │ ├── eval.py │ │ ├── generate.py │ │ ├── train.py │ │ └── transformer.py │ ├── mamba │ │ ├── __init__.py │ │ ├── component │ │ │ ├── __init__.py │ │ │ ├── causal_conv1d_compilable.py │ │ │ └── ssm_compilable.py │ │ ├── configs │ │ │ ├── debug.yaml │ │ │ ├── eval.yaml │ │ │ ├── mamba_1b.yaml │ │ │ └── mamba_7b.yaml │ │ ├── core_mamba.py │ │ ├── eval.py │ │ ├── generate.py │ │ ├── mamba.py │ │ ├── requirements.txt │ │ └── train.py │ ├── mtp │ │ ├── configs │ │ │ └── debug.yaml │ │ ├── eval.py │ │ ├── train.py │ │ └── transformer.py │ └── plots │ │ ├── __init__.py │ │ ├── analysis.py │ │ └── probe_animation.py ├── dataloader.png ├── lingua │ ├── __init__.py │ ├── args.py │ ├── canon_helper.py │ ├── checkpoint.py │ ├── data.py │ ├── distributed.py │ ├── float8.py │ ├── logger.py │ ├── metrics.py │ ├── optim.py │ ├── probe.py │ ├── profiling.py │ ├── stool.py │ ├── tokenizer.py │ └── transformer.py ├── lingua_overview.svg ├── requirements.txt └── setup │ ├── create_env.sh │ ├── download_prepare_hf_data.py │ └── download_tokenizer.py ├── lingua_recipes ├── Llama-1B-Nemo-1T-lr0.002.yaml ├── Llama-1B-Nemo-1T-lr0.003.yaml ├── Llama-1B-Nemo-2T-lr0.003.yaml ├── Llama-1B-Nemo-2T-lr0.005.yaml ├── Llama-3B-Nemo-1T-lr0.002.yaml ├── Llama-3B-Nemo-1T-lr0.003.yaml ├── Llama-8B-Nemo-1T-lr0.002.yaml ├── Llama-8B-Nemo-1T-lr0.003.yaml ├── LlamaCanon-1B-Nemo-1T-lr0.002.yaml ├── LlamaCanon-1B-Nemo-1T-lr0.003.yaml ├── LlamaCanon-1B-Nemo-2T-lr0.003.yaml ├── LlamaCanon-1B-Nemo-2T-lr0.005.yaml ├── LlamaCanon-3B-Nemo-1T-lr0.002.yaml ├── LlamaCanon-3B-Nemo-1T-lr0.003.yaml ├── LlamaCanon-8B-Nemo-1T-lr0.002.yaml ├── LlamaCanon-8B-Nemo-1T-lr0.003.yaml └── README.md └── lingua_results ├── README.md ├── curve-mmlu-bos.png ├── curve-mmlu.png ├── model-training-time.png ├── table-params.png ├── table-performance.png └── training-curves-interactive.html /CHANGELOG.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/PhysicsLM4/HEAD/CHANGELOG.md -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/PhysicsLM4/HEAD/CODE_OF_CONDUCT.md -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/PhysicsLM4/HEAD/CONTRIBUTING.md -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/PhysicsLM4/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/PhysicsLM4/HEAD/README.md -------------------------------------------------------------------------------- /huggingface/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/PhysicsLM4/HEAD/huggingface/README.md -------------------------------------------------------------------------------- /huggingface/canon_helper.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/PhysicsLM4/HEAD/huggingface/canon_helper.py -------------------------------------------------------------------------------- /huggingface/configuration_llama_canon.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/PhysicsLM4/HEAD/huggingface/configuration_llama_canon.py -------------------------------------------------------------------------------- /huggingface/demo_newmodel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/PhysicsLM4/HEAD/huggingface/demo_newmodel.py -------------------------------------------------------------------------------- /huggingface/demo_pretrained.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/PhysicsLM4/HEAD/huggingface/demo_pretrained.py -------------------------------------------------------------------------------- /huggingface/modeling_llama_canon.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/PhysicsLM4/HEAD/huggingface/modeling_llama_canon.py -------------------------------------------------------------------------------- /lingua_modified/.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | .vscode 3 | *.ipynb 4 | slurm-*.out 5 | wandb -------------------------------------------------------------------------------- /lingua_modified/CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/PhysicsLM4/HEAD/lingua_modified/CODE_OF_CONDUCT.md -------------------------------------------------------------------------------- /lingua_modified/CONTRIBUTING.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/PhysicsLM4/HEAD/lingua_modified/CONTRIBUTING.md -------------------------------------------------------------------------------- /lingua_modified/LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/PhysicsLM4/HEAD/lingua_modified/LICENSE -------------------------------------------------------------------------------- /lingua_modified/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/PhysicsLM4/HEAD/lingua_modified/README.md -------------------------------------------------------------------------------- /lingua_modified/apps/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lingua_modified/apps/aunet/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/PhysicsLM4/HEAD/lingua_modified/apps/aunet/README.md -------------------------------------------------------------------------------- /lingua_modified/apps/aunet/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lingua_modified/apps/aunet/assets/AUNet.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/PhysicsLM4/HEAD/lingua_modified/apps/aunet/assets/AUNet.jpg -------------------------------------------------------------------------------- /lingua_modified/apps/aunet/configs/2B_1level.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/PhysicsLM4/HEAD/lingua_modified/apps/aunet/configs/2B_1level.yaml -------------------------------------------------------------------------------- /lingua_modified/apps/aunet/configs/2B_2level.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/PhysicsLM4/HEAD/lingua_modified/apps/aunet/configs/2B_2level.yaml -------------------------------------------------------------------------------- /lingua_modified/apps/aunet/data/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lingua_modified/apps/aunet/data/data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/PhysicsLM4/HEAD/lingua_modified/apps/aunet/data/data.py -------------------------------------------------------------------------------- /lingua_modified/apps/aunet/data/regex_cutting.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/PhysicsLM4/HEAD/lingua_modified/apps/aunet/data/regex_cutting.py -------------------------------------------------------------------------------- /lingua_modified/apps/aunet/eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/PhysicsLM4/HEAD/lingua_modified/apps/aunet/eval.py -------------------------------------------------------------------------------- /lingua_modified/apps/aunet/generate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/PhysicsLM4/HEAD/lingua_modified/apps/aunet/generate.py -------------------------------------------------------------------------------- /lingua_modified/apps/aunet/hierarchical.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/PhysicsLM4/HEAD/lingua_modified/apps/aunet/hierarchical.py -------------------------------------------------------------------------------- /lingua_modified/apps/aunet/index_matmul.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/PhysicsLM4/HEAD/lingua_modified/apps/aunet/index_matmul.py -------------------------------------------------------------------------------- /lingua_modified/apps/aunet/train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/PhysicsLM4/HEAD/lingua_modified/apps/aunet/train.py -------------------------------------------------------------------------------- /lingua_modified/apps/fastRNN/component/compilable_scan.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/PhysicsLM4/HEAD/lingua_modified/apps/fastRNN/component/compilable_scan.py -------------------------------------------------------------------------------- /lingua_modified/apps/fastRNN/component/rnn_common.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/PhysicsLM4/HEAD/lingua_modified/apps/fastRNN/component/rnn_common.py -------------------------------------------------------------------------------- /lingua_modified/apps/fastRNN/eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/PhysicsLM4/HEAD/lingua_modified/apps/fastRNN/eval.py -------------------------------------------------------------------------------- /lingua_modified/apps/fastRNN/generate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/PhysicsLM4/HEAD/lingua_modified/apps/fastRNN/generate.py -------------------------------------------------------------------------------- /lingua_modified/apps/fastRNN/hawk/configs/debug.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/PhysicsLM4/HEAD/lingua_modified/apps/fastRNN/hawk/configs/debug.yaml -------------------------------------------------------------------------------- /lingua_modified/apps/fastRNN/hawk/configs/hawk_1b.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/PhysicsLM4/HEAD/lingua_modified/apps/fastRNN/hawk/configs/hawk_1b.yaml -------------------------------------------------------------------------------- /lingua_modified/apps/fastRNN/hawk/configs/hawk_7b.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/PhysicsLM4/HEAD/lingua_modified/apps/fastRNN/hawk/configs/hawk_7b.yaml -------------------------------------------------------------------------------- /lingua_modified/apps/fastRNN/hawk/core_hawk.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/PhysicsLM4/HEAD/lingua_modified/apps/fastRNN/hawk/core_hawk.py -------------------------------------------------------------------------------- /lingua_modified/apps/fastRNN/hawk/hawk.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/PhysicsLM4/HEAD/lingua_modified/apps/fastRNN/hawk/hawk.py -------------------------------------------------------------------------------- /lingua_modified/apps/fastRNN/minGRU/configs/debug.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/PhysicsLM4/HEAD/lingua_modified/apps/fastRNN/minGRU/configs/debug.yaml -------------------------------------------------------------------------------- /lingua_modified/apps/fastRNN/minGRU/configs/minGRU_1b.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/PhysicsLM4/HEAD/lingua_modified/apps/fastRNN/minGRU/configs/minGRU_1b.yaml -------------------------------------------------------------------------------- /lingua_modified/apps/fastRNN/minGRU/core_gru.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/PhysicsLM4/HEAD/lingua_modified/apps/fastRNN/minGRU/core_gru.py -------------------------------------------------------------------------------- /lingua_modified/apps/fastRNN/minGRU/mingru.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/PhysicsLM4/HEAD/lingua_modified/apps/fastRNN/minGRU/mingru.py -------------------------------------------------------------------------------- /lingua_modified/apps/fastRNN/minLSTM/configs/debug.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/PhysicsLM4/HEAD/lingua_modified/apps/fastRNN/minLSTM/configs/debug.yaml -------------------------------------------------------------------------------- /lingua_modified/apps/fastRNN/minLSTM/configs/minLSTM_1b.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/PhysicsLM4/HEAD/lingua_modified/apps/fastRNN/minLSTM/configs/minLSTM_1b.yaml -------------------------------------------------------------------------------- /lingua_modified/apps/fastRNN/minLSTM/core_lstm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/PhysicsLM4/HEAD/lingua_modified/apps/fastRNN/minLSTM/core_lstm.py -------------------------------------------------------------------------------- /lingua_modified/apps/fastRNN/minLSTM/minlstm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/PhysicsLM4/HEAD/lingua_modified/apps/fastRNN/minLSTM/minlstm.py -------------------------------------------------------------------------------- /lingua_modified/apps/fastRNN/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/PhysicsLM4/HEAD/lingua_modified/apps/fastRNN/requirements.txt -------------------------------------------------------------------------------- /lingua_modified/apps/fastRNN/train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/PhysicsLM4/HEAD/lingua_modified/apps/fastRNN/train.py -------------------------------------------------------------------------------- /lingua_modified/apps/main/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lingua_modified/apps/main/configs/canon_1B.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/PhysicsLM4/HEAD/lingua_modified/apps/main/configs/canon_1B.yaml -------------------------------------------------------------------------------- /lingua_modified/apps/main/configs/debug.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/PhysicsLM4/HEAD/lingua_modified/apps/main/configs/debug.yaml -------------------------------------------------------------------------------- /lingua_modified/apps/main/configs/eval.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/PhysicsLM4/HEAD/lingua_modified/apps/main/configs/eval.yaml -------------------------------------------------------------------------------- /lingua_modified/apps/main/configs/llama_1B.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/PhysicsLM4/HEAD/lingua_modified/apps/main/configs/llama_1B.yaml -------------------------------------------------------------------------------- /lingua_modified/apps/main/configs/llama_7B.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/PhysicsLM4/HEAD/lingua_modified/apps/main/configs/llama_7B.yaml -------------------------------------------------------------------------------- /lingua_modified/apps/main/eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/PhysicsLM4/HEAD/lingua_modified/apps/main/eval.py -------------------------------------------------------------------------------- /lingua_modified/apps/main/generate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/PhysicsLM4/HEAD/lingua_modified/apps/main/generate.py -------------------------------------------------------------------------------- /lingua_modified/apps/main/train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/PhysicsLM4/HEAD/lingua_modified/apps/main/train.py -------------------------------------------------------------------------------- /lingua_modified/apps/main/transformer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/PhysicsLM4/HEAD/lingua_modified/apps/main/transformer.py -------------------------------------------------------------------------------- /lingua_modified/apps/mamba/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lingua_modified/apps/mamba/component/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lingua_modified/apps/mamba/component/causal_conv1d_compilable.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/PhysicsLM4/HEAD/lingua_modified/apps/mamba/component/causal_conv1d_compilable.py -------------------------------------------------------------------------------- /lingua_modified/apps/mamba/component/ssm_compilable.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/PhysicsLM4/HEAD/lingua_modified/apps/mamba/component/ssm_compilable.py -------------------------------------------------------------------------------- /lingua_modified/apps/mamba/configs/debug.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/PhysicsLM4/HEAD/lingua_modified/apps/mamba/configs/debug.yaml -------------------------------------------------------------------------------- /lingua_modified/apps/mamba/configs/eval.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/PhysicsLM4/HEAD/lingua_modified/apps/mamba/configs/eval.yaml -------------------------------------------------------------------------------- /lingua_modified/apps/mamba/configs/mamba_1b.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/PhysicsLM4/HEAD/lingua_modified/apps/mamba/configs/mamba_1b.yaml -------------------------------------------------------------------------------- /lingua_modified/apps/mamba/configs/mamba_7b.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/PhysicsLM4/HEAD/lingua_modified/apps/mamba/configs/mamba_7b.yaml -------------------------------------------------------------------------------- /lingua_modified/apps/mamba/core_mamba.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/PhysicsLM4/HEAD/lingua_modified/apps/mamba/core_mamba.py -------------------------------------------------------------------------------- /lingua_modified/apps/mamba/eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/PhysicsLM4/HEAD/lingua_modified/apps/mamba/eval.py -------------------------------------------------------------------------------- /lingua_modified/apps/mamba/generate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/PhysicsLM4/HEAD/lingua_modified/apps/mamba/generate.py -------------------------------------------------------------------------------- /lingua_modified/apps/mamba/mamba.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/PhysicsLM4/HEAD/lingua_modified/apps/mamba/mamba.py -------------------------------------------------------------------------------- /lingua_modified/apps/mamba/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/PhysicsLM4/HEAD/lingua_modified/apps/mamba/requirements.txt -------------------------------------------------------------------------------- /lingua_modified/apps/mamba/train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/PhysicsLM4/HEAD/lingua_modified/apps/mamba/train.py -------------------------------------------------------------------------------- /lingua_modified/apps/mtp/configs/debug.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/PhysicsLM4/HEAD/lingua_modified/apps/mtp/configs/debug.yaml -------------------------------------------------------------------------------- /lingua_modified/apps/mtp/eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/PhysicsLM4/HEAD/lingua_modified/apps/mtp/eval.py -------------------------------------------------------------------------------- /lingua_modified/apps/mtp/train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/PhysicsLM4/HEAD/lingua_modified/apps/mtp/train.py -------------------------------------------------------------------------------- /lingua_modified/apps/mtp/transformer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/PhysicsLM4/HEAD/lingua_modified/apps/mtp/transformer.py -------------------------------------------------------------------------------- /lingua_modified/apps/plots/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lingua_modified/apps/plots/analysis.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/PhysicsLM4/HEAD/lingua_modified/apps/plots/analysis.py -------------------------------------------------------------------------------- /lingua_modified/apps/plots/probe_animation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/PhysicsLM4/HEAD/lingua_modified/apps/plots/probe_animation.py -------------------------------------------------------------------------------- /lingua_modified/dataloader.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/PhysicsLM4/HEAD/lingua_modified/dataloader.png -------------------------------------------------------------------------------- /lingua_modified/lingua/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lingua_modified/lingua/args.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/PhysicsLM4/HEAD/lingua_modified/lingua/args.py -------------------------------------------------------------------------------- /lingua_modified/lingua/canon_helper.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/PhysicsLM4/HEAD/lingua_modified/lingua/canon_helper.py -------------------------------------------------------------------------------- /lingua_modified/lingua/checkpoint.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/PhysicsLM4/HEAD/lingua_modified/lingua/checkpoint.py -------------------------------------------------------------------------------- /lingua_modified/lingua/data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/PhysicsLM4/HEAD/lingua_modified/lingua/data.py -------------------------------------------------------------------------------- /lingua_modified/lingua/distributed.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/PhysicsLM4/HEAD/lingua_modified/lingua/distributed.py -------------------------------------------------------------------------------- /lingua_modified/lingua/float8.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/PhysicsLM4/HEAD/lingua_modified/lingua/float8.py -------------------------------------------------------------------------------- /lingua_modified/lingua/logger.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/PhysicsLM4/HEAD/lingua_modified/lingua/logger.py -------------------------------------------------------------------------------- /lingua_modified/lingua/metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/PhysicsLM4/HEAD/lingua_modified/lingua/metrics.py -------------------------------------------------------------------------------- /lingua_modified/lingua/optim.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/PhysicsLM4/HEAD/lingua_modified/lingua/optim.py -------------------------------------------------------------------------------- /lingua_modified/lingua/probe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/PhysicsLM4/HEAD/lingua_modified/lingua/probe.py -------------------------------------------------------------------------------- /lingua_modified/lingua/profiling.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/PhysicsLM4/HEAD/lingua_modified/lingua/profiling.py -------------------------------------------------------------------------------- /lingua_modified/lingua/stool.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/PhysicsLM4/HEAD/lingua_modified/lingua/stool.py -------------------------------------------------------------------------------- /lingua_modified/lingua/tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/PhysicsLM4/HEAD/lingua_modified/lingua/tokenizer.py -------------------------------------------------------------------------------- /lingua_modified/lingua/transformer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/PhysicsLM4/HEAD/lingua_modified/lingua/transformer.py -------------------------------------------------------------------------------- /lingua_modified/lingua_overview.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/PhysicsLM4/HEAD/lingua_modified/lingua_overview.svg -------------------------------------------------------------------------------- /lingua_modified/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/PhysicsLM4/HEAD/lingua_modified/requirements.txt -------------------------------------------------------------------------------- /lingua_modified/setup/create_env.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/PhysicsLM4/HEAD/lingua_modified/setup/create_env.sh -------------------------------------------------------------------------------- /lingua_modified/setup/download_prepare_hf_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/PhysicsLM4/HEAD/lingua_modified/setup/download_prepare_hf_data.py -------------------------------------------------------------------------------- /lingua_modified/setup/download_tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/PhysicsLM4/HEAD/lingua_modified/setup/download_tokenizer.py -------------------------------------------------------------------------------- /lingua_recipes/Llama-1B-Nemo-1T-lr0.002.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/PhysicsLM4/HEAD/lingua_recipes/Llama-1B-Nemo-1T-lr0.002.yaml -------------------------------------------------------------------------------- /lingua_recipes/Llama-1B-Nemo-1T-lr0.003.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/PhysicsLM4/HEAD/lingua_recipes/Llama-1B-Nemo-1T-lr0.003.yaml -------------------------------------------------------------------------------- /lingua_recipes/Llama-1B-Nemo-2T-lr0.003.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/PhysicsLM4/HEAD/lingua_recipes/Llama-1B-Nemo-2T-lr0.003.yaml -------------------------------------------------------------------------------- /lingua_recipes/Llama-1B-Nemo-2T-lr0.005.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/PhysicsLM4/HEAD/lingua_recipes/Llama-1B-Nemo-2T-lr0.005.yaml -------------------------------------------------------------------------------- /lingua_recipes/Llama-3B-Nemo-1T-lr0.002.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/PhysicsLM4/HEAD/lingua_recipes/Llama-3B-Nemo-1T-lr0.002.yaml -------------------------------------------------------------------------------- /lingua_recipes/Llama-3B-Nemo-1T-lr0.003.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/PhysicsLM4/HEAD/lingua_recipes/Llama-3B-Nemo-1T-lr0.003.yaml -------------------------------------------------------------------------------- /lingua_recipes/Llama-8B-Nemo-1T-lr0.002.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/PhysicsLM4/HEAD/lingua_recipes/Llama-8B-Nemo-1T-lr0.002.yaml -------------------------------------------------------------------------------- /lingua_recipes/Llama-8B-Nemo-1T-lr0.003.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/PhysicsLM4/HEAD/lingua_recipes/Llama-8B-Nemo-1T-lr0.003.yaml -------------------------------------------------------------------------------- /lingua_recipes/LlamaCanon-1B-Nemo-1T-lr0.002.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/PhysicsLM4/HEAD/lingua_recipes/LlamaCanon-1B-Nemo-1T-lr0.002.yaml -------------------------------------------------------------------------------- /lingua_recipes/LlamaCanon-1B-Nemo-1T-lr0.003.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/PhysicsLM4/HEAD/lingua_recipes/LlamaCanon-1B-Nemo-1T-lr0.003.yaml -------------------------------------------------------------------------------- /lingua_recipes/LlamaCanon-1B-Nemo-2T-lr0.003.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/PhysicsLM4/HEAD/lingua_recipes/LlamaCanon-1B-Nemo-2T-lr0.003.yaml -------------------------------------------------------------------------------- /lingua_recipes/LlamaCanon-1B-Nemo-2T-lr0.005.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/PhysicsLM4/HEAD/lingua_recipes/LlamaCanon-1B-Nemo-2T-lr0.005.yaml -------------------------------------------------------------------------------- /lingua_recipes/LlamaCanon-3B-Nemo-1T-lr0.002.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/PhysicsLM4/HEAD/lingua_recipes/LlamaCanon-3B-Nemo-1T-lr0.002.yaml -------------------------------------------------------------------------------- /lingua_recipes/LlamaCanon-3B-Nemo-1T-lr0.003.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/PhysicsLM4/HEAD/lingua_recipes/LlamaCanon-3B-Nemo-1T-lr0.003.yaml -------------------------------------------------------------------------------- /lingua_recipes/LlamaCanon-8B-Nemo-1T-lr0.002.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/PhysicsLM4/HEAD/lingua_recipes/LlamaCanon-8B-Nemo-1T-lr0.002.yaml -------------------------------------------------------------------------------- /lingua_recipes/LlamaCanon-8B-Nemo-1T-lr0.003.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/PhysicsLM4/HEAD/lingua_recipes/LlamaCanon-8B-Nemo-1T-lr0.003.yaml -------------------------------------------------------------------------------- /lingua_recipes/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/PhysicsLM4/HEAD/lingua_recipes/README.md -------------------------------------------------------------------------------- /lingua_results/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/PhysicsLM4/HEAD/lingua_results/README.md -------------------------------------------------------------------------------- /lingua_results/curve-mmlu-bos.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/PhysicsLM4/HEAD/lingua_results/curve-mmlu-bos.png -------------------------------------------------------------------------------- /lingua_results/curve-mmlu.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/PhysicsLM4/HEAD/lingua_results/curve-mmlu.png -------------------------------------------------------------------------------- /lingua_results/model-training-time.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/PhysicsLM4/HEAD/lingua_results/model-training-time.png -------------------------------------------------------------------------------- /lingua_results/table-params.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/PhysicsLM4/HEAD/lingua_results/table-params.png -------------------------------------------------------------------------------- /lingua_results/table-performance.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/PhysicsLM4/HEAD/lingua_results/table-performance.png -------------------------------------------------------------------------------- /lingua_results/training-curves-interactive.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/PhysicsLM4/HEAD/lingua_results/training-curves-interactive.html --------------------------------------------------------------------------------