├── .gitignore ├── LICENSE ├── README.md ├── data ├── config │ ├── even.json │ ├── fsdp-offload.yaml │ ├── fsdp.yaml │ ├── slimpajama.json │ ├── zero3-infer-offload.yaml │ └── zero3-infer.yaml ├── deepspeed │ ├── stage2-offload.json │ ├── stage2.json │ ├── stage3-offload-optim.json │ ├── stage3-offload.json │ └── stage3.json └── toy │ └── nqa.json ├── docs ├── evaluation.md └── training.md ├── imgs └── ultragist.png ├── main ├── eval_generation.py ├── eval_infbench.py ├── eval_lm.py ├── eval_longbench.py ├── eval_mmlu.py ├── eval_msc.py ├── eval_multiturn.py ├── eval_needle.py ├── eval_passkey.py ├── eval_topic.py ├── infbench_utils.py ├── longbench_utils.py ├── pretrain_data.py └── train.py └── src ├── __init__.py ├── args.py ├── chat.py ├── data.py ├── llama ├── __init__.py ├── configuration_llama.py └── modeling_llama.py ├── metrics.py ├── mistral ├── __init__.py ├── configuration_mistral.py └── modeling_mistral.py ├── modeling_ultragist.py ├── modeling_utils.py ├── trainer.py └── utils.py /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/namespace-Pt/UltraGist/HEAD/.gitignore -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/namespace-Pt/UltraGist/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/namespace-Pt/UltraGist/HEAD/README.md -------------------------------------------------------------------------------- /data/config/even.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/namespace-Pt/UltraGist/HEAD/data/config/even.json -------------------------------------------------------------------------------- /data/config/fsdp-offload.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/namespace-Pt/UltraGist/HEAD/data/config/fsdp-offload.yaml -------------------------------------------------------------------------------- /data/config/fsdp.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/namespace-Pt/UltraGist/HEAD/data/config/fsdp.yaml -------------------------------------------------------------------------------- /data/config/slimpajama.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/namespace-Pt/UltraGist/HEAD/data/config/slimpajama.json -------------------------------------------------------------------------------- /data/config/zero3-infer-offload.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/namespace-Pt/UltraGist/HEAD/data/config/zero3-infer-offload.yaml -------------------------------------------------------------------------------- /data/config/zero3-infer.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/namespace-Pt/UltraGist/HEAD/data/config/zero3-infer.yaml -------------------------------------------------------------------------------- /data/deepspeed/stage2-offload.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/namespace-Pt/UltraGist/HEAD/data/deepspeed/stage2-offload.json -------------------------------------------------------------------------------- /data/deepspeed/stage2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/namespace-Pt/UltraGist/HEAD/data/deepspeed/stage2.json -------------------------------------------------------------------------------- /data/deepspeed/stage3-offload-optim.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/namespace-Pt/UltraGist/HEAD/data/deepspeed/stage3-offload-optim.json -------------------------------------------------------------------------------- /data/deepspeed/stage3-offload.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/namespace-Pt/UltraGist/HEAD/data/deepspeed/stage3-offload.json -------------------------------------------------------------------------------- /data/deepspeed/stage3.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/namespace-Pt/UltraGist/HEAD/data/deepspeed/stage3.json -------------------------------------------------------------------------------- /data/toy/nqa.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/namespace-Pt/UltraGist/HEAD/data/toy/nqa.json -------------------------------------------------------------------------------- /docs/evaluation.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/namespace-Pt/UltraGist/HEAD/docs/evaluation.md -------------------------------------------------------------------------------- /docs/training.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/namespace-Pt/UltraGist/HEAD/docs/training.md -------------------------------------------------------------------------------- /imgs/ultragist.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/namespace-Pt/UltraGist/HEAD/imgs/ultragist.png -------------------------------------------------------------------------------- /main/eval_generation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/namespace-Pt/UltraGist/HEAD/main/eval_generation.py -------------------------------------------------------------------------------- /main/eval_infbench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/namespace-Pt/UltraGist/HEAD/main/eval_infbench.py -------------------------------------------------------------------------------- /main/eval_lm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/namespace-Pt/UltraGist/HEAD/main/eval_lm.py -------------------------------------------------------------------------------- /main/eval_longbench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/namespace-Pt/UltraGist/HEAD/main/eval_longbench.py -------------------------------------------------------------------------------- /main/eval_mmlu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/namespace-Pt/UltraGist/HEAD/main/eval_mmlu.py -------------------------------------------------------------------------------- /main/eval_msc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/namespace-Pt/UltraGist/HEAD/main/eval_msc.py -------------------------------------------------------------------------------- /main/eval_multiturn.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/namespace-Pt/UltraGist/HEAD/main/eval_multiturn.py -------------------------------------------------------------------------------- /main/eval_needle.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/namespace-Pt/UltraGist/HEAD/main/eval_needle.py -------------------------------------------------------------------------------- /main/eval_passkey.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/namespace-Pt/UltraGist/HEAD/main/eval_passkey.py -------------------------------------------------------------------------------- /main/eval_topic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/namespace-Pt/UltraGist/HEAD/main/eval_topic.py -------------------------------------------------------------------------------- /main/infbench_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/namespace-Pt/UltraGist/HEAD/main/infbench_utils.py -------------------------------------------------------------------------------- /main/longbench_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/namespace-Pt/UltraGist/HEAD/main/longbench_utils.py -------------------------------------------------------------------------------- /main/pretrain_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/namespace-Pt/UltraGist/HEAD/main/pretrain_data.py -------------------------------------------------------------------------------- /main/train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/namespace-Pt/UltraGist/HEAD/main/train.py -------------------------------------------------------------------------------- /src/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/namespace-Pt/UltraGist/HEAD/src/__init__.py -------------------------------------------------------------------------------- /src/args.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/namespace-Pt/UltraGist/HEAD/src/args.py -------------------------------------------------------------------------------- /src/chat.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/namespace-Pt/UltraGist/HEAD/src/chat.py -------------------------------------------------------------------------------- /src/data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/namespace-Pt/UltraGist/HEAD/src/data.py -------------------------------------------------------------------------------- /src/llama/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/namespace-Pt/UltraGist/HEAD/src/llama/__init__.py -------------------------------------------------------------------------------- /src/llama/configuration_llama.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/namespace-Pt/UltraGist/HEAD/src/llama/configuration_llama.py -------------------------------------------------------------------------------- /src/llama/modeling_llama.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/namespace-Pt/UltraGist/HEAD/src/llama/modeling_llama.py -------------------------------------------------------------------------------- /src/metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/namespace-Pt/UltraGist/HEAD/src/metrics.py -------------------------------------------------------------------------------- /src/mistral/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/namespace-Pt/UltraGist/HEAD/src/mistral/__init__.py -------------------------------------------------------------------------------- /src/mistral/configuration_mistral.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/namespace-Pt/UltraGist/HEAD/src/mistral/configuration_mistral.py -------------------------------------------------------------------------------- /src/mistral/modeling_mistral.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/namespace-Pt/UltraGist/HEAD/src/mistral/modeling_mistral.py -------------------------------------------------------------------------------- /src/modeling_ultragist.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/namespace-Pt/UltraGist/HEAD/src/modeling_ultragist.py -------------------------------------------------------------------------------- /src/modeling_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/namespace-Pt/UltraGist/HEAD/src/modeling_utils.py -------------------------------------------------------------------------------- /src/trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/namespace-Pt/UltraGist/HEAD/src/trainer.py -------------------------------------------------------------------------------- /src/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/namespace-Pt/UltraGist/HEAD/src/utils.py --------------------------------------------------------------------------------