├── README.md ├── preprocess_token_PI ├── FSProcessor.py ├── __init__.py └── dataprocessor.py ├── requirements.txt ├── train_LR_llama3_target80k_use24k.sh └── utils ├── __init__.py ├── accelerate_configs ├── single_node.yaml ├── single_node_2.yaml ├── zero3_offload.json └── zero3_offload_stage2.json ├── easy_context ├── __init__.py ├── dist_flash_attn │ ├── __pycache__ │ │ ├── async_communication.cpython-310.pyc │ │ ├── lightseq_async_attn.cpython-310.pyc │ │ ├── monkey_patch.cpython-310.pyc │ │ └── prepare_input.cpython-310.pyc │ ├── async_communication.py │ ├── lightseq_async_attn.py │ ├── lightseq_async_attn_varlen.py │ ├── monkey_patch.py │ └── prepare_input.py ├── ulysses_attn │ ├── __pycache__ │ │ ├── monkey_patch.cpython-310.pyc │ │ └── prepare_inputs.cpython-310.pyc │ ├── monkey_patch.py │ └── prepare_inputs.py ├── unsloth_offloaded_gradient_checkpoint │ ├── __pycache__ │ │ └── monkey_patch.cpython-310.pyc │ └── monkey_patch.py └── zigzag_ring_attn │ ├── __pycache__ │ ├── monkey_patch.cpython-310.pyc │ └── prepare_inputs.cpython-310.pyc │ ├── monkey_patch.py │ └── prepare_inputs.py ├── loader.py ├── logger.py ├── logits_compute ├── logits_compute.py ├── readme.md └── sampler.py ├── preprocess_data.py └── train.py /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhiyuanhubj/LongRecipe/HEAD/README.md -------------------------------------------------------------------------------- /preprocess_token_PI/FSProcessor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhiyuanhubj/LongRecipe/HEAD/preprocess_token_PI/FSProcessor.py -------------------------------------------------------------------------------- /preprocess_token_PI/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /preprocess_token_PI/dataprocessor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhiyuanhubj/LongRecipe/HEAD/preprocess_token_PI/dataprocessor.py -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhiyuanhubj/LongRecipe/HEAD/requirements.txt -------------------------------------------------------------------------------- /train_LR_llama3_target80k_use24k.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhiyuanhubj/LongRecipe/HEAD/train_LR_llama3_target80k_use24k.sh -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /utils/accelerate_configs/single_node.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhiyuanhubj/LongRecipe/HEAD/utils/accelerate_configs/single_node.yaml -------------------------------------------------------------------------------- /utils/accelerate_configs/single_node_2.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhiyuanhubj/LongRecipe/HEAD/utils/accelerate_configs/single_node_2.yaml -------------------------------------------------------------------------------- /utils/accelerate_configs/zero3_offload.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhiyuanhubj/LongRecipe/HEAD/utils/accelerate_configs/zero3_offload.json -------------------------------------------------------------------------------- /utils/accelerate_configs/zero3_offload_stage2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhiyuanhubj/LongRecipe/HEAD/utils/accelerate_configs/zero3_offload_stage2.json -------------------------------------------------------------------------------- /utils/easy_context/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhiyuanhubj/LongRecipe/HEAD/utils/easy_context/__init__.py -------------------------------------------------------------------------------- /utils/easy_context/dist_flash_attn/__pycache__/async_communication.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhiyuanhubj/LongRecipe/HEAD/utils/easy_context/dist_flash_attn/__pycache__/async_communication.cpython-310.pyc -------------------------------------------------------------------------------- /utils/easy_context/dist_flash_attn/__pycache__/lightseq_async_attn.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhiyuanhubj/LongRecipe/HEAD/utils/easy_context/dist_flash_attn/__pycache__/lightseq_async_attn.cpython-310.pyc -------------------------------------------------------------------------------- /utils/easy_context/dist_flash_attn/__pycache__/monkey_patch.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhiyuanhubj/LongRecipe/HEAD/utils/easy_context/dist_flash_attn/__pycache__/monkey_patch.cpython-310.pyc -------------------------------------------------------------------------------- /utils/easy_context/dist_flash_attn/__pycache__/prepare_input.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhiyuanhubj/LongRecipe/HEAD/utils/easy_context/dist_flash_attn/__pycache__/prepare_input.cpython-310.pyc -------------------------------------------------------------------------------- /utils/easy_context/dist_flash_attn/async_communication.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhiyuanhubj/LongRecipe/HEAD/utils/easy_context/dist_flash_attn/async_communication.py -------------------------------------------------------------------------------- /utils/easy_context/dist_flash_attn/lightseq_async_attn.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhiyuanhubj/LongRecipe/HEAD/utils/easy_context/dist_flash_attn/lightseq_async_attn.py -------------------------------------------------------------------------------- /utils/easy_context/dist_flash_attn/lightseq_async_attn_varlen.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhiyuanhubj/LongRecipe/HEAD/utils/easy_context/dist_flash_attn/lightseq_async_attn_varlen.py -------------------------------------------------------------------------------- /utils/easy_context/dist_flash_attn/monkey_patch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhiyuanhubj/LongRecipe/HEAD/utils/easy_context/dist_flash_attn/monkey_patch.py -------------------------------------------------------------------------------- /utils/easy_context/dist_flash_attn/prepare_input.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhiyuanhubj/LongRecipe/HEAD/utils/easy_context/dist_flash_attn/prepare_input.py -------------------------------------------------------------------------------- /utils/easy_context/ulysses_attn/__pycache__/monkey_patch.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhiyuanhubj/LongRecipe/HEAD/utils/easy_context/ulysses_attn/__pycache__/monkey_patch.cpython-310.pyc -------------------------------------------------------------------------------- /utils/easy_context/ulysses_attn/__pycache__/prepare_inputs.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhiyuanhubj/LongRecipe/HEAD/utils/easy_context/ulysses_attn/__pycache__/prepare_inputs.cpython-310.pyc -------------------------------------------------------------------------------- /utils/easy_context/ulysses_attn/monkey_patch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhiyuanhubj/LongRecipe/HEAD/utils/easy_context/ulysses_attn/monkey_patch.py -------------------------------------------------------------------------------- /utils/easy_context/ulysses_attn/prepare_inputs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhiyuanhubj/LongRecipe/HEAD/utils/easy_context/ulysses_attn/prepare_inputs.py -------------------------------------------------------------------------------- /utils/easy_context/unsloth_offloaded_gradient_checkpoint/__pycache__/monkey_patch.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhiyuanhubj/LongRecipe/HEAD/utils/easy_context/unsloth_offloaded_gradient_checkpoint/__pycache__/monkey_patch.cpython-310.pyc -------------------------------------------------------------------------------- /utils/easy_context/unsloth_offloaded_gradient_checkpoint/monkey_patch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhiyuanhubj/LongRecipe/HEAD/utils/easy_context/unsloth_offloaded_gradient_checkpoint/monkey_patch.py -------------------------------------------------------------------------------- /utils/easy_context/zigzag_ring_attn/__pycache__/monkey_patch.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhiyuanhubj/LongRecipe/HEAD/utils/easy_context/zigzag_ring_attn/__pycache__/monkey_patch.cpython-310.pyc -------------------------------------------------------------------------------- /utils/easy_context/zigzag_ring_attn/__pycache__/prepare_inputs.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhiyuanhubj/LongRecipe/HEAD/utils/easy_context/zigzag_ring_attn/__pycache__/prepare_inputs.cpython-310.pyc -------------------------------------------------------------------------------- /utils/easy_context/zigzag_ring_attn/monkey_patch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhiyuanhubj/LongRecipe/HEAD/utils/easy_context/zigzag_ring_attn/monkey_patch.py -------------------------------------------------------------------------------- /utils/easy_context/zigzag_ring_attn/prepare_inputs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhiyuanhubj/LongRecipe/HEAD/utils/easy_context/zigzag_ring_attn/prepare_inputs.py -------------------------------------------------------------------------------- /utils/loader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhiyuanhubj/LongRecipe/HEAD/utils/loader.py -------------------------------------------------------------------------------- /utils/logger.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhiyuanhubj/LongRecipe/HEAD/utils/logger.py -------------------------------------------------------------------------------- /utils/logits_compute/logits_compute.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhiyuanhubj/LongRecipe/HEAD/utils/logits_compute/logits_compute.py -------------------------------------------------------------------------------- /utils/logits_compute/readme.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhiyuanhubj/LongRecipe/HEAD/utils/logits_compute/readme.md -------------------------------------------------------------------------------- /utils/logits_compute/sampler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhiyuanhubj/LongRecipe/HEAD/utils/logits_compute/sampler.py -------------------------------------------------------------------------------- /utils/preprocess_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhiyuanhubj/LongRecipe/HEAD/utils/preprocess_data.py -------------------------------------------------------------------------------- /utils/train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhiyuanhubj/LongRecipe/HEAD/utils/train.py --------------------------------------------------------------------------------