├── .gitignore ├── LICENSE ├── README.md ├── evaluate_raven ├── checkpoint_to_hf_converter.py ├── local_lm_eval.py ├── misc_benchmark_variants │ ├── README.md │ ├── eval_alternate_loops.py │ ├── gms8k_long_cot.yaml │ ├── gsm8K_issue_16.yaml │ ├── gsm8k_passk.yaml │ ├── hf_eval_adaptive_compute.py │ ├── open_openbookqa.yaml │ └── pass_at_k_utils.py ├── quick_chat_rec_compare.py ├── quick_checkpoint_eval.py ├── record_steps_in_bench.py └── saturation_eval_dist.py ├── examples ├── inference_demo.ipynb ├── spec_decoding_example.ipynb └── trajectory_analysis.ipynb ├── finetuning_simple_example.py ├── launch_configs ├── frontier_baseline_256_nodes.yaml ├── frontier_nebel_512_nodes.yaml └── recurrent.yaml ├── launch_frontier.py ├── pyproject.toml ├── recpre ├── __init__.py ├── attention_backends │ ├── __init__.py │ ├── amd.py │ ├── binBlk.py │ ├── cuda_flash_attention.py │ ├── flex_attentions.py │ ├── interface.py │ ├── mosaic.py │ ├── openai.py │ ├── pytorch.py │ ├── testing.py │ └── triton_kernels_seq_par.py ├── checkpoint_patch.py ├── config_dynamic.py ├── data_loading_utils.py ├── data_scheduler_utils.py ├── huggingface_dataset.py ├── init.py ├── legacy_modeling_file.py ├── misc.py ├── model_dynamic.py ├── model_registry.py ├── monitor.py ├── norms.py ├── ops.py ├── optim.py ├── raven_config_minimal.json ├── raven_config_minimal.py ├── raven_modeling_minimal.py ├── settings.py ├── soap.py ├── tokenizer.py └── utils.py ├── scripts ├── classify_loops.py ├── compare_tokenizers.py ├── data_processing_pipeline.py ├── download_stream_to_local_copy.py ├── instructionsets.yaml ├── parquet_to_parquet_dedup.py ├── parquet_to_parquet_sentence_dedup.py ├── parquet_to_parquet_shuffler.py ├── parquet_to_parquet_tokenizer.py ├── parquet_token_counter.py ├── process_token_counts.py ├── scalable_data_download.py ├── softwareheritage.yaml ├── sources.yaml ├── target_domain_sets.yaml ├── the_stack_downloader.py └── tokenizer_generation.py ├── train.py └── vllm ├── README.md ├── chat_repl.py ├── generation_config.json ├── raven_vllm.py └── setup.py /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seal-rg/recurrent-pretraining/HEAD/.gitignore -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seal-rg/recurrent-pretraining/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seal-rg/recurrent-pretraining/HEAD/README.md -------------------------------------------------------------------------------- /evaluate_raven/checkpoint_to_hf_converter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seal-rg/recurrent-pretraining/HEAD/evaluate_raven/checkpoint_to_hf_converter.py -------------------------------------------------------------------------------- /evaluate_raven/local_lm_eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seal-rg/recurrent-pretraining/HEAD/evaluate_raven/local_lm_eval.py -------------------------------------------------------------------------------- /evaluate_raven/misc_benchmark_variants/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seal-rg/recurrent-pretraining/HEAD/evaluate_raven/misc_benchmark_variants/README.md -------------------------------------------------------------------------------- /evaluate_raven/misc_benchmark_variants/eval_alternate_loops.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seal-rg/recurrent-pretraining/HEAD/evaluate_raven/misc_benchmark_variants/eval_alternate_loops.py -------------------------------------------------------------------------------- /evaluate_raven/misc_benchmark_variants/gms8k_long_cot.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seal-rg/recurrent-pretraining/HEAD/evaluate_raven/misc_benchmark_variants/gms8k_long_cot.yaml -------------------------------------------------------------------------------- /evaluate_raven/misc_benchmark_variants/gsm8K_issue_16.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seal-rg/recurrent-pretraining/HEAD/evaluate_raven/misc_benchmark_variants/gsm8K_issue_16.yaml -------------------------------------------------------------------------------- /evaluate_raven/misc_benchmark_variants/gsm8k_passk.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seal-rg/recurrent-pretraining/HEAD/evaluate_raven/misc_benchmark_variants/gsm8k_passk.yaml -------------------------------------------------------------------------------- /evaluate_raven/misc_benchmark_variants/hf_eval_adaptive_compute.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seal-rg/recurrent-pretraining/HEAD/evaluate_raven/misc_benchmark_variants/hf_eval_adaptive_compute.py -------------------------------------------------------------------------------- /evaluate_raven/misc_benchmark_variants/open_openbookqa.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seal-rg/recurrent-pretraining/HEAD/evaluate_raven/misc_benchmark_variants/open_openbookqa.yaml -------------------------------------------------------------------------------- /evaluate_raven/misc_benchmark_variants/pass_at_k_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seal-rg/recurrent-pretraining/HEAD/evaluate_raven/misc_benchmark_variants/pass_at_k_utils.py -------------------------------------------------------------------------------- /evaluate_raven/quick_chat_rec_compare.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seal-rg/recurrent-pretraining/HEAD/evaluate_raven/quick_chat_rec_compare.py -------------------------------------------------------------------------------- /evaluate_raven/quick_checkpoint_eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seal-rg/recurrent-pretraining/HEAD/evaluate_raven/quick_checkpoint_eval.py -------------------------------------------------------------------------------- /evaluate_raven/record_steps_in_bench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seal-rg/recurrent-pretraining/HEAD/evaluate_raven/record_steps_in_bench.py -------------------------------------------------------------------------------- /evaluate_raven/saturation_eval_dist.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seal-rg/recurrent-pretraining/HEAD/evaluate_raven/saturation_eval_dist.py -------------------------------------------------------------------------------- /examples/inference_demo.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seal-rg/recurrent-pretraining/HEAD/examples/inference_demo.ipynb -------------------------------------------------------------------------------- /examples/spec_decoding_example.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seal-rg/recurrent-pretraining/HEAD/examples/spec_decoding_example.ipynb -------------------------------------------------------------------------------- /examples/trajectory_analysis.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seal-rg/recurrent-pretraining/HEAD/examples/trajectory_analysis.ipynb -------------------------------------------------------------------------------- /finetuning_simple_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seal-rg/recurrent-pretraining/HEAD/finetuning_simple_example.py -------------------------------------------------------------------------------- /launch_configs/frontier_baseline_256_nodes.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seal-rg/recurrent-pretraining/HEAD/launch_configs/frontier_baseline_256_nodes.yaml -------------------------------------------------------------------------------- /launch_configs/frontier_nebel_512_nodes.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seal-rg/recurrent-pretraining/HEAD/launch_configs/frontier_nebel_512_nodes.yaml -------------------------------------------------------------------------------- /launch_configs/recurrent.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seal-rg/recurrent-pretraining/HEAD/launch_configs/recurrent.yaml -------------------------------------------------------------------------------- /launch_frontier.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seal-rg/recurrent-pretraining/HEAD/launch_frontier.py -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seal-rg/recurrent-pretraining/HEAD/pyproject.toml -------------------------------------------------------------------------------- /recpre/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seal-rg/recurrent-pretraining/HEAD/recpre/__init__.py -------------------------------------------------------------------------------- /recpre/attention_backends/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seal-rg/recurrent-pretraining/HEAD/recpre/attention_backends/__init__.py -------------------------------------------------------------------------------- /recpre/attention_backends/amd.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seal-rg/recurrent-pretraining/HEAD/recpre/attention_backends/amd.py -------------------------------------------------------------------------------- /recpre/attention_backends/binBlk.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seal-rg/recurrent-pretraining/HEAD/recpre/attention_backends/binBlk.py -------------------------------------------------------------------------------- /recpre/attention_backends/cuda_flash_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seal-rg/recurrent-pretraining/HEAD/recpre/attention_backends/cuda_flash_attention.py -------------------------------------------------------------------------------- /recpre/attention_backends/flex_attentions.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seal-rg/recurrent-pretraining/HEAD/recpre/attention_backends/flex_attentions.py -------------------------------------------------------------------------------- /recpre/attention_backends/interface.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seal-rg/recurrent-pretraining/HEAD/recpre/attention_backends/interface.py -------------------------------------------------------------------------------- /recpre/attention_backends/mosaic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seal-rg/recurrent-pretraining/HEAD/recpre/attention_backends/mosaic.py -------------------------------------------------------------------------------- /recpre/attention_backends/openai.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seal-rg/recurrent-pretraining/HEAD/recpre/attention_backends/openai.py -------------------------------------------------------------------------------- /recpre/attention_backends/pytorch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seal-rg/recurrent-pretraining/HEAD/recpre/attention_backends/pytorch.py -------------------------------------------------------------------------------- /recpre/attention_backends/testing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seal-rg/recurrent-pretraining/HEAD/recpre/attention_backends/testing.py -------------------------------------------------------------------------------- /recpre/attention_backends/triton_kernels_seq_par.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seal-rg/recurrent-pretraining/HEAD/recpre/attention_backends/triton_kernels_seq_par.py -------------------------------------------------------------------------------- /recpre/checkpoint_patch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seal-rg/recurrent-pretraining/HEAD/recpre/checkpoint_patch.py -------------------------------------------------------------------------------- /recpre/config_dynamic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seal-rg/recurrent-pretraining/HEAD/recpre/config_dynamic.py -------------------------------------------------------------------------------- /recpre/data_loading_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seal-rg/recurrent-pretraining/HEAD/recpre/data_loading_utils.py -------------------------------------------------------------------------------- /recpre/data_scheduler_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seal-rg/recurrent-pretraining/HEAD/recpre/data_scheduler_utils.py -------------------------------------------------------------------------------- /recpre/huggingface_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seal-rg/recurrent-pretraining/HEAD/recpre/huggingface_dataset.py -------------------------------------------------------------------------------- /recpre/init.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seal-rg/recurrent-pretraining/HEAD/recpre/init.py -------------------------------------------------------------------------------- /recpre/legacy_modeling_file.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seal-rg/recurrent-pretraining/HEAD/recpre/legacy_modeling_file.py -------------------------------------------------------------------------------- /recpre/misc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seal-rg/recurrent-pretraining/HEAD/recpre/misc.py -------------------------------------------------------------------------------- /recpre/model_dynamic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seal-rg/recurrent-pretraining/HEAD/recpre/model_dynamic.py -------------------------------------------------------------------------------- /recpre/model_registry.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seal-rg/recurrent-pretraining/HEAD/recpre/model_registry.py -------------------------------------------------------------------------------- /recpre/monitor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seal-rg/recurrent-pretraining/HEAD/recpre/monitor.py -------------------------------------------------------------------------------- /recpre/norms.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seal-rg/recurrent-pretraining/HEAD/recpre/norms.py -------------------------------------------------------------------------------- /recpre/ops.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seal-rg/recurrent-pretraining/HEAD/recpre/ops.py -------------------------------------------------------------------------------- /recpre/optim.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seal-rg/recurrent-pretraining/HEAD/recpre/optim.py -------------------------------------------------------------------------------- /recpre/raven_config_minimal.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seal-rg/recurrent-pretraining/HEAD/recpre/raven_config_minimal.json -------------------------------------------------------------------------------- /recpre/raven_config_minimal.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seal-rg/recurrent-pretraining/HEAD/recpre/raven_config_minimal.py -------------------------------------------------------------------------------- /recpre/raven_modeling_minimal.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seal-rg/recurrent-pretraining/HEAD/recpre/raven_modeling_minimal.py -------------------------------------------------------------------------------- /recpre/settings.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seal-rg/recurrent-pretraining/HEAD/recpre/settings.py -------------------------------------------------------------------------------- /recpre/soap.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seal-rg/recurrent-pretraining/HEAD/recpre/soap.py -------------------------------------------------------------------------------- /recpre/tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seal-rg/recurrent-pretraining/HEAD/recpre/tokenizer.py -------------------------------------------------------------------------------- /recpre/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seal-rg/recurrent-pretraining/HEAD/recpre/utils.py -------------------------------------------------------------------------------- /scripts/classify_loops.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seal-rg/recurrent-pretraining/HEAD/scripts/classify_loops.py -------------------------------------------------------------------------------- /scripts/compare_tokenizers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seal-rg/recurrent-pretraining/HEAD/scripts/compare_tokenizers.py -------------------------------------------------------------------------------- /scripts/data_processing_pipeline.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seal-rg/recurrent-pretraining/HEAD/scripts/data_processing_pipeline.py -------------------------------------------------------------------------------- /scripts/download_stream_to_local_copy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seal-rg/recurrent-pretraining/HEAD/scripts/download_stream_to_local_copy.py -------------------------------------------------------------------------------- /scripts/instructionsets.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seal-rg/recurrent-pretraining/HEAD/scripts/instructionsets.yaml -------------------------------------------------------------------------------- /scripts/parquet_to_parquet_dedup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seal-rg/recurrent-pretraining/HEAD/scripts/parquet_to_parquet_dedup.py -------------------------------------------------------------------------------- /scripts/parquet_to_parquet_sentence_dedup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seal-rg/recurrent-pretraining/HEAD/scripts/parquet_to_parquet_sentence_dedup.py -------------------------------------------------------------------------------- /scripts/parquet_to_parquet_shuffler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seal-rg/recurrent-pretraining/HEAD/scripts/parquet_to_parquet_shuffler.py -------------------------------------------------------------------------------- /scripts/parquet_to_parquet_tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seal-rg/recurrent-pretraining/HEAD/scripts/parquet_to_parquet_tokenizer.py -------------------------------------------------------------------------------- /scripts/parquet_token_counter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seal-rg/recurrent-pretraining/HEAD/scripts/parquet_token_counter.py -------------------------------------------------------------------------------- /scripts/process_token_counts.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seal-rg/recurrent-pretraining/HEAD/scripts/process_token_counts.py -------------------------------------------------------------------------------- /scripts/scalable_data_download.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seal-rg/recurrent-pretraining/HEAD/scripts/scalable_data_download.py -------------------------------------------------------------------------------- /scripts/softwareheritage.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seal-rg/recurrent-pretraining/HEAD/scripts/softwareheritage.yaml -------------------------------------------------------------------------------- /scripts/sources.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seal-rg/recurrent-pretraining/HEAD/scripts/sources.yaml -------------------------------------------------------------------------------- /scripts/target_domain_sets.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seal-rg/recurrent-pretraining/HEAD/scripts/target_domain_sets.yaml -------------------------------------------------------------------------------- /scripts/the_stack_downloader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seal-rg/recurrent-pretraining/HEAD/scripts/the_stack_downloader.py -------------------------------------------------------------------------------- /scripts/tokenizer_generation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seal-rg/recurrent-pretraining/HEAD/scripts/tokenizer_generation.py -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seal-rg/recurrent-pretraining/HEAD/train.py -------------------------------------------------------------------------------- /vllm/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seal-rg/recurrent-pretraining/HEAD/vllm/README.md -------------------------------------------------------------------------------- /vllm/chat_repl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seal-rg/recurrent-pretraining/HEAD/vllm/chat_repl.py -------------------------------------------------------------------------------- /vllm/generation_config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seal-rg/recurrent-pretraining/HEAD/vllm/generation_config.json -------------------------------------------------------------------------------- /vllm/raven_vllm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seal-rg/recurrent-pretraining/HEAD/vllm/raven_vllm.py -------------------------------------------------------------------------------- /vllm/setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seal-rg/recurrent-pretraining/HEAD/vllm/setup.py --------------------------------------------------------------------------------