├── .gitattributes ├── .gitignore ├── LICENSE ├── README.md ├── assets ├── .keep ├── first-release.pdf └── octothinker_banner.png ├── eval ├── README.md ├── configs │ └── en_math_cot_few_shot_test_configs.json ├── data_processing │ ├── answer_extraction.py │ └── process_utils.py ├── datasets │ ├── amc23 │ │ └── test.jsonl │ ├── asdiv │ │ └── test.jsonl │ ├── gpqa │ │ ├── gpqa_diamond.json │ │ ├── gpqa_download.py │ │ └── gpqa_main.json │ ├── gsm8k │ │ └── test.jsonl │ ├── math │ │ └── test.jsonl │ ├── math500 │ │ └── test.jsonl │ ├── mathqa │ │ └── test.jsonl │ ├── mawps │ │ └── test.jsonl │ ├── mmlu_stem │ │ ├── analysis.py │ │ └── test.jsonl │ ├── ocw │ │ └── test.jsonl │ ├── olympiad_bench │ │ └── test.jsonl │ ├── sat │ │ └── test.jsonl │ ├── svamp │ │ └── test.jsonl │ └── tabmwp │ │ └── test.jsonl ├── eval │ ├── eval_script.py │ ├── eval_utils.py │ ├── ocwcourses_eval_utils.py │ ├── python_executor.py │ └── utils.py ├── few_shot_prompts │ ├── __init__.py │ ├── cot_amc_0_shot.py │ ├── cot_gpqa_5_shot.py │ ├── cot_gsm_8_shot.py │ ├── cot_math_sat_4_shot.py │ ├── cot_mathqa_4_shot.py │ ├── cot_minerva_math_4_shot.py │ ├── cot_mmlu_stem_4_shot.py │ ├── cot_ocwcourses_4_shot.py │ └── few_shot_prompting.py ├── infer │ └── run_cot_eval.py ├── requirements.txt ├── run_subset_parallel.py ├── scripts │ └── en_math_cot_eval_last4dir.sh ├── summarize_results.py └── utils.py ├── midtraining └── nanotron │ ├── .github │ └── workflows │ │ ├── 3d_parallelism_unit_tests.yaml │ │ ├── code_quality.yaml │ │ ├── fa2_unit_tests.yaml │ │ ├── python-release.yml │ │ └── trufflehog.yml │ ├── .gitignore │ ├── .pre-commit-config-check.yaml │ ├── .pre-commit-config.yaml │ ├── .pylintrc │ ├── CODE_OF_CONDUCT.md │ ├── CONTRIBUTING.md │ ├── LICENSE │ ├── Makefile │ ├── README.md │ ├── docs │ ├── 3d_parallelism.md │ ├── debugging.md │ ├── docs.md │ ├── image-2.png │ ├── image.png │ └── nanoset.md │ ├── examples │ ├── bench_llama_7b.py │ ├── config_nanoset.yaml │ ├── config_tiny_llama.py │ ├── config_tiny_llama.yaml │ ├── config_tiny_llama_with_s3_upload.yaml │ ├── contributor-guide │ │ ├── README.md │ │ ├── assets │ │ │ ├── 1.png │ │ │ ├── 10.png │ │ │ ├── 11.png │ │ │ ├── 2.png │ │ │ ├── 3.png │ │ │ ├── 4.png │ │ │ ├── 5.png │ │ │ ├── 6.png │ │ │ ├── 7.png │ │ │ ├── 8.png │ │ │ └── 9.png │ │ ├── debug_config_tiny_llama.py │ │ ├── debug_config_tiny_llama.yaml │ │ └── debug_tiny_llama.sh │ ├── custom-dataloader │ │ ├── README.md │ │ ├── config_custom_dl.yaml │ │ └── run_train.py │ ├── doremi │ │ ├── README.md │ │ ├── __init__.py │ │ ├── assets │ │ │ ├── domain_weights.png │ │ │ ├── not_outperform.png │ │ │ └── outperform.png │ │ ├── configs │ │ │ ├── config_2.8b_llama.yaml │ │ │ ├── config_2.8b_llama_with_tuned_weights.yaml │ │ │ ├── config_280m_llama.yaml │ │ │ └── config_280m_llama_proxy.yaml │ │ ├── doremi │ │ │ ├── __init__.py │ │ │ ├── config.py │ │ │ ├── dataloader.py │ │ │ ├── doremi_context.py │ │ │ ├── llama.py │ │ │ ├── loss.py │ │ │ ├── trainer.py │ │ │ └── utils.py │ │ ├── requirements.txt │ │ ├── tests │ │ │ ├── test_doremi_context.py │ │ │ ├── test_doremi_dataloader.py │ │ │ ├── test_doremi_loss.py │ │ │ ├── test_doremi_sampler.py │ │ │ ├── test_doremi_utils.py │ │ │ └── utils.py │ │ ├── train_doremi.py │ │ ├── train_reference.py │ │ └── utils.py │ ├── llama │ │ ├── README.md │ │ ├── __init__.py │ │ ├── convert_hf_to_nanotron.py │ │ ├── convert_nanotron_to_hf.py │ │ ├── convert_weights.py │ │ ├── requirements.txt │ │ └── tests │ │ │ ├── test_conversion.py │ │ │ ├── test_conversion.py.orig │ │ │ └── utils.py │ ├── mamba │ │ ├── README.md │ │ ├── assets │ │ │ └── loss_mamba.png │ │ ├── config.py │ │ ├── config_mamba.yaml │ │ ├── convert_hf_to_nanotron.py │ │ ├── convert_nanotron_to_hf.py │ │ ├── create_config_mamba.py │ │ ├── mamba.py │ │ ├── requirements.txt │ │ ├── run_generate.py │ │ ├── selective_scan_interface.py │ │ ├── train_mamba.py │ │ ├── train_mamba.sh │ │ └── trainer.py │ ├── moe │ │ ├── README.md │ │ ├── config_llamoe.py │ │ ├── config_llamoe.yaml │ │ ├── llamoe.py │ │ ├── moe.py │ │ ├── requirements.txt │ │ └── train_moe.py │ ├── mup │ │ ├── README.md │ │ ├── assets │ │ │ ├── llama.png │ │ │ ├── scale-across-depth.png │ │ │ └── scale-across-width.png │ │ └── configs │ │ │ ├── mup_350m_llama_config.yaml │ │ │ └── sp_350m_llama_config.yaml │ └── train_tiny_llama.sh │ ├── pyproject.toml │ ├── run_generate.py │ ├── run_train.py │ ├── scripts │ ├── fix_checkpoint_bad_naming.py │ └── log_lighteval_to_wandb.py │ ├── src │ └── nanotron │ │ ├── __init__.py │ │ ├── config │ │ ├── __init__.py │ │ ├── config.py │ │ ├── lighteval_config.py │ │ ├── models_config.py │ │ ├── parallelism_config.py │ │ └── utils_config.py │ │ ├── constants.py │ │ ├── data │ │ ├── collator.py │ │ ├── dataloader_builder.py │ │ ├── nanoset.py │ │ └── utils.py │ │ ├── dataloader.py │ │ ├── distributed.py │ │ ├── fp8 │ │ ├── __init__.py │ │ ├── constants.py │ │ ├── dtypes.py │ │ ├── kernel.py │ │ ├── linear.py │ │ ├── meta.py │ │ ├── parameter.py │ │ ├── tensor.py │ │ └── utils.py │ │ ├── generation │ │ ├── __init__.py │ │ ├── decode.py │ │ ├── generate_store.py │ │ └── sampler.py │ │ ├── helpers.py │ │ ├── logging.py │ │ ├── models │ │ ├── __init__.py │ │ ├── base.py │ │ ├── llama.py │ │ └── starcoder2.py │ │ ├── nn │ │ ├── __init__.py │ │ ├── activations.py │ │ └── layer_norm.py │ │ ├── optim │ │ ├── __init__.py │ │ ├── base.py │ │ ├── clip_grads.py │ │ ├── gradient_accumulator.py │ │ ├── inherit_from_other_optimizer.py │ │ ├── named_optimizer.py │ │ ├── optimizer_from_gradient_accumulator.py │ │ └── zero.py │ │ ├── parallel │ │ ├── __init__.py │ │ ├── context.py │ │ ├── data_parallel │ │ │ └── utils.py │ │ ├── parameters.py │ │ ├── pipeline_parallel │ │ │ ├── README.md │ │ │ ├── __init__.py │ │ │ ├── block.py │ │ │ ├── context_manager.py │ │ │ ├── engine.py │ │ │ ├── functional.py │ │ │ ├── p2p.py │ │ │ ├── state.py │ │ │ ├── tensor_pointer.py │ │ │ └── utils.py │ │ ├── sharded_parameters.py │ │ ├── tensor_parallel │ │ │ ├── __init__.py │ │ │ ├── distributed_differentiable_primitives.py │ │ │ ├── enum.py │ │ │ ├── functional.py │ │ │ └── nn.py │ │ ├── tied_parameters.py │ │ └── utils.py │ │ ├── random.py │ │ ├── s3_checkpoints │ │ ├── __init__.py │ │ ├── fsspec.py │ │ └── s3_mover.py │ │ ├── sanity_checks.py │ │ ├── scaling │ │ └── parametrization.py │ │ ├── serialize │ │ ├── __init__.py │ │ ├── main.py │ │ ├── metadata.py │ │ ├── optimizer.py │ │ ├── random.py │ │ ├── utils.py │ │ └── weights.py │ │ ├── trainer.py │ │ └── utils.py │ ├── tests │ ├── fp8 │ │ ├── test_fp8_parameter.py │ │ ├── test_linear.py │ │ └── test_tensor.py │ ├── helpers │ │ ├── context.py │ │ ├── data.py │ │ ├── distributed_tensor.py │ │ ├── dummy.py │ │ ├── exception.py │ │ ├── llama.py │ │ └── utils.py │ ├── kernels │ │ ├── run_layer_norm_convergence.py │ │ └── test_layer_norm.py │ ├── nanoset │ │ └── test_build_nanoset_dataloader.py │ ├── pytest.ini │ ├── test_base_model.py │ ├── test_checkpointing.py │ ├── test_clip_grads.py │ ├── test_data_parallel.py │ ├── test_distributed.py │ ├── test_optimizer.py │ ├── test_optimizer_params_groups.py │ ├── test_p2p.py │ ├── test_parameter.py │ ├── test_parameters_accumulate_gradient_in_fp32.py │ ├── test_parametrization.py │ ├── test_pipeline_parallel.py │ ├── test_random_state.py │ ├── test_serialize.py │ ├── test_tensor_parallel.py │ ├── test_tie_weights.py │ └── test_zero.py │ └── tools │ └── preprocess_data.py ├── plot ├── data │ ├── .DS_Store │ ├── 3b_decay_ablation_omi2_openr1_mix_ratio │ │ ├── acc_amc23.csv │ │ ├── acc_gsm8k.csv │ │ ├── acc_math500.csv │ │ ├── acc_olympiadbench.csv │ │ ├── length_amc23.csv │ │ ├── length_gsm8k.csv │ │ ├── length_math500.csv │ │ └── length_olympiadbench.csv │ ├── 3b_decay_ablation_omi2_ratio │ │ ├── acc_amc23.csv │ │ ├── acc_gsm8k.csv │ │ ├── acc_math500.csv │ │ ├── acc_olympiadbench.csv │ │ ├── length_amc23.csv │ │ ├── length_gsm8k.csv │ │ ├── length_math500.csv │ │ └── length_olympiadbench.csv │ ├── 3b_decay_ablation_openr1_ratio │ │ ├── acc_amc23.csv │ │ ├── acc_gsm8k.csv │ │ ├── acc_math500.csv │ │ ├── acc_olympiadbench.csv │ │ ├── length_amc23.csv │ │ ├── length_gsm8k.csv │ │ ├── length_math500.csv │ │ └── length_olympiadbench.csv │ ├── 3b_decay_ablation_shortqa_ratio │ │ ├── acc_amc23.csv │ │ ├── acc_gsm8k.csv │ │ ├── acc_math500.csv │ │ ├── acc_olympiadbench.csv │ │ ├── length_amc23.csv │ │ ├── length_gsm8k.csv │ │ ├── length_math500.csv │ │ └── length_olympiadbench.csv │ ├── analysis_data_quality │ │ ├── acc_amc23.csv │ │ ├── acc_gsm8k.csv │ │ ├── acc_math500.csv │ │ ├── acc_olympiadbench.csv │ │ ├── length_amc23.csv │ │ ├── length_gsm8k.csv │ │ ├── length_math500.csv │ │ └── length_olympiadbench.csv │ ├── analysis_data_quantity │ │ ├── acc_amc23.csv │ │ ├── acc_gsm8k.csv │ │ ├── acc_math500.csv │ │ ├── acc_olympiadbench.csv │ │ ├── length_amc23.csv │ │ ├── length_gsm8k.csv │ │ ├── length_math500.csv │ │ └── length_olympiadbench.csv │ ├── analysis_instruction │ │ ├── acc_amc23.csv │ │ ├── acc_gsm8k.csv │ │ ├── acc_math500.csv │ │ ├── acc_olympiadbench.csv │ │ ├── length_amc23.csv │ │ ├── length_gsm8k.csv │ │ ├── length_math500.csv │ │ └── length_olympiadbench.csv │ ├── analysis_length_scheduler │ │ ├── acc_amc23.csv │ │ ├── acc_gsm8k.csv │ │ ├── acc_math500.csv │ │ ├── acc_olympiadbench.csv │ │ ├── length_amc23.csv │ │ ├── length_gsm8k.csv │ │ ├── length_math500.csv │ │ └── length_olympiadbench.csv │ ├── analysis_longcot │ │ ├── wandb_export_2025-04-17T23_17_00.528+08_00.csv │ │ ├── wandb_export_2025-04-17T23_17_07.600+08_00.csv │ │ ├── wandb_export_2025-04-17T23_17_11.381+08_00.csv │ │ ├── wandb_export_2025-04-17T23_17_15.974+08_00.csv │ │ ├── wandb_export_2025-04-17T23_17_20.734+08_00.csv │ │ ├── wandb_export_2025-04-17T23_17_24.700+08_00.csv │ │ ├── wandb_export_2025-04-17T23_17_28.859+08_00.csv │ │ └── wandb_export_2025-04-17T23_17_37.132+08_00.csv │ ├── analysis_longcot_instruction │ │ ├── acc_amc23.csv │ │ ├── acc_gsm8k.csv │ │ ├── acc_math500.csv │ │ ├── acc_olympiadbench.csv │ │ ├── length_amc23.csv │ │ ├── length_gsm8k.csv │ │ ├── length_math500.csv │ │ └── length_olympiadbench.csv │ ├── analysis_longcot_template │ │ ├── acc_amc23.csv │ │ ├── acc_gsm8k.csv │ │ ├── acc_math500.csv │ │ ├── acc_olympiadbench.csv │ │ ├── length_amc23.csv │ │ ├── length_gsm8k.csv │ │ ├── length_math500.csv │ │ └── length_olympiadbench.csv │ ├── analysis_qa │ │ ├── acc_amc23.csv │ │ ├── acc_gsm8k.csv │ │ ├── acc_math500.csv │ │ ├── acc_olympiadbench.csv │ │ ├── length_amc23.csv │ │ ├── length_gsm8k.csv │ │ ├── length_math500.csv │ │ └── length_olympiadbench.csv │ ├── analysis_shortqa │ │ ├── wandb_export_2025-04-17T23_05_15.499+08_00.csv │ │ ├── wandb_export_2025-04-17T23_05_19.730+08_00.csv │ │ ├── wandb_export_2025-04-17T23_05_23.333+08_00.csv │ │ ├── wandb_export_2025-04-17T23_05_41.477+08_00.csv │ │ ├── wandb_export_2025-04-17T23_05_45.214+08_00.csv │ │ ├── wandb_export_2025-04-17T23_05_49.016+08_00.csv │ │ └── wandb_export_2025-04-17T23_05_53.806+08_00.csv │ ├── backup │ │ ├── acc_amc23.csv │ │ ├── acc_gsm8k.csv │ │ ├── acc_math500.csv │ │ ├── acc_olympiadbench.csv │ │ ├── length_amc23.csv │ │ ├── length_gsm8k.csv │ │ ├── length_math500.csv │ │ └── length_olympiadbench.csv │ ├── cpt_results │ │ └── stable.json │ ├── motivation │ │ ├── acc_amc23.csv │ │ ├── acc_gsm8k.csv │ │ ├── acc_math500.csv │ │ ├── acc_olympiadbench.csv │ │ ├── length_amc23.csv │ │ ├── length_gsm8k.csv │ │ ├── length_math500.csv │ │ └── length_olympiadbench.csv │ ├── octo-vs-llama-vs-qwen │ │ └── wandb_export_2025-04-23T10_43_34.329+08_00.csv │ └── octothinker │ │ ├── 1b │ │ ├── acc_amc23.csv │ │ ├── acc_gsm8k.csv │ │ ├── acc_math500.csv │ │ ├── acc_olympiadbench.csv │ │ ├── length_amc23.csv │ │ ├── length_gsm8k.csv │ │ ├── length_math500.csv │ │ ├── length_olympiadbench.csv │ │ └── read.py │ │ ├── 1b_backup │ │ ├── acc_amc23.csv │ │ ├── acc_amc23.csv_merged.csv │ │ ├── acc_gsm8k.csv │ │ ├── acc_gsm8k.csv_merged.csv │ │ ├── acc_math500.csv │ │ ├── acc_math500.csv_merged.csv │ │ ├── acc_olympiadbench.csv │ │ ├── acc_olympiadbench.csv_merged.csv │ │ ├── length_amc23.csv │ │ ├── length_amc23.csv_merged.csv │ │ ├── length_gsm8k.csv │ │ ├── length_gsm8k.csv_merged.csv │ │ ├── length_math500.csv │ │ ├── length_math500.csv_merged.csv │ │ ├── length_olympiadbench.csv │ │ ├── length_olympiadbench.csv_merged.csv │ │ └── read.py │ │ └── 3b │ │ ├── acc_amc23.csv │ │ ├── acc_gsm8k.csv │ │ ├── acc_math500.csv │ │ ├── acc_olympiadbench.csv │ │ ├── length_amc23.csv │ │ ├── length_gsm8k.csv │ │ ├── length_math500.csv │ │ └── length_olympiadbench.csv ├── figures │ ├── 3b_decay_ablation_longqa_ratio.pdf │ ├── 3b_decay_ablation_longqa_ratio.png │ ├── 3b_decay_ablation_omi2_openr1_mix_ratio.pdf │ ├── 3b_decay_ablation_omi2_openr1_mix_ratio.png │ ├── 3b_decay_ablation_omi2_ratio.pdf │ ├── 3b_decay_ablation_omi2_ratio.png │ ├── 3b_decay_ablation_shortqa_ratio.pdf │ ├── 3b_decay_ablation_shortqa_ratio.png │ ├── analysis_data_quality.pdf │ ├── analysis_data_quality.png │ ├── analysis_data_quantity.pdf │ ├── analysis_data_quantity.png │ ├── analysis_instruction.pdf │ ├── analysis_instruction.png │ ├── analysis_length_scheduler.pdf │ ├── analysis_length_scheduler.png │ ├── analysis_longcot_instruction.pdf │ ├── analysis_longcot_instruction.png │ ├── analysis_qa.pdf │ ├── analysis_qa.png │ ├── analysis_template.pdf │ ├── analysis_template.png │ ├── movitivation.pdf │ ├── movitivation.png │ ├── octo-vs-llama-vs-qwen.png │ ├── octothinker_1b.png │ ├── octothinker_3b.png │ └── pass@k.png └── plot │ ├── 3b_decay_ablation_longqa_ratio.ipynb │ ├── 3b_decay_ablation_omi2_openr1_mix_ratio.ipynb │ ├── 3b_decay_ablation_omi2_ratio.ipynb │ ├── 3b_decay_ablation_shortqa_ratio.ipynb │ ├── analysis_data_quality.ipynb │ ├── analysis_data_quantity.ipynb │ ├── analysis_instruction.ipynb │ ├── analysis_length_scheduler.ipynb │ ├── analysis_longcot_instruction.ipynb │ ├── analysis_qa.ipynb │ ├── analysis_template.ipynb │ ├── motivation.ipynb │ ├── octo-vs-llama-vs-qwen.ipynb │ ├── octothinker-1b.ipynb │ ├── octothinker-3b.ipynb │ └── pass_k.ipynb └── rl ├── data └── math.8k │ ├── test.parquet │ └── train.parquet ├── requirements.txt ├── scripts ├── llama_3b_hybrid.sh └── llama_3b_long.sh └── verl ├── __init__.py ├── models ├── README.md ├── __init__.py ├── llama │ ├── __init__.py │ └── megatron │ │ ├── __init__.py │ │ ├── checkpoint_utils │ │ ├── __init__.py │ │ ├── llama_loader.py │ │ └── llama_saver.py │ │ ├── layers │ │ ├── __init__.py │ │ ├── parallel_attention.py │ │ ├── parallel_decoder.py │ │ ├── parallel_linear.py │ │ ├── parallel_mlp.py │ │ └── parallel_rmsnorm.py │ │ └── modeling_llama_megatron.py ├── qwen2 │ ├── __init__.py │ └── megatron │ │ ├── __init__.py │ │ ├── checkpoint_utils │ │ ├── __init__.py │ │ ├── qwen2_loader.py │ │ └── qwen2_saver.py │ │ ├── layers │ │ ├── __init__.py │ │ ├── parallel_attention.py │ │ ├── parallel_decoder.py │ │ ├── parallel_linear.py │ │ ├── parallel_mlp.py │ │ └── parallel_rmsnorm.py │ │ └── modeling_qwen2_megatron.py ├── registry.py ├── transformers │ ├── __init__.py │ ├── llama.py │ ├── monkey_patch.py │ ├── qwen2.py │ └── qwen2_vl.py └── weight_loader_registry.py ├── protocol.py ├── single_controller ├── __init__.py ├── base │ ├── __init__.py │ ├── decorator.py │ ├── megatron │ │ ├── __init__.py │ │ ├── worker.py │ │ └── worker_group.py │ ├── register_center │ │ ├── __init__.py │ │ └── ray.py │ ├── worker.py │ └── worker_group.py └── ray │ ├── __init__.py │ ├── base.py │ └── megatron.py ├── third_party ├── __init__.py └── vllm │ ├── __init__.py │ ├── vllm_spmd │ ├── __init__.py │ └── dtensor_weight_loaders.py │ ├── vllm_v_0_3_1 │ ├── __init__.py │ ├── arg_utils.py │ ├── config.py │ ├── llm.py │ ├── llm_engine_sp.py │ ├── model_loader.py │ ├── model_runner.py │ ├── parallel_state.py │ ├── tokenizer.py │ ├── weight_loaders.py │ └── worker.py │ ├── vllm_v_0_4_2 │ ├── __init__.py │ ├── arg_utils.py │ ├── config.py │ ├── dtensor_weight_loaders.py │ ├── hf_weight_loader.py │ ├── llm.py │ ├── llm_engine_sp.py │ ├── megatron_weight_loaders.py │ ├── model_loader.py │ ├── model_runner.py │ ├── parallel_state.py │ ├── spmd_gpu_executor.py │ ├── tokenizer.py │ └── worker.py │ ├── vllm_v_0_5_4 │ ├── __init__.py │ ├── arg_utils.py │ ├── config.py │ ├── dtensor_weight_loaders.py │ ├── hf_weight_loader.py │ ├── llm.py │ ├── llm_engine_sp.py │ ├── megatron_weight_loaders.py │ ├── model_loader.py │ ├── model_runner.py │ ├── parallel_state.py │ ├── spmd_gpu_executor.py │ ├── tokenizer.py │ └── worker.py │ └── vllm_v_0_6_3 │ ├── __init__.py │ ├── arg_utils.py │ ├── config.py │ ├── dtensor_weight_loaders.py │ ├── hf_weight_loader.py │ ├── llm.py │ ├── llm_engine_sp.py │ ├── megatron_weight_loaders.py │ ├── model_loader.py │ ├── model_runner.py │ ├── parallel_state.py │ ├── spmd_gpu_executor.py │ ├── tokenizer.py │ └── worker.py ├── trainer ├── __init__.py ├── config │ ├── evaluation.yaml │ ├── generation.yaml │ ├── ppo_megatron_trainer.yaml │ ├── ppo_trainer.yaml │ └── sft_trainer.yaml ├── fsdp_sft_trainer.py ├── main_eval.py ├── main_generation.py ├── main_ppo.py ├── ppo │ ├── __init__.py │ ├── core_algos.py │ └── ray_trainer.py └── runtime_env.yaml ├── utils ├── __init__.py ├── checkpoint │ ├── __init__.py │ ├── checkpoint_manager.py │ └── fsdp_checkpoint_manager.py ├── config.py ├── dataset │ ├── README.md │ ├── __init__.py │ ├── rl_dataset.py │ ├── rm_dataset.py │ └── sft_dataset.py ├── debug │ ├── __init__.py │ ├── performance.py │ └── trajectory_tracker.py ├── distributed.py ├── flops_counter.py ├── fs.py ├── fsdp_utils.py ├── hdfs_io.py ├── import_utils.py ├── logger │ ├── __init__.py │ └── aggregate_logger.py ├── logging_utils.py ├── megatron │ ├── __init__.py │ ├── memory.py │ ├── optimizer.py │ ├── pipeline_parallel.py │ ├── sequence_parallel.py │ └── tensor_parallel.py ├── megatron_utils.py ├── memory_buffer.py ├── model.py ├── py_functional.py ├── ray_utils.py ├── rendezvous │ ├── __init__.py │ └── ray_backend.py ├── reward_score │ ├── __init__.py │ ├── eval.py │ ├── geo3k.py │ ├── gsm8k.py │ ├── math.py │ ├── math_verifier.py │ ├── prime_code │ │ ├── __init__.py │ │ ├── testing_util.py │ │ └── utils.py │ └── prime_math │ │ ├── __init__.py │ │ ├── grader.py │ │ └── math_normalize.py ├── seqlen_balancing.py ├── tokenizer.py ├── torch_dtypes.py ├── torch_functional.py ├── tracking.py ├── ulysses.py └── vllm_scheduler │ └── vllm_params_manager.py ├── version └── version └── workers ├── __init__.py ├── actor ├── __init__.py ├── base.py ├── dp_actor.py └── megatron_actor.py ├── critic ├── __init__.py ├── base.py ├── dp_critic.py └── megatron_critic.py ├── fsdp_workers.py ├── megatron_workers.py ├── reward_manager ├── __init__.py ├── naive.py └── prime.py ├── reward_model ├── __init__.py ├── base.py └── megatron │ ├── __init__.py │ └── reward_model.py ├── rollout ├── __init__.py ├── base.py ├── hf_rollout.py ├── naive │ ├── __init__.py │ └── naive_rollout.py ├── tokenizer.py └── vllm_rollout │ ├── __init__.py │ ├── fire_vllm_rollout.py │ ├── vllm_rollout.py │ └── vllm_rollout_spmd.py └── sharding_manager ├── __init__.py ├── base.py ├── fsdp_ulysses.py ├── fsdp_vllm.py └── megatron_vllm.py /.gitattributes: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/.gitattributes -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/.gitignore -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/README.md -------------------------------------------------------------------------------- /assets/.keep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /assets/first-release.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/assets/first-release.pdf -------------------------------------------------------------------------------- /assets/octothinker_banner.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/assets/octothinker_banner.png -------------------------------------------------------------------------------- /eval/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/eval/README.md -------------------------------------------------------------------------------- /eval/configs/en_math_cot_few_shot_test_configs.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/eval/configs/en_math_cot_few_shot_test_configs.json -------------------------------------------------------------------------------- /eval/data_processing/answer_extraction.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/eval/data_processing/answer_extraction.py -------------------------------------------------------------------------------- /eval/data_processing/process_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/eval/data_processing/process_utils.py -------------------------------------------------------------------------------- /eval/datasets/amc23/test.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/eval/datasets/amc23/test.jsonl -------------------------------------------------------------------------------- /eval/datasets/asdiv/test.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/eval/datasets/asdiv/test.jsonl -------------------------------------------------------------------------------- /eval/datasets/gpqa/gpqa_diamond.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/eval/datasets/gpqa/gpqa_diamond.json -------------------------------------------------------------------------------- /eval/datasets/gpqa/gpqa_download.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/eval/datasets/gpqa/gpqa_download.py -------------------------------------------------------------------------------- /eval/datasets/gpqa/gpqa_main.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/eval/datasets/gpqa/gpqa_main.json -------------------------------------------------------------------------------- /eval/datasets/gsm8k/test.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/eval/datasets/gsm8k/test.jsonl -------------------------------------------------------------------------------- /eval/datasets/math/test.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/eval/datasets/math/test.jsonl -------------------------------------------------------------------------------- /eval/datasets/math500/test.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/eval/datasets/math500/test.jsonl -------------------------------------------------------------------------------- /eval/datasets/mathqa/test.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/eval/datasets/mathqa/test.jsonl -------------------------------------------------------------------------------- /eval/datasets/mawps/test.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/eval/datasets/mawps/test.jsonl -------------------------------------------------------------------------------- /eval/datasets/mmlu_stem/analysis.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/eval/datasets/mmlu_stem/analysis.py -------------------------------------------------------------------------------- /eval/datasets/mmlu_stem/test.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/eval/datasets/mmlu_stem/test.jsonl -------------------------------------------------------------------------------- /eval/datasets/ocw/test.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/eval/datasets/ocw/test.jsonl -------------------------------------------------------------------------------- /eval/datasets/olympiad_bench/test.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/eval/datasets/olympiad_bench/test.jsonl -------------------------------------------------------------------------------- /eval/datasets/sat/test.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/eval/datasets/sat/test.jsonl -------------------------------------------------------------------------------- /eval/datasets/svamp/test.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/eval/datasets/svamp/test.jsonl -------------------------------------------------------------------------------- /eval/datasets/tabmwp/test.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/eval/datasets/tabmwp/test.jsonl -------------------------------------------------------------------------------- /eval/eval/eval_script.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/eval/eval/eval_script.py -------------------------------------------------------------------------------- /eval/eval/eval_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/eval/eval/eval_utils.py -------------------------------------------------------------------------------- /eval/eval/ocwcourses_eval_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/eval/eval/ocwcourses_eval_utils.py -------------------------------------------------------------------------------- /eval/eval/python_executor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/eval/eval/python_executor.py -------------------------------------------------------------------------------- /eval/eval/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/eval/eval/utils.py -------------------------------------------------------------------------------- /eval/few_shot_prompts/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/eval/few_shot_prompts/__init__.py -------------------------------------------------------------------------------- /eval/few_shot_prompts/cot_amc_0_shot.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/eval/few_shot_prompts/cot_amc_0_shot.py -------------------------------------------------------------------------------- /eval/few_shot_prompts/cot_gpqa_5_shot.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/eval/few_shot_prompts/cot_gpqa_5_shot.py -------------------------------------------------------------------------------- /eval/few_shot_prompts/cot_gsm_8_shot.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/eval/few_shot_prompts/cot_gsm_8_shot.py -------------------------------------------------------------------------------- /eval/few_shot_prompts/cot_math_sat_4_shot.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/eval/few_shot_prompts/cot_math_sat_4_shot.py -------------------------------------------------------------------------------- /eval/few_shot_prompts/cot_mathqa_4_shot.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/eval/few_shot_prompts/cot_mathqa_4_shot.py -------------------------------------------------------------------------------- /eval/few_shot_prompts/cot_minerva_math_4_shot.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/eval/few_shot_prompts/cot_minerva_math_4_shot.py -------------------------------------------------------------------------------- /eval/few_shot_prompts/cot_mmlu_stem_4_shot.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/eval/few_shot_prompts/cot_mmlu_stem_4_shot.py -------------------------------------------------------------------------------- /eval/few_shot_prompts/cot_ocwcourses_4_shot.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/eval/few_shot_prompts/cot_ocwcourses_4_shot.py -------------------------------------------------------------------------------- /eval/few_shot_prompts/few_shot_prompting.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/eval/few_shot_prompts/few_shot_prompting.py -------------------------------------------------------------------------------- /eval/infer/run_cot_eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/eval/infer/run_cot_eval.py -------------------------------------------------------------------------------- /eval/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/eval/requirements.txt -------------------------------------------------------------------------------- /eval/run_subset_parallel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/eval/run_subset_parallel.py -------------------------------------------------------------------------------- /eval/scripts/en_math_cot_eval_last4dir.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/eval/scripts/en_math_cot_eval_last4dir.sh -------------------------------------------------------------------------------- /eval/summarize_results.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/eval/summarize_results.py -------------------------------------------------------------------------------- /eval/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/eval/utils.py -------------------------------------------------------------------------------- /midtraining/nanotron/.github/workflows/3d_parallelism_unit_tests.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/.github/workflows/3d_parallelism_unit_tests.yaml -------------------------------------------------------------------------------- /midtraining/nanotron/.github/workflows/code_quality.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/.github/workflows/code_quality.yaml -------------------------------------------------------------------------------- /midtraining/nanotron/.github/workflows/fa2_unit_tests.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/.github/workflows/fa2_unit_tests.yaml -------------------------------------------------------------------------------- /midtraining/nanotron/.github/workflows/python-release.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/.github/workflows/python-release.yml -------------------------------------------------------------------------------- /midtraining/nanotron/.github/workflows/trufflehog.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/.github/workflows/trufflehog.yml -------------------------------------------------------------------------------- /midtraining/nanotron/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/.gitignore -------------------------------------------------------------------------------- /midtraining/nanotron/.pre-commit-config-check.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/.pre-commit-config-check.yaml -------------------------------------------------------------------------------- /midtraining/nanotron/.pre-commit-config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/.pre-commit-config.yaml -------------------------------------------------------------------------------- /midtraining/nanotron/.pylintrc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/.pylintrc -------------------------------------------------------------------------------- /midtraining/nanotron/CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/CODE_OF_CONDUCT.md -------------------------------------------------------------------------------- /midtraining/nanotron/CONTRIBUTING.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/CONTRIBUTING.md -------------------------------------------------------------------------------- /midtraining/nanotron/LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/LICENSE -------------------------------------------------------------------------------- /midtraining/nanotron/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/Makefile -------------------------------------------------------------------------------- /midtraining/nanotron/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/README.md -------------------------------------------------------------------------------- /midtraining/nanotron/docs/3d_parallelism.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/docs/3d_parallelism.md -------------------------------------------------------------------------------- /midtraining/nanotron/docs/debugging.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/docs/debugging.md -------------------------------------------------------------------------------- /midtraining/nanotron/docs/docs.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/docs/docs.md -------------------------------------------------------------------------------- /midtraining/nanotron/docs/image-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/docs/image-2.png -------------------------------------------------------------------------------- /midtraining/nanotron/docs/image.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/docs/image.png -------------------------------------------------------------------------------- /midtraining/nanotron/docs/nanoset.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/docs/nanoset.md -------------------------------------------------------------------------------- /midtraining/nanotron/examples/bench_llama_7b.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/examples/bench_llama_7b.py -------------------------------------------------------------------------------- /midtraining/nanotron/examples/config_nanoset.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/examples/config_nanoset.yaml -------------------------------------------------------------------------------- /midtraining/nanotron/examples/config_tiny_llama.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/examples/config_tiny_llama.py -------------------------------------------------------------------------------- /midtraining/nanotron/examples/config_tiny_llama.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/examples/config_tiny_llama.yaml -------------------------------------------------------------------------------- /midtraining/nanotron/examples/config_tiny_llama_with_s3_upload.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/examples/config_tiny_llama_with_s3_upload.yaml -------------------------------------------------------------------------------- /midtraining/nanotron/examples/contributor-guide/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/examples/contributor-guide/README.md -------------------------------------------------------------------------------- /midtraining/nanotron/examples/contributor-guide/assets/1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/examples/contributor-guide/assets/1.png -------------------------------------------------------------------------------- /midtraining/nanotron/examples/contributor-guide/assets/10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/examples/contributor-guide/assets/10.png -------------------------------------------------------------------------------- /midtraining/nanotron/examples/contributor-guide/assets/11.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/examples/contributor-guide/assets/11.png -------------------------------------------------------------------------------- /midtraining/nanotron/examples/contributor-guide/assets/2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/examples/contributor-guide/assets/2.png -------------------------------------------------------------------------------- /midtraining/nanotron/examples/contributor-guide/assets/3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/examples/contributor-guide/assets/3.png -------------------------------------------------------------------------------- /midtraining/nanotron/examples/contributor-guide/assets/4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/examples/contributor-guide/assets/4.png -------------------------------------------------------------------------------- /midtraining/nanotron/examples/contributor-guide/assets/5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/examples/contributor-guide/assets/5.png -------------------------------------------------------------------------------- /midtraining/nanotron/examples/contributor-guide/assets/6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/examples/contributor-guide/assets/6.png -------------------------------------------------------------------------------- /midtraining/nanotron/examples/contributor-guide/assets/7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/examples/contributor-guide/assets/7.png -------------------------------------------------------------------------------- /midtraining/nanotron/examples/contributor-guide/assets/8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/examples/contributor-guide/assets/8.png -------------------------------------------------------------------------------- /midtraining/nanotron/examples/contributor-guide/assets/9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/examples/contributor-guide/assets/9.png -------------------------------------------------------------------------------- /midtraining/nanotron/examples/contributor-guide/debug_config_tiny_llama.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/examples/contributor-guide/debug_config_tiny_llama.py -------------------------------------------------------------------------------- /midtraining/nanotron/examples/contributor-guide/debug_config_tiny_llama.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/examples/contributor-guide/debug_config_tiny_llama.yaml -------------------------------------------------------------------------------- /midtraining/nanotron/examples/contributor-guide/debug_tiny_llama.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/examples/contributor-guide/debug_tiny_llama.sh -------------------------------------------------------------------------------- /midtraining/nanotron/examples/custom-dataloader/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/examples/custom-dataloader/README.md -------------------------------------------------------------------------------- /midtraining/nanotron/examples/custom-dataloader/config_custom_dl.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/examples/custom-dataloader/config_custom_dl.yaml -------------------------------------------------------------------------------- /midtraining/nanotron/examples/custom-dataloader/run_train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/examples/custom-dataloader/run_train.py -------------------------------------------------------------------------------- /midtraining/nanotron/examples/doremi/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/examples/doremi/README.md -------------------------------------------------------------------------------- /midtraining/nanotron/examples/doremi/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /midtraining/nanotron/examples/doremi/assets/domain_weights.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/examples/doremi/assets/domain_weights.png -------------------------------------------------------------------------------- /midtraining/nanotron/examples/doremi/assets/not_outperform.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/examples/doremi/assets/not_outperform.png -------------------------------------------------------------------------------- /midtraining/nanotron/examples/doremi/assets/outperform.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/examples/doremi/assets/outperform.png -------------------------------------------------------------------------------- /midtraining/nanotron/examples/doremi/configs/config_2.8b_llama.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/examples/doremi/configs/config_2.8b_llama.yaml -------------------------------------------------------------------------------- /midtraining/nanotron/examples/doremi/configs/config_2.8b_llama_with_tuned_weights.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/examples/doremi/configs/config_2.8b_llama_with_tuned_weights.yaml -------------------------------------------------------------------------------- /midtraining/nanotron/examples/doremi/configs/config_280m_llama.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/examples/doremi/configs/config_280m_llama.yaml -------------------------------------------------------------------------------- /midtraining/nanotron/examples/doremi/configs/config_280m_llama_proxy.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/examples/doremi/configs/config_280m_llama_proxy.yaml -------------------------------------------------------------------------------- /midtraining/nanotron/examples/doremi/doremi/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /midtraining/nanotron/examples/doremi/doremi/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/examples/doremi/doremi/config.py -------------------------------------------------------------------------------- /midtraining/nanotron/examples/doremi/doremi/dataloader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/examples/doremi/doremi/dataloader.py -------------------------------------------------------------------------------- /midtraining/nanotron/examples/doremi/doremi/doremi_context.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/examples/doremi/doremi/doremi_context.py -------------------------------------------------------------------------------- /midtraining/nanotron/examples/doremi/doremi/llama.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/examples/doremi/doremi/llama.py -------------------------------------------------------------------------------- /midtraining/nanotron/examples/doremi/doremi/loss.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/examples/doremi/doremi/loss.py -------------------------------------------------------------------------------- /midtraining/nanotron/examples/doremi/doremi/trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/examples/doremi/doremi/trainer.py -------------------------------------------------------------------------------- /midtraining/nanotron/examples/doremi/doremi/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/examples/doremi/doremi/utils.py -------------------------------------------------------------------------------- /midtraining/nanotron/examples/doremi/requirements.txt: -------------------------------------------------------------------------------- 1 | datasets 2 | -------------------------------------------------------------------------------- /midtraining/nanotron/examples/doremi/tests/test_doremi_context.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/examples/doremi/tests/test_doremi_context.py -------------------------------------------------------------------------------- /midtraining/nanotron/examples/doremi/tests/test_doremi_dataloader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/examples/doremi/tests/test_doremi_dataloader.py -------------------------------------------------------------------------------- /midtraining/nanotron/examples/doremi/tests/test_doremi_loss.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/examples/doremi/tests/test_doremi_loss.py -------------------------------------------------------------------------------- /midtraining/nanotron/examples/doremi/tests/test_doremi_sampler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/examples/doremi/tests/test_doremi_sampler.py -------------------------------------------------------------------------------- /midtraining/nanotron/examples/doremi/tests/test_doremi_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/examples/doremi/tests/test_doremi_utils.py -------------------------------------------------------------------------------- /midtraining/nanotron/examples/doremi/tests/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/examples/doremi/tests/utils.py -------------------------------------------------------------------------------- /midtraining/nanotron/examples/doremi/train_doremi.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/examples/doremi/train_doremi.py -------------------------------------------------------------------------------- /midtraining/nanotron/examples/doremi/train_reference.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/examples/doremi/train_reference.py -------------------------------------------------------------------------------- /midtraining/nanotron/examples/doremi/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/examples/doremi/utils.py -------------------------------------------------------------------------------- /midtraining/nanotron/examples/llama/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/examples/llama/README.md -------------------------------------------------------------------------------- /midtraining/nanotron/examples/llama/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /midtraining/nanotron/examples/llama/convert_hf_to_nanotron.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/examples/llama/convert_hf_to_nanotron.py -------------------------------------------------------------------------------- /midtraining/nanotron/examples/llama/convert_nanotron_to_hf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/examples/llama/convert_nanotron_to_hf.py -------------------------------------------------------------------------------- /midtraining/nanotron/examples/llama/convert_weights.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/examples/llama/convert_weights.py -------------------------------------------------------------------------------- /midtraining/nanotron/examples/llama/requirements.txt: -------------------------------------------------------------------------------- 1 | transformers==4.39.3 2 | -------------------------------------------------------------------------------- /midtraining/nanotron/examples/llama/tests/test_conversion.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/examples/llama/tests/test_conversion.py -------------------------------------------------------------------------------- /midtraining/nanotron/examples/llama/tests/test_conversion.py.orig: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/examples/llama/tests/test_conversion.py.orig -------------------------------------------------------------------------------- /midtraining/nanotron/examples/llama/tests/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/examples/llama/tests/utils.py -------------------------------------------------------------------------------- /midtraining/nanotron/examples/mamba/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/examples/mamba/README.md -------------------------------------------------------------------------------- /midtraining/nanotron/examples/mamba/assets/loss_mamba.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/examples/mamba/assets/loss_mamba.png -------------------------------------------------------------------------------- /midtraining/nanotron/examples/mamba/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/examples/mamba/config.py -------------------------------------------------------------------------------- /midtraining/nanotron/examples/mamba/config_mamba.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/examples/mamba/config_mamba.yaml -------------------------------------------------------------------------------- /midtraining/nanotron/examples/mamba/convert_hf_to_nanotron.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/examples/mamba/convert_hf_to_nanotron.py -------------------------------------------------------------------------------- /midtraining/nanotron/examples/mamba/convert_nanotron_to_hf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/examples/mamba/convert_nanotron_to_hf.py -------------------------------------------------------------------------------- /midtraining/nanotron/examples/mamba/create_config_mamba.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/examples/mamba/create_config_mamba.py -------------------------------------------------------------------------------- /midtraining/nanotron/examples/mamba/mamba.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/examples/mamba/mamba.py -------------------------------------------------------------------------------- /midtraining/nanotron/examples/mamba/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/examples/mamba/requirements.txt -------------------------------------------------------------------------------- /midtraining/nanotron/examples/mamba/run_generate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/examples/mamba/run_generate.py -------------------------------------------------------------------------------- /midtraining/nanotron/examples/mamba/selective_scan_interface.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/examples/mamba/selective_scan_interface.py -------------------------------------------------------------------------------- /midtraining/nanotron/examples/mamba/train_mamba.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/examples/mamba/train_mamba.py -------------------------------------------------------------------------------- /midtraining/nanotron/examples/mamba/train_mamba.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/examples/mamba/train_mamba.sh -------------------------------------------------------------------------------- /midtraining/nanotron/examples/mamba/trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/examples/mamba/trainer.py -------------------------------------------------------------------------------- /midtraining/nanotron/examples/moe/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/examples/moe/README.md -------------------------------------------------------------------------------- /midtraining/nanotron/examples/moe/config_llamoe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/examples/moe/config_llamoe.py -------------------------------------------------------------------------------- /midtraining/nanotron/examples/moe/config_llamoe.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/examples/moe/config_llamoe.yaml -------------------------------------------------------------------------------- /midtraining/nanotron/examples/moe/llamoe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/examples/moe/llamoe.py -------------------------------------------------------------------------------- /midtraining/nanotron/examples/moe/moe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/examples/moe/moe.py -------------------------------------------------------------------------------- /midtraining/nanotron/examples/moe/requirements.txt: -------------------------------------------------------------------------------- 1 | stanford-stk>=0.0.6 2 | megablocks==0.5.1 3 | -------------------------------------------------------------------------------- /midtraining/nanotron/examples/moe/train_moe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/examples/moe/train_moe.py -------------------------------------------------------------------------------- /midtraining/nanotron/examples/mup/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/examples/mup/README.md -------------------------------------------------------------------------------- /midtraining/nanotron/examples/mup/assets/llama.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/examples/mup/assets/llama.png -------------------------------------------------------------------------------- /midtraining/nanotron/examples/mup/assets/scale-across-depth.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/examples/mup/assets/scale-across-depth.png -------------------------------------------------------------------------------- /midtraining/nanotron/examples/mup/assets/scale-across-width.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/examples/mup/assets/scale-across-width.png -------------------------------------------------------------------------------- /midtraining/nanotron/examples/mup/configs/mup_350m_llama_config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/examples/mup/configs/mup_350m_llama_config.yaml -------------------------------------------------------------------------------- /midtraining/nanotron/examples/mup/configs/sp_350m_llama_config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/examples/mup/configs/sp_350m_llama_config.yaml -------------------------------------------------------------------------------- /midtraining/nanotron/examples/train_tiny_llama.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/examples/train_tiny_llama.sh -------------------------------------------------------------------------------- /midtraining/nanotron/pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/pyproject.toml -------------------------------------------------------------------------------- /midtraining/nanotron/run_generate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/run_generate.py -------------------------------------------------------------------------------- /midtraining/nanotron/run_train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/run_train.py -------------------------------------------------------------------------------- /midtraining/nanotron/scripts/fix_checkpoint_bad_naming.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/scripts/fix_checkpoint_bad_naming.py -------------------------------------------------------------------------------- /midtraining/nanotron/scripts/log_lighteval_to_wandb.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/scripts/log_lighteval_to_wandb.py -------------------------------------------------------------------------------- /midtraining/nanotron/src/nanotron/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = "0.4" 2 | -------------------------------------------------------------------------------- /midtraining/nanotron/src/nanotron/config/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/src/nanotron/config/__init__.py -------------------------------------------------------------------------------- /midtraining/nanotron/src/nanotron/config/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/src/nanotron/config/config.py -------------------------------------------------------------------------------- /midtraining/nanotron/src/nanotron/config/lighteval_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/src/nanotron/config/lighteval_config.py -------------------------------------------------------------------------------- /midtraining/nanotron/src/nanotron/config/models_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/src/nanotron/config/models_config.py -------------------------------------------------------------------------------- /midtraining/nanotron/src/nanotron/config/parallelism_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/src/nanotron/config/parallelism_config.py -------------------------------------------------------------------------------- /midtraining/nanotron/src/nanotron/config/utils_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/src/nanotron/config/utils_config.py -------------------------------------------------------------------------------- /midtraining/nanotron/src/nanotron/constants.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/src/nanotron/constants.py -------------------------------------------------------------------------------- /midtraining/nanotron/src/nanotron/data/collator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/src/nanotron/data/collator.py -------------------------------------------------------------------------------- /midtraining/nanotron/src/nanotron/data/dataloader_builder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/src/nanotron/data/dataloader_builder.py -------------------------------------------------------------------------------- /midtraining/nanotron/src/nanotron/data/nanoset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/src/nanotron/data/nanoset.py -------------------------------------------------------------------------------- /midtraining/nanotron/src/nanotron/data/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/src/nanotron/data/utils.py -------------------------------------------------------------------------------- /midtraining/nanotron/src/nanotron/dataloader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/src/nanotron/dataloader.py -------------------------------------------------------------------------------- /midtraining/nanotron/src/nanotron/distributed.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/src/nanotron/distributed.py -------------------------------------------------------------------------------- /midtraining/nanotron/src/nanotron/fp8/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/src/nanotron/fp8/__init__.py -------------------------------------------------------------------------------- /midtraining/nanotron/src/nanotron/fp8/constants.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/src/nanotron/fp8/constants.py -------------------------------------------------------------------------------- /midtraining/nanotron/src/nanotron/fp8/dtypes.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/src/nanotron/fp8/dtypes.py -------------------------------------------------------------------------------- /midtraining/nanotron/src/nanotron/fp8/kernel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/src/nanotron/fp8/kernel.py -------------------------------------------------------------------------------- /midtraining/nanotron/src/nanotron/fp8/linear.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/src/nanotron/fp8/linear.py -------------------------------------------------------------------------------- /midtraining/nanotron/src/nanotron/fp8/meta.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/src/nanotron/fp8/meta.py -------------------------------------------------------------------------------- /midtraining/nanotron/src/nanotron/fp8/parameter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/src/nanotron/fp8/parameter.py -------------------------------------------------------------------------------- /midtraining/nanotron/src/nanotron/fp8/tensor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/src/nanotron/fp8/tensor.py -------------------------------------------------------------------------------- /midtraining/nanotron/src/nanotron/fp8/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/src/nanotron/fp8/utils.py -------------------------------------------------------------------------------- /midtraining/nanotron/src/nanotron/generation/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/src/nanotron/generation/__init__.py -------------------------------------------------------------------------------- /midtraining/nanotron/src/nanotron/generation/decode.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/src/nanotron/generation/decode.py -------------------------------------------------------------------------------- /midtraining/nanotron/src/nanotron/generation/generate_store.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/src/nanotron/generation/generate_store.py -------------------------------------------------------------------------------- /midtraining/nanotron/src/nanotron/generation/sampler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/src/nanotron/generation/sampler.py -------------------------------------------------------------------------------- /midtraining/nanotron/src/nanotron/helpers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/src/nanotron/helpers.py -------------------------------------------------------------------------------- /midtraining/nanotron/src/nanotron/logging.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/src/nanotron/logging.py -------------------------------------------------------------------------------- /midtraining/nanotron/src/nanotron/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/src/nanotron/models/__init__.py -------------------------------------------------------------------------------- /midtraining/nanotron/src/nanotron/models/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/src/nanotron/models/base.py -------------------------------------------------------------------------------- /midtraining/nanotron/src/nanotron/models/llama.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/src/nanotron/models/llama.py -------------------------------------------------------------------------------- /midtraining/nanotron/src/nanotron/models/starcoder2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/src/nanotron/models/starcoder2.py -------------------------------------------------------------------------------- /midtraining/nanotron/src/nanotron/nn/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /midtraining/nanotron/src/nanotron/nn/activations.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/src/nanotron/nn/activations.py -------------------------------------------------------------------------------- /midtraining/nanotron/src/nanotron/nn/layer_norm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/src/nanotron/nn/layer_norm.py -------------------------------------------------------------------------------- /midtraining/nanotron/src/nanotron/optim/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/src/nanotron/optim/__init__.py -------------------------------------------------------------------------------- /midtraining/nanotron/src/nanotron/optim/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/src/nanotron/optim/base.py -------------------------------------------------------------------------------- /midtraining/nanotron/src/nanotron/optim/clip_grads.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/src/nanotron/optim/clip_grads.py -------------------------------------------------------------------------------- /midtraining/nanotron/src/nanotron/optim/gradient_accumulator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/src/nanotron/optim/gradient_accumulator.py -------------------------------------------------------------------------------- /midtraining/nanotron/src/nanotron/optim/inherit_from_other_optimizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/src/nanotron/optim/inherit_from_other_optimizer.py -------------------------------------------------------------------------------- /midtraining/nanotron/src/nanotron/optim/named_optimizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/src/nanotron/optim/named_optimizer.py -------------------------------------------------------------------------------- /midtraining/nanotron/src/nanotron/optim/optimizer_from_gradient_accumulator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/src/nanotron/optim/optimizer_from_gradient_accumulator.py -------------------------------------------------------------------------------- /midtraining/nanotron/src/nanotron/optim/zero.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/src/nanotron/optim/zero.py -------------------------------------------------------------------------------- /midtraining/nanotron/src/nanotron/parallel/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/src/nanotron/parallel/__init__.py -------------------------------------------------------------------------------- /midtraining/nanotron/src/nanotron/parallel/context.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/src/nanotron/parallel/context.py -------------------------------------------------------------------------------- /midtraining/nanotron/src/nanotron/parallel/data_parallel/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/src/nanotron/parallel/data_parallel/utils.py -------------------------------------------------------------------------------- /midtraining/nanotron/src/nanotron/parallel/parameters.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/src/nanotron/parallel/parameters.py -------------------------------------------------------------------------------- /midtraining/nanotron/src/nanotron/parallel/pipeline_parallel/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/src/nanotron/parallel/pipeline_parallel/README.md -------------------------------------------------------------------------------- /midtraining/nanotron/src/nanotron/parallel/pipeline_parallel/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /midtraining/nanotron/src/nanotron/parallel/pipeline_parallel/block.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/src/nanotron/parallel/pipeline_parallel/block.py -------------------------------------------------------------------------------- /midtraining/nanotron/src/nanotron/parallel/pipeline_parallel/context_manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/src/nanotron/parallel/pipeline_parallel/context_manager.py -------------------------------------------------------------------------------- /midtraining/nanotron/src/nanotron/parallel/pipeline_parallel/engine.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/src/nanotron/parallel/pipeline_parallel/engine.py -------------------------------------------------------------------------------- /midtraining/nanotron/src/nanotron/parallel/pipeline_parallel/functional.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/src/nanotron/parallel/pipeline_parallel/functional.py -------------------------------------------------------------------------------- /midtraining/nanotron/src/nanotron/parallel/pipeline_parallel/p2p.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/src/nanotron/parallel/pipeline_parallel/p2p.py -------------------------------------------------------------------------------- /midtraining/nanotron/src/nanotron/parallel/pipeline_parallel/state.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/src/nanotron/parallel/pipeline_parallel/state.py -------------------------------------------------------------------------------- /midtraining/nanotron/src/nanotron/parallel/pipeline_parallel/tensor_pointer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/src/nanotron/parallel/pipeline_parallel/tensor_pointer.py -------------------------------------------------------------------------------- /midtraining/nanotron/src/nanotron/parallel/pipeline_parallel/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/src/nanotron/parallel/pipeline_parallel/utils.py -------------------------------------------------------------------------------- /midtraining/nanotron/src/nanotron/parallel/sharded_parameters.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/src/nanotron/parallel/sharded_parameters.py -------------------------------------------------------------------------------- /midtraining/nanotron/src/nanotron/parallel/tensor_parallel/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /midtraining/nanotron/src/nanotron/parallel/tensor_parallel/distributed_differentiable_primitives.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/src/nanotron/parallel/tensor_parallel/distributed_differentiable_primitives.py -------------------------------------------------------------------------------- /midtraining/nanotron/src/nanotron/parallel/tensor_parallel/enum.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/src/nanotron/parallel/tensor_parallel/enum.py -------------------------------------------------------------------------------- /midtraining/nanotron/src/nanotron/parallel/tensor_parallel/functional.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/src/nanotron/parallel/tensor_parallel/functional.py -------------------------------------------------------------------------------- /midtraining/nanotron/src/nanotron/parallel/tensor_parallel/nn.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/src/nanotron/parallel/tensor_parallel/nn.py -------------------------------------------------------------------------------- /midtraining/nanotron/src/nanotron/parallel/tied_parameters.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/src/nanotron/parallel/tied_parameters.py -------------------------------------------------------------------------------- /midtraining/nanotron/src/nanotron/parallel/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/src/nanotron/parallel/utils.py -------------------------------------------------------------------------------- /midtraining/nanotron/src/nanotron/random.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/src/nanotron/random.py -------------------------------------------------------------------------------- /midtraining/nanotron/src/nanotron/s3_checkpoints/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/src/nanotron/s3_checkpoints/__init__.py -------------------------------------------------------------------------------- /midtraining/nanotron/src/nanotron/s3_checkpoints/fsspec.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/src/nanotron/s3_checkpoints/fsspec.py -------------------------------------------------------------------------------- /midtraining/nanotron/src/nanotron/s3_checkpoints/s3_mover.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/src/nanotron/s3_checkpoints/s3_mover.py -------------------------------------------------------------------------------- /midtraining/nanotron/src/nanotron/sanity_checks.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/src/nanotron/sanity_checks.py -------------------------------------------------------------------------------- /midtraining/nanotron/src/nanotron/scaling/parametrization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/src/nanotron/scaling/parametrization.py -------------------------------------------------------------------------------- /midtraining/nanotron/src/nanotron/serialize/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/src/nanotron/serialize/__init__.py -------------------------------------------------------------------------------- /midtraining/nanotron/src/nanotron/serialize/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/src/nanotron/serialize/main.py -------------------------------------------------------------------------------- /midtraining/nanotron/src/nanotron/serialize/metadata.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/src/nanotron/serialize/metadata.py -------------------------------------------------------------------------------- /midtraining/nanotron/src/nanotron/serialize/optimizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/src/nanotron/serialize/optimizer.py -------------------------------------------------------------------------------- /midtraining/nanotron/src/nanotron/serialize/random.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/src/nanotron/serialize/random.py -------------------------------------------------------------------------------- /midtraining/nanotron/src/nanotron/serialize/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/src/nanotron/serialize/utils.py -------------------------------------------------------------------------------- /midtraining/nanotron/src/nanotron/serialize/weights.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/src/nanotron/serialize/weights.py -------------------------------------------------------------------------------- /midtraining/nanotron/src/nanotron/trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/src/nanotron/trainer.py -------------------------------------------------------------------------------- /midtraining/nanotron/src/nanotron/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/src/nanotron/utils.py -------------------------------------------------------------------------------- /midtraining/nanotron/tests/fp8/test_fp8_parameter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/tests/fp8/test_fp8_parameter.py -------------------------------------------------------------------------------- /midtraining/nanotron/tests/fp8/test_linear.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/tests/fp8/test_linear.py -------------------------------------------------------------------------------- /midtraining/nanotron/tests/fp8/test_tensor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/tests/fp8/test_tensor.py -------------------------------------------------------------------------------- /midtraining/nanotron/tests/helpers/context.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/tests/helpers/context.py -------------------------------------------------------------------------------- /midtraining/nanotron/tests/helpers/data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/tests/helpers/data.py -------------------------------------------------------------------------------- /midtraining/nanotron/tests/helpers/distributed_tensor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/tests/helpers/distributed_tensor.py -------------------------------------------------------------------------------- /midtraining/nanotron/tests/helpers/dummy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/tests/helpers/dummy.py -------------------------------------------------------------------------------- /midtraining/nanotron/tests/helpers/exception.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/tests/helpers/exception.py -------------------------------------------------------------------------------- /midtraining/nanotron/tests/helpers/llama.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/tests/helpers/llama.py -------------------------------------------------------------------------------- /midtraining/nanotron/tests/helpers/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/tests/helpers/utils.py -------------------------------------------------------------------------------- /midtraining/nanotron/tests/kernels/run_layer_norm_convergence.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/tests/kernels/run_layer_norm_convergence.py -------------------------------------------------------------------------------- /midtraining/nanotron/tests/kernels/test_layer_norm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/tests/kernels/test_layer_norm.py -------------------------------------------------------------------------------- /midtraining/nanotron/tests/nanoset/test_build_nanoset_dataloader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/tests/nanoset/test_build_nanoset_dataloader.py -------------------------------------------------------------------------------- /midtraining/nanotron/tests/pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | addopts=-n 35 3 | markers = 4 | fa2: FA2-related 5 | -------------------------------------------------------------------------------- /midtraining/nanotron/tests/test_base_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/tests/test_base_model.py -------------------------------------------------------------------------------- /midtraining/nanotron/tests/test_checkpointing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/tests/test_checkpointing.py -------------------------------------------------------------------------------- /midtraining/nanotron/tests/test_clip_grads.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/tests/test_clip_grads.py -------------------------------------------------------------------------------- /midtraining/nanotron/tests/test_data_parallel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/tests/test_data_parallel.py -------------------------------------------------------------------------------- /midtraining/nanotron/tests/test_distributed.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/tests/test_distributed.py -------------------------------------------------------------------------------- /midtraining/nanotron/tests/test_optimizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/tests/test_optimizer.py -------------------------------------------------------------------------------- /midtraining/nanotron/tests/test_optimizer_params_groups.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/tests/test_optimizer_params_groups.py -------------------------------------------------------------------------------- /midtraining/nanotron/tests/test_p2p.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/tests/test_p2p.py -------------------------------------------------------------------------------- /midtraining/nanotron/tests/test_parameter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/tests/test_parameter.py -------------------------------------------------------------------------------- /midtraining/nanotron/tests/test_parameters_accumulate_gradient_in_fp32.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/tests/test_parameters_accumulate_gradient_in_fp32.py -------------------------------------------------------------------------------- /midtraining/nanotron/tests/test_parametrization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/tests/test_parametrization.py -------------------------------------------------------------------------------- /midtraining/nanotron/tests/test_pipeline_parallel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/tests/test_pipeline_parallel.py -------------------------------------------------------------------------------- /midtraining/nanotron/tests/test_random_state.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/tests/test_random_state.py -------------------------------------------------------------------------------- /midtraining/nanotron/tests/test_serialize.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/tests/test_serialize.py -------------------------------------------------------------------------------- /midtraining/nanotron/tests/test_tensor_parallel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/tests/test_tensor_parallel.py -------------------------------------------------------------------------------- /midtraining/nanotron/tests/test_tie_weights.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/tests/test_tie_weights.py -------------------------------------------------------------------------------- /midtraining/nanotron/tests/test_zero.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/tests/test_zero.py -------------------------------------------------------------------------------- /midtraining/nanotron/tools/preprocess_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/midtraining/nanotron/tools/preprocess_data.py -------------------------------------------------------------------------------- /plot/data/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/.DS_Store -------------------------------------------------------------------------------- /plot/data/3b_decay_ablation_omi2_openr1_mix_ratio/acc_amc23.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/3b_decay_ablation_omi2_openr1_mix_ratio/acc_amc23.csv -------------------------------------------------------------------------------- /plot/data/3b_decay_ablation_omi2_openr1_mix_ratio/acc_gsm8k.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/3b_decay_ablation_omi2_openr1_mix_ratio/acc_gsm8k.csv -------------------------------------------------------------------------------- /plot/data/3b_decay_ablation_omi2_openr1_mix_ratio/acc_math500.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/3b_decay_ablation_omi2_openr1_mix_ratio/acc_math500.csv -------------------------------------------------------------------------------- /plot/data/3b_decay_ablation_omi2_openr1_mix_ratio/acc_olympiadbench.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/3b_decay_ablation_omi2_openr1_mix_ratio/acc_olympiadbench.csv -------------------------------------------------------------------------------- /plot/data/3b_decay_ablation_omi2_openr1_mix_ratio/length_amc23.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/3b_decay_ablation_omi2_openr1_mix_ratio/length_amc23.csv -------------------------------------------------------------------------------- /plot/data/3b_decay_ablation_omi2_openr1_mix_ratio/length_gsm8k.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/3b_decay_ablation_omi2_openr1_mix_ratio/length_gsm8k.csv -------------------------------------------------------------------------------- /plot/data/3b_decay_ablation_omi2_openr1_mix_ratio/length_math500.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/3b_decay_ablation_omi2_openr1_mix_ratio/length_math500.csv -------------------------------------------------------------------------------- /plot/data/3b_decay_ablation_omi2_openr1_mix_ratio/length_olympiadbench.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/3b_decay_ablation_omi2_openr1_mix_ratio/length_olympiadbench.csv -------------------------------------------------------------------------------- /plot/data/3b_decay_ablation_omi2_ratio/acc_amc23.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/3b_decay_ablation_omi2_ratio/acc_amc23.csv -------------------------------------------------------------------------------- /plot/data/3b_decay_ablation_omi2_ratio/acc_gsm8k.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/3b_decay_ablation_omi2_ratio/acc_gsm8k.csv -------------------------------------------------------------------------------- /plot/data/3b_decay_ablation_omi2_ratio/acc_math500.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/3b_decay_ablation_omi2_ratio/acc_math500.csv -------------------------------------------------------------------------------- /plot/data/3b_decay_ablation_omi2_ratio/acc_olympiadbench.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/3b_decay_ablation_omi2_ratio/acc_olympiadbench.csv -------------------------------------------------------------------------------- /plot/data/3b_decay_ablation_omi2_ratio/length_amc23.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/3b_decay_ablation_omi2_ratio/length_amc23.csv -------------------------------------------------------------------------------- /plot/data/3b_decay_ablation_omi2_ratio/length_gsm8k.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/3b_decay_ablation_omi2_ratio/length_gsm8k.csv -------------------------------------------------------------------------------- /plot/data/3b_decay_ablation_omi2_ratio/length_math500.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/3b_decay_ablation_omi2_ratio/length_math500.csv -------------------------------------------------------------------------------- /plot/data/3b_decay_ablation_omi2_ratio/length_olympiadbench.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/3b_decay_ablation_omi2_ratio/length_olympiadbench.csv -------------------------------------------------------------------------------- /plot/data/3b_decay_ablation_openr1_ratio/acc_amc23.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/3b_decay_ablation_openr1_ratio/acc_amc23.csv -------------------------------------------------------------------------------- /plot/data/3b_decay_ablation_openr1_ratio/acc_gsm8k.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/3b_decay_ablation_openr1_ratio/acc_gsm8k.csv -------------------------------------------------------------------------------- /plot/data/3b_decay_ablation_openr1_ratio/acc_math500.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/3b_decay_ablation_openr1_ratio/acc_math500.csv -------------------------------------------------------------------------------- /plot/data/3b_decay_ablation_openr1_ratio/acc_olympiadbench.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/3b_decay_ablation_openr1_ratio/acc_olympiadbench.csv -------------------------------------------------------------------------------- /plot/data/3b_decay_ablation_openr1_ratio/length_amc23.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/3b_decay_ablation_openr1_ratio/length_amc23.csv -------------------------------------------------------------------------------- /plot/data/3b_decay_ablation_openr1_ratio/length_gsm8k.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/3b_decay_ablation_openr1_ratio/length_gsm8k.csv -------------------------------------------------------------------------------- /plot/data/3b_decay_ablation_openr1_ratio/length_math500.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/3b_decay_ablation_openr1_ratio/length_math500.csv -------------------------------------------------------------------------------- /plot/data/3b_decay_ablation_openr1_ratio/length_olympiadbench.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/3b_decay_ablation_openr1_ratio/length_olympiadbench.csv -------------------------------------------------------------------------------- /plot/data/3b_decay_ablation_shortqa_ratio/acc_amc23.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/3b_decay_ablation_shortqa_ratio/acc_amc23.csv -------------------------------------------------------------------------------- /plot/data/3b_decay_ablation_shortqa_ratio/acc_gsm8k.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/3b_decay_ablation_shortqa_ratio/acc_gsm8k.csv -------------------------------------------------------------------------------- /plot/data/3b_decay_ablation_shortqa_ratio/acc_math500.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/3b_decay_ablation_shortqa_ratio/acc_math500.csv -------------------------------------------------------------------------------- /plot/data/3b_decay_ablation_shortqa_ratio/acc_olympiadbench.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/3b_decay_ablation_shortqa_ratio/acc_olympiadbench.csv -------------------------------------------------------------------------------- /plot/data/3b_decay_ablation_shortqa_ratio/length_amc23.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/3b_decay_ablation_shortqa_ratio/length_amc23.csv -------------------------------------------------------------------------------- /plot/data/3b_decay_ablation_shortqa_ratio/length_gsm8k.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/3b_decay_ablation_shortqa_ratio/length_gsm8k.csv -------------------------------------------------------------------------------- /plot/data/3b_decay_ablation_shortqa_ratio/length_math500.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/3b_decay_ablation_shortqa_ratio/length_math500.csv -------------------------------------------------------------------------------- /plot/data/3b_decay_ablation_shortqa_ratio/length_olympiadbench.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/3b_decay_ablation_shortqa_ratio/length_olympiadbench.csv -------------------------------------------------------------------------------- /plot/data/analysis_data_quality/acc_amc23.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/analysis_data_quality/acc_amc23.csv -------------------------------------------------------------------------------- /plot/data/analysis_data_quality/acc_gsm8k.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/analysis_data_quality/acc_gsm8k.csv -------------------------------------------------------------------------------- /plot/data/analysis_data_quality/acc_math500.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/analysis_data_quality/acc_math500.csv -------------------------------------------------------------------------------- /plot/data/analysis_data_quality/acc_olympiadbench.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/analysis_data_quality/acc_olympiadbench.csv -------------------------------------------------------------------------------- /plot/data/analysis_data_quality/length_amc23.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/analysis_data_quality/length_amc23.csv -------------------------------------------------------------------------------- /plot/data/analysis_data_quality/length_gsm8k.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/analysis_data_quality/length_gsm8k.csv -------------------------------------------------------------------------------- /plot/data/analysis_data_quality/length_math500.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/analysis_data_quality/length_math500.csv -------------------------------------------------------------------------------- /plot/data/analysis_data_quality/length_olympiadbench.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/analysis_data_quality/length_olympiadbench.csv -------------------------------------------------------------------------------- /plot/data/analysis_data_quantity/acc_amc23.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/analysis_data_quantity/acc_amc23.csv -------------------------------------------------------------------------------- /plot/data/analysis_data_quantity/acc_gsm8k.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/analysis_data_quantity/acc_gsm8k.csv -------------------------------------------------------------------------------- /plot/data/analysis_data_quantity/acc_math500.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/analysis_data_quantity/acc_math500.csv -------------------------------------------------------------------------------- /plot/data/analysis_data_quantity/acc_olympiadbench.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/analysis_data_quantity/acc_olympiadbench.csv -------------------------------------------------------------------------------- /plot/data/analysis_data_quantity/length_amc23.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/analysis_data_quantity/length_amc23.csv -------------------------------------------------------------------------------- /plot/data/analysis_data_quantity/length_gsm8k.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/analysis_data_quantity/length_gsm8k.csv -------------------------------------------------------------------------------- /plot/data/analysis_data_quantity/length_math500.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/analysis_data_quantity/length_math500.csv -------------------------------------------------------------------------------- /plot/data/analysis_data_quantity/length_olympiadbench.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/analysis_data_quantity/length_olympiadbench.csv -------------------------------------------------------------------------------- /plot/data/analysis_instruction/acc_amc23.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/analysis_instruction/acc_amc23.csv -------------------------------------------------------------------------------- /plot/data/analysis_instruction/acc_gsm8k.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/analysis_instruction/acc_gsm8k.csv -------------------------------------------------------------------------------- /plot/data/analysis_instruction/acc_math500.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/analysis_instruction/acc_math500.csv -------------------------------------------------------------------------------- /plot/data/analysis_instruction/acc_olympiadbench.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/analysis_instruction/acc_olympiadbench.csv -------------------------------------------------------------------------------- /plot/data/analysis_instruction/length_amc23.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/analysis_instruction/length_amc23.csv -------------------------------------------------------------------------------- /plot/data/analysis_instruction/length_gsm8k.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/analysis_instruction/length_gsm8k.csv -------------------------------------------------------------------------------- /plot/data/analysis_instruction/length_math500.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/analysis_instruction/length_math500.csv -------------------------------------------------------------------------------- /plot/data/analysis_instruction/length_olympiadbench.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/analysis_instruction/length_olympiadbench.csv -------------------------------------------------------------------------------- /plot/data/analysis_length_scheduler/acc_amc23.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/analysis_length_scheduler/acc_amc23.csv -------------------------------------------------------------------------------- /plot/data/analysis_length_scheduler/acc_gsm8k.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/analysis_length_scheduler/acc_gsm8k.csv -------------------------------------------------------------------------------- /plot/data/analysis_length_scheduler/acc_math500.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/analysis_length_scheduler/acc_math500.csv -------------------------------------------------------------------------------- /plot/data/analysis_length_scheduler/acc_olympiadbench.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/analysis_length_scheduler/acc_olympiadbench.csv -------------------------------------------------------------------------------- /plot/data/analysis_length_scheduler/length_amc23.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/analysis_length_scheduler/length_amc23.csv -------------------------------------------------------------------------------- /plot/data/analysis_length_scheduler/length_gsm8k.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/analysis_length_scheduler/length_gsm8k.csv -------------------------------------------------------------------------------- /plot/data/analysis_length_scheduler/length_math500.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/analysis_length_scheduler/length_math500.csv -------------------------------------------------------------------------------- /plot/data/analysis_length_scheduler/length_olympiadbench.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/analysis_length_scheduler/length_olympiadbench.csv -------------------------------------------------------------------------------- /plot/data/analysis_longcot/wandb_export_2025-04-17T23_17_00.528+08_00.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/analysis_longcot/wandb_export_2025-04-17T23_17_00.528+08_00.csv -------------------------------------------------------------------------------- /plot/data/analysis_longcot/wandb_export_2025-04-17T23_17_07.600+08_00.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/analysis_longcot/wandb_export_2025-04-17T23_17_07.600+08_00.csv -------------------------------------------------------------------------------- /plot/data/analysis_longcot/wandb_export_2025-04-17T23_17_11.381+08_00.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/analysis_longcot/wandb_export_2025-04-17T23_17_11.381+08_00.csv -------------------------------------------------------------------------------- /plot/data/analysis_longcot/wandb_export_2025-04-17T23_17_15.974+08_00.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/analysis_longcot/wandb_export_2025-04-17T23_17_15.974+08_00.csv -------------------------------------------------------------------------------- /plot/data/analysis_longcot/wandb_export_2025-04-17T23_17_20.734+08_00.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/analysis_longcot/wandb_export_2025-04-17T23_17_20.734+08_00.csv -------------------------------------------------------------------------------- /plot/data/analysis_longcot/wandb_export_2025-04-17T23_17_24.700+08_00.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/analysis_longcot/wandb_export_2025-04-17T23_17_24.700+08_00.csv -------------------------------------------------------------------------------- /plot/data/analysis_longcot/wandb_export_2025-04-17T23_17_28.859+08_00.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/analysis_longcot/wandb_export_2025-04-17T23_17_28.859+08_00.csv -------------------------------------------------------------------------------- /plot/data/analysis_longcot/wandb_export_2025-04-17T23_17_37.132+08_00.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/analysis_longcot/wandb_export_2025-04-17T23_17_37.132+08_00.csv -------------------------------------------------------------------------------- /plot/data/analysis_longcot_instruction/acc_amc23.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/analysis_longcot_instruction/acc_amc23.csv -------------------------------------------------------------------------------- /plot/data/analysis_longcot_instruction/acc_gsm8k.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/analysis_longcot_instruction/acc_gsm8k.csv -------------------------------------------------------------------------------- /plot/data/analysis_longcot_instruction/acc_math500.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/analysis_longcot_instruction/acc_math500.csv -------------------------------------------------------------------------------- /plot/data/analysis_longcot_instruction/acc_olympiadbench.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/analysis_longcot_instruction/acc_olympiadbench.csv -------------------------------------------------------------------------------- /plot/data/analysis_longcot_instruction/length_amc23.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/analysis_longcot_instruction/length_amc23.csv -------------------------------------------------------------------------------- /plot/data/analysis_longcot_instruction/length_gsm8k.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/analysis_longcot_instruction/length_gsm8k.csv -------------------------------------------------------------------------------- /plot/data/analysis_longcot_instruction/length_math500.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/analysis_longcot_instruction/length_math500.csv -------------------------------------------------------------------------------- /plot/data/analysis_longcot_instruction/length_olympiadbench.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/analysis_longcot_instruction/length_olympiadbench.csv -------------------------------------------------------------------------------- /plot/data/analysis_longcot_template/acc_amc23.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/analysis_longcot_template/acc_amc23.csv -------------------------------------------------------------------------------- /plot/data/analysis_longcot_template/acc_gsm8k.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/analysis_longcot_template/acc_gsm8k.csv -------------------------------------------------------------------------------- /plot/data/analysis_longcot_template/acc_math500.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/analysis_longcot_template/acc_math500.csv -------------------------------------------------------------------------------- /plot/data/analysis_longcot_template/acc_olympiadbench.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/analysis_longcot_template/acc_olympiadbench.csv -------------------------------------------------------------------------------- /plot/data/analysis_longcot_template/length_amc23.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/analysis_longcot_template/length_amc23.csv -------------------------------------------------------------------------------- /plot/data/analysis_longcot_template/length_gsm8k.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/analysis_longcot_template/length_gsm8k.csv -------------------------------------------------------------------------------- /plot/data/analysis_longcot_template/length_math500.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/analysis_longcot_template/length_math500.csv -------------------------------------------------------------------------------- /plot/data/analysis_longcot_template/length_olympiadbench.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/analysis_longcot_template/length_olympiadbench.csv -------------------------------------------------------------------------------- /plot/data/analysis_qa/acc_amc23.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/analysis_qa/acc_amc23.csv -------------------------------------------------------------------------------- /plot/data/analysis_qa/acc_gsm8k.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/analysis_qa/acc_gsm8k.csv -------------------------------------------------------------------------------- /plot/data/analysis_qa/acc_math500.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/analysis_qa/acc_math500.csv -------------------------------------------------------------------------------- /plot/data/analysis_qa/acc_olympiadbench.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/analysis_qa/acc_olympiadbench.csv -------------------------------------------------------------------------------- /plot/data/analysis_qa/length_amc23.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/analysis_qa/length_amc23.csv -------------------------------------------------------------------------------- /plot/data/analysis_qa/length_gsm8k.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/analysis_qa/length_gsm8k.csv -------------------------------------------------------------------------------- /plot/data/analysis_qa/length_math500.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/analysis_qa/length_math500.csv -------------------------------------------------------------------------------- /plot/data/analysis_qa/length_olympiadbench.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/analysis_qa/length_olympiadbench.csv -------------------------------------------------------------------------------- /plot/data/analysis_shortqa/wandb_export_2025-04-17T23_05_15.499+08_00.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/analysis_shortqa/wandb_export_2025-04-17T23_05_15.499+08_00.csv -------------------------------------------------------------------------------- /plot/data/analysis_shortqa/wandb_export_2025-04-17T23_05_19.730+08_00.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/analysis_shortqa/wandb_export_2025-04-17T23_05_19.730+08_00.csv -------------------------------------------------------------------------------- /plot/data/analysis_shortqa/wandb_export_2025-04-17T23_05_23.333+08_00.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/analysis_shortqa/wandb_export_2025-04-17T23_05_23.333+08_00.csv -------------------------------------------------------------------------------- /plot/data/analysis_shortqa/wandb_export_2025-04-17T23_05_41.477+08_00.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/analysis_shortqa/wandb_export_2025-04-17T23_05_41.477+08_00.csv -------------------------------------------------------------------------------- /plot/data/analysis_shortqa/wandb_export_2025-04-17T23_05_45.214+08_00.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/analysis_shortqa/wandb_export_2025-04-17T23_05_45.214+08_00.csv -------------------------------------------------------------------------------- /plot/data/analysis_shortqa/wandb_export_2025-04-17T23_05_49.016+08_00.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/analysis_shortqa/wandb_export_2025-04-17T23_05_49.016+08_00.csv -------------------------------------------------------------------------------- /plot/data/analysis_shortqa/wandb_export_2025-04-17T23_05_53.806+08_00.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/analysis_shortqa/wandb_export_2025-04-17T23_05_53.806+08_00.csv -------------------------------------------------------------------------------- /plot/data/backup/acc_amc23.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/backup/acc_amc23.csv -------------------------------------------------------------------------------- /plot/data/backup/acc_gsm8k.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/backup/acc_gsm8k.csv -------------------------------------------------------------------------------- /plot/data/backup/acc_math500.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/backup/acc_math500.csv -------------------------------------------------------------------------------- /plot/data/backup/acc_olympiadbench.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/backup/acc_olympiadbench.csv -------------------------------------------------------------------------------- /plot/data/backup/length_amc23.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/backup/length_amc23.csv -------------------------------------------------------------------------------- /plot/data/backup/length_gsm8k.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/backup/length_gsm8k.csv -------------------------------------------------------------------------------- /plot/data/backup/length_math500.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/backup/length_math500.csv -------------------------------------------------------------------------------- /plot/data/backup/length_olympiadbench.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/backup/length_olympiadbench.csv -------------------------------------------------------------------------------- /plot/data/cpt_results/stable.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/cpt_results/stable.json -------------------------------------------------------------------------------- /plot/data/motivation/acc_amc23.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/motivation/acc_amc23.csv -------------------------------------------------------------------------------- /plot/data/motivation/acc_gsm8k.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/motivation/acc_gsm8k.csv -------------------------------------------------------------------------------- /plot/data/motivation/acc_math500.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/motivation/acc_math500.csv -------------------------------------------------------------------------------- /plot/data/motivation/acc_olympiadbench.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/motivation/acc_olympiadbench.csv -------------------------------------------------------------------------------- /plot/data/motivation/length_amc23.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/motivation/length_amc23.csv -------------------------------------------------------------------------------- /plot/data/motivation/length_gsm8k.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/motivation/length_gsm8k.csv -------------------------------------------------------------------------------- /plot/data/motivation/length_math500.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/motivation/length_math500.csv -------------------------------------------------------------------------------- /plot/data/motivation/length_olympiadbench.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/motivation/length_olympiadbench.csv -------------------------------------------------------------------------------- /plot/data/octo-vs-llama-vs-qwen/wandb_export_2025-04-23T10_43_34.329+08_00.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/octo-vs-llama-vs-qwen/wandb_export_2025-04-23T10_43_34.329+08_00.csv -------------------------------------------------------------------------------- /plot/data/octothinker/1b/acc_amc23.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/octothinker/1b/acc_amc23.csv -------------------------------------------------------------------------------- /plot/data/octothinker/1b/acc_gsm8k.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/octothinker/1b/acc_gsm8k.csv -------------------------------------------------------------------------------- /plot/data/octothinker/1b/acc_math500.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/octothinker/1b/acc_math500.csv -------------------------------------------------------------------------------- /plot/data/octothinker/1b/acc_olympiadbench.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/octothinker/1b/acc_olympiadbench.csv -------------------------------------------------------------------------------- /plot/data/octothinker/1b/length_amc23.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/octothinker/1b/length_amc23.csv -------------------------------------------------------------------------------- /plot/data/octothinker/1b/length_gsm8k.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/octothinker/1b/length_gsm8k.csv -------------------------------------------------------------------------------- /plot/data/octothinker/1b/length_math500.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/octothinker/1b/length_math500.csv -------------------------------------------------------------------------------- /plot/data/octothinker/1b/length_olympiadbench.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/octothinker/1b/length_olympiadbench.csv -------------------------------------------------------------------------------- /plot/data/octothinker/1b/read.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/octothinker/1b/read.py -------------------------------------------------------------------------------- /plot/data/octothinker/1b_backup/acc_amc23.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/octothinker/1b_backup/acc_amc23.csv -------------------------------------------------------------------------------- /plot/data/octothinker/1b_backup/acc_amc23.csv_merged.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/octothinker/1b_backup/acc_amc23.csv_merged.csv -------------------------------------------------------------------------------- /plot/data/octothinker/1b_backup/acc_gsm8k.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/octothinker/1b_backup/acc_gsm8k.csv -------------------------------------------------------------------------------- /plot/data/octothinker/1b_backup/acc_gsm8k.csv_merged.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/octothinker/1b_backup/acc_gsm8k.csv_merged.csv -------------------------------------------------------------------------------- /plot/data/octothinker/1b_backup/acc_math500.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/octothinker/1b_backup/acc_math500.csv -------------------------------------------------------------------------------- /plot/data/octothinker/1b_backup/acc_math500.csv_merged.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/octothinker/1b_backup/acc_math500.csv_merged.csv -------------------------------------------------------------------------------- /plot/data/octothinker/1b_backup/acc_olympiadbench.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/octothinker/1b_backup/acc_olympiadbench.csv -------------------------------------------------------------------------------- /plot/data/octothinker/1b_backup/acc_olympiadbench.csv_merged.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/octothinker/1b_backup/acc_olympiadbench.csv_merged.csv -------------------------------------------------------------------------------- /plot/data/octothinker/1b_backup/length_amc23.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/octothinker/1b_backup/length_amc23.csv -------------------------------------------------------------------------------- /plot/data/octothinker/1b_backup/length_amc23.csv_merged.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/octothinker/1b_backup/length_amc23.csv_merged.csv -------------------------------------------------------------------------------- /plot/data/octothinker/1b_backup/length_gsm8k.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/octothinker/1b_backup/length_gsm8k.csv -------------------------------------------------------------------------------- /plot/data/octothinker/1b_backup/length_gsm8k.csv_merged.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/octothinker/1b_backup/length_gsm8k.csv_merged.csv -------------------------------------------------------------------------------- /plot/data/octothinker/1b_backup/length_math500.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/octothinker/1b_backup/length_math500.csv -------------------------------------------------------------------------------- /plot/data/octothinker/1b_backup/length_math500.csv_merged.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/octothinker/1b_backup/length_math500.csv_merged.csv -------------------------------------------------------------------------------- /plot/data/octothinker/1b_backup/length_olympiadbench.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/octothinker/1b_backup/length_olympiadbench.csv -------------------------------------------------------------------------------- /plot/data/octothinker/1b_backup/length_olympiadbench.csv_merged.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/octothinker/1b_backup/length_olympiadbench.csv_merged.csv -------------------------------------------------------------------------------- /plot/data/octothinker/1b_backup/read.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/octothinker/1b_backup/read.py -------------------------------------------------------------------------------- /plot/data/octothinker/3b/acc_amc23.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/octothinker/3b/acc_amc23.csv -------------------------------------------------------------------------------- /plot/data/octothinker/3b/acc_gsm8k.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/octothinker/3b/acc_gsm8k.csv -------------------------------------------------------------------------------- /plot/data/octothinker/3b/acc_math500.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/octothinker/3b/acc_math500.csv -------------------------------------------------------------------------------- /plot/data/octothinker/3b/acc_olympiadbench.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/octothinker/3b/acc_olympiadbench.csv -------------------------------------------------------------------------------- /plot/data/octothinker/3b/length_amc23.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/octothinker/3b/length_amc23.csv -------------------------------------------------------------------------------- /plot/data/octothinker/3b/length_gsm8k.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/octothinker/3b/length_gsm8k.csv -------------------------------------------------------------------------------- /plot/data/octothinker/3b/length_math500.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/octothinker/3b/length_math500.csv -------------------------------------------------------------------------------- /plot/data/octothinker/3b/length_olympiadbench.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/data/octothinker/3b/length_olympiadbench.csv -------------------------------------------------------------------------------- /plot/figures/3b_decay_ablation_longqa_ratio.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/figures/3b_decay_ablation_longqa_ratio.pdf -------------------------------------------------------------------------------- /plot/figures/3b_decay_ablation_longqa_ratio.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/figures/3b_decay_ablation_longqa_ratio.png -------------------------------------------------------------------------------- /plot/figures/3b_decay_ablation_omi2_openr1_mix_ratio.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/figures/3b_decay_ablation_omi2_openr1_mix_ratio.pdf -------------------------------------------------------------------------------- /plot/figures/3b_decay_ablation_omi2_openr1_mix_ratio.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/figures/3b_decay_ablation_omi2_openr1_mix_ratio.png -------------------------------------------------------------------------------- /plot/figures/3b_decay_ablation_omi2_ratio.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/figures/3b_decay_ablation_omi2_ratio.pdf -------------------------------------------------------------------------------- /plot/figures/3b_decay_ablation_omi2_ratio.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/figures/3b_decay_ablation_omi2_ratio.png -------------------------------------------------------------------------------- /plot/figures/3b_decay_ablation_shortqa_ratio.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/figures/3b_decay_ablation_shortqa_ratio.pdf -------------------------------------------------------------------------------- /plot/figures/3b_decay_ablation_shortqa_ratio.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/figures/3b_decay_ablation_shortqa_ratio.png -------------------------------------------------------------------------------- /plot/figures/analysis_data_quality.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/figures/analysis_data_quality.pdf -------------------------------------------------------------------------------- /plot/figures/analysis_data_quality.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/figures/analysis_data_quality.png -------------------------------------------------------------------------------- /plot/figures/analysis_data_quantity.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/figures/analysis_data_quantity.pdf -------------------------------------------------------------------------------- /plot/figures/analysis_data_quantity.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/figures/analysis_data_quantity.png -------------------------------------------------------------------------------- /plot/figures/analysis_instruction.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/figures/analysis_instruction.pdf -------------------------------------------------------------------------------- /plot/figures/analysis_instruction.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/figures/analysis_instruction.png -------------------------------------------------------------------------------- /plot/figures/analysis_length_scheduler.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/figures/analysis_length_scheduler.pdf -------------------------------------------------------------------------------- /plot/figures/analysis_length_scheduler.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/figures/analysis_length_scheduler.png -------------------------------------------------------------------------------- /plot/figures/analysis_longcot_instruction.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/figures/analysis_longcot_instruction.pdf -------------------------------------------------------------------------------- /plot/figures/analysis_longcot_instruction.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/figures/analysis_longcot_instruction.png -------------------------------------------------------------------------------- /plot/figures/analysis_qa.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/figures/analysis_qa.pdf -------------------------------------------------------------------------------- /plot/figures/analysis_qa.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/figures/analysis_qa.png -------------------------------------------------------------------------------- /plot/figures/analysis_template.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/figures/analysis_template.pdf -------------------------------------------------------------------------------- /plot/figures/analysis_template.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/figures/analysis_template.png -------------------------------------------------------------------------------- /plot/figures/movitivation.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/figures/movitivation.pdf -------------------------------------------------------------------------------- /plot/figures/movitivation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/figures/movitivation.png -------------------------------------------------------------------------------- /plot/figures/octo-vs-llama-vs-qwen.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/figures/octo-vs-llama-vs-qwen.png -------------------------------------------------------------------------------- /plot/figures/octothinker_1b.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/figures/octothinker_1b.png -------------------------------------------------------------------------------- /plot/figures/octothinker_3b.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/figures/octothinker_3b.png -------------------------------------------------------------------------------- /plot/figures/pass@k.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/figures/pass@k.png -------------------------------------------------------------------------------- /plot/plot/3b_decay_ablation_longqa_ratio.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/plot/3b_decay_ablation_longqa_ratio.ipynb -------------------------------------------------------------------------------- /plot/plot/3b_decay_ablation_omi2_openr1_mix_ratio.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/plot/3b_decay_ablation_omi2_openr1_mix_ratio.ipynb -------------------------------------------------------------------------------- /plot/plot/3b_decay_ablation_omi2_ratio.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/plot/3b_decay_ablation_omi2_ratio.ipynb -------------------------------------------------------------------------------- /plot/plot/3b_decay_ablation_shortqa_ratio.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/plot/3b_decay_ablation_shortqa_ratio.ipynb -------------------------------------------------------------------------------- /plot/plot/analysis_data_quality.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/plot/analysis_data_quality.ipynb -------------------------------------------------------------------------------- /plot/plot/analysis_data_quantity.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/plot/analysis_data_quantity.ipynb -------------------------------------------------------------------------------- /plot/plot/analysis_instruction.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/plot/analysis_instruction.ipynb -------------------------------------------------------------------------------- /plot/plot/analysis_length_scheduler.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/plot/analysis_length_scheduler.ipynb -------------------------------------------------------------------------------- /plot/plot/analysis_longcot_instruction.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/plot/analysis_longcot_instruction.ipynb -------------------------------------------------------------------------------- /plot/plot/analysis_qa.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/plot/analysis_qa.ipynb -------------------------------------------------------------------------------- /plot/plot/analysis_template.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/plot/analysis_template.ipynb -------------------------------------------------------------------------------- /plot/plot/motivation.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/plot/motivation.ipynb -------------------------------------------------------------------------------- /plot/plot/octo-vs-llama-vs-qwen.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/plot/octo-vs-llama-vs-qwen.ipynb -------------------------------------------------------------------------------- /plot/plot/octothinker-1b.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/plot/octothinker-1b.ipynb -------------------------------------------------------------------------------- /plot/plot/octothinker-3b.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/plot/octothinker-3b.ipynb -------------------------------------------------------------------------------- /plot/plot/pass_k.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/plot/plot/pass_k.ipynb -------------------------------------------------------------------------------- /rl/data/math.8k/test.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/data/math.8k/test.parquet -------------------------------------------------------------------------------- /rl/data/math.8k/train.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/data/math.8k/train.parquet -------------------------------------------------------------------------------- /rl/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/requirements.txt -------------------------------------------------------------------------------- /rl/scripts/llama_3b_hybrid.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/scripts/llama_3b_hybrid.sh -------------------------------------------------------------------------------- /rl/scripts/llama_3b_long.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/scripts/llama_3b_long.sh -------------------------------------------------------------------------------- /rl/verl/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/__init__.py -------------------------------------------------------------------------------- /rl/verl/models/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/models/README.md -------------------------------------------------------------------------------- /rl/verl/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/models/__init__.py -------------------------------------------------------------------------------- /rl/verl/models/llama/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/models/llama/__init__.py -------------------------------------------------------------------------------- /rl/verl/models/llama/megatron/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/models/llama/megatron/__init__.py -------------------------------------------------------------------------------- /rl/verl/models/llama/megatron/checkpoint_utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/models/llama/megatron/checkpoint_utils/__init__.py -------------------------------------------------------------------------------- /rl/verl/models/llama/megatron/checkpoint_utils/llama_loader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/models/llama/megatron/checkpoint_utils/llama_loader.py -------------------------------------------------------------------------------- /rl/verl/models/llama/megatron/checkpoint_utils/llama_saver.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/models/llama/megatron/checkpoint_utils/llama_saver.py -------------------------------------------------------------------------------- /rl/verl/models/llama/megatron/layers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/models/llama/megatron/layers/__init__.py -------------------------------------------------------------------------------- /rl/verl/models/llama/megatron/layers/parallel_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/models/llama/megatron/layers/parallel_attention.py -------------------------------------------------------------------------------- /rl/verl/models/llama/megatron/layers/parallel_decoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/models/llama/megatron/layers/parallel_decoder.py -------------------------------------------------------------------------------- /rl/verl/models/llama/megatron/layers/parallel_linear.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/models/llama/megatron/layers/parallel_linear.py -------------------------------------------------------------------------------- /rl/verl/models/llama/megatron/layers/parallel_mlp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/models/llama/megatron/layers/parallel_mlp.py -------------------------------------------------------------------------------- /rl/verl/models/llama/megatron/layers/parallel_rmsnorm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/models/llama/megatron/layers/parallel_rmsnorm.py -------------------------------------------------------------------------------- /rl/verl/models/llama/megatron/modeling_llama_megatron.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/models/llama/megatron/modeling_llama_megatron.py -------------------------------------------------------------------------------- /rl/verl/models/qwen2/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/models/qwen2/__init__.py -------------------------------------------------------------------------------- /rl/verl/models/qwen2/megatron/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/models/qwen2/megatron/__init__.py -------------------------------------------------------------------------------- /rl/verl/models/qwen2/megatron/checkpoint_utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/models/qwen2/megatron/checkpoint_utils/__init__.py -------------------------------------------------------------------------------- /rl/verl/models/qwen2/megatron/checkpoint_utils/qwen2_loader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/models/qwen2/megatron/checkpoint_utils/qwen2_loader.py -------------------------------------------------------------------------------- /rl/verl/models/qwen2/megatron/checkpoint_utils/qwen2_saver.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/models/qwen2/megatron/checkpoint_utils/qwen2_saver.py -------------------------------------------------------------------------------- /rl/verl/models/qwen2/megatron/layers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/models/qwen2/megatron/layers/__init__.py -------------------------------------------------------------------------------- /rl/verl/models/qwen2/megatron/layers/parallel_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/models/qwen2/megatron/layers/parallel_attention.py -------------------------------------------------------------------------------- /rl/verl/models/qwen2/megatron/layers/parallel_decoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/models/qwen2/megatron/layers/parallel_decoder.py -------------------------------------------------------------------------------- /rl/verl/models/qwen2/megatron/layers/parallel_linear.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/models/qwen2/megatron/layers/parallel_linear.py -------------------------------------------------------------------------------- /rl/verl/models/qwen2/megatron/layers/parallel_mlp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/models/qwen2/megatron/layers/parallel_mlp.py -------------------------------------------------------------------------------- /rl/verl/models/qwen2/megatron/layers/parallel_rmsnorm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/models/qwen2/megatron/layers/parallel_rmsnorm.py -------------------------------------------------------------------------------- /rl/verl/models/qwen2/megatron/modeling_qwen2_megatron.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/models/qwen2/megatron/modeling_qwen2_megatron.py -------------------------------------------------------------------------------- /rl/verl/models/registry.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/models/registry.py -------------------------------------------------------------------------------- /rl/verl/models/transformers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/models/transformers/__init__.py -------------------------------------------------------------------------------- /rl/verl/models/transformers/llama.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/models/transformers/llama.py -------------------------------------------------------------------------------- /rl/verl/models/transformers/monkey_patch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/models/transformers/monkey_patch.py -------------------------------------------------------------------------------- /rl/verl/models/transformers/qwen2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/models/transformers/qwen2.py -------------------------------------------------------------------------------- /rl/verl/models/transformers/qwen2_vl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/models/transformers/qwen2_vl.py -------------------------------------------------------------------------------- /rl/verl/models/weight_loader_registry.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/models/weight_loader_registry.py -------------------------------------------------------------------------------- /rl/verl/protocol.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/protocol.py -------------------------------------------------------------------------------- /rl/verl/single_controller/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/single_controller/__init__.py -------------------------------------------------------------------------------- /rl/verl/single_controller/base/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/single_controller/base/__init__.py -------------------------------------------------------------------------------- /rl/verl/single_controller/base/decorator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/single_controller/base/decorator.py -------------------------------------------------------------------------------- /rl/verl/single_controller/base/megatron/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/single_controller/base/megatron/__init__.py -------------------------------------------------------------------------------- /rl/verl/single_controller/base/megatron/worker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/single_controller/base/megatron/worker.py -------------------------------------------------------------------------------- /rl/verl/single_controller/base/megatron/worker_group.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/single_controller/base/megatron/worker_group.py -------------------------------------------------------------------------------- /rl/verl/single_controller/base/register_center/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/single_controller/base/register_center/__init__.py -------------------------------------------------------------------------------- /rl/verl/single_controller/base/register_center/ray.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/single_controller/base/register_center/ray.py -------------------------------------------------------------------------------- /rl/verl/single_controller/base/worker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/single_controller/base/worker.py -------------------------------------------------------------------------------- /rl/verl/single_controller/base/worker_group.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/single_controller/base/worker_group.py -------------------------------------------------------------------------------- /rl/verl/single_controller/ray/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/single_controller/ray/__init__.py -------------------------------------------------------------------------------- /rl/verl/single_controller/ray/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/single_controller/ray/base.py -------------------------------------------------------------------------------- /rl/verl/single_controller/ray/megatron.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/single_controller/ray/megatron.py -------------------------------------------------------------------------------- /rl/verl/third_party/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/third_party/__init__.py -------------------------------------------------------------------------------- /rl/verl/third_party/vllm/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/third_party/vllm/__init__.py -------------------------------------------------------------------------------- /rl/verl/third_party/vllm/vllm_spmd/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/third_party/vllm/vllm_spmd/__init__.py -------------------------------------------------------------------------------- /rl/verl/third_party/vllm/vllm_spmd/dtensor_weight_loaders.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/third_party/vllm/vllm_spmd/dtensor_weight_loaders.py -------------------------------------------------------------------------------- /rl/verl/third_party/vllm/vllm_v_0_3_1/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/third_party/vllm/vllm_v_0_3_1/__init__.py -------------------------------------------------------------------------------- /rl/verl/third_party/vllm/vllm_v_0_3_1/arg_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/third_party/vllm/vllm_v_0_3_1/arg_utils.py -------------------------------------------------------------------------------- /rl/verl/third_party/vllm/vllm_v_0_3_1/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/third_party/vllm/vllm_v_0_3_1/config.py -------------------------------------------------------------------------------- /rl/verl/third_party/vllm/vllm_v_0_3_1/llm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/third_party/vllm/vllm_v_0_3_1/llm.py -------------------------------------------------------------------------------- /rl/verl/third_party/vllm/vllm_v_0_3_1/llm_engine_sp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/third_party/vllm/vllm_v_0_3_1/llm_engine_sp.py -------------------------------------------------------------------------------- /rl/verl/third_party/vllm/vllm_v_0_3_1/model_loader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/third_party/vllm/vllm_v_0_3_1/model_loader.py -------------------------------------------------------------------------------- /rl/verl/third_party/vllm/vllm_v_0_3_1/model_runner.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/third_party/vllm/vllm_v_0_3_1/model_runner.py -------------------------------------------------------------------------------- /rl/verl/third_party/vllm/vllm_v_0_3_1/parallel_state.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/third_party/vllm/vllm_v_0_3_1/parallel_state.py -------------------------------------------------------------------------------- /rl/verl/third_party/vllm/vllm_v_0_3_1/tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/third_party/vllm/vllm_v_0_3_1/tokenizer.py -------------------------------------------------------------------------------- /rl/verl/third_party/vllm/vllm_v_0_3_1/weight_loaders.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/third_party/vllm/vllm_v_0_3_1/weight_loaders.py -------------------------------------------------------------------------------- /rl/verl/third_party/vllm/vllm_v_0_3_1/worker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/third_party/vllm/vllm_v_0_3_1/worker.py -------------------------------------------------------------------------------- /rl/verl/third_party/vllm/vllm_v_0_4_2/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/third_party/vllm/vllm_v_0_4_2/__init__.py -------------------------------------------------------------------------------- /rl/verl/third_party/vllm/vllm_v_0_4_2/arg_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/third_party/vllm/vllm_v_0_4_2/arg_utils.py -------------------------------------------------------------------------------- /rl/verl/third_party/vllm/vllm_v_0_4_2/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/third_party/vllm/vllm_v_0_4_2/config.py -------------------------------------------------------------------------------- /rl/verl/third_party/vllm/vllm_v_0_4_2/dtensor_weight_loaders.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/third_party/vllm/vllm_v_0_4_2/dtensor_weight_loaders.py -------------------------------------------------------------------------------- /rl/verl/third_party/vllm/vllm_v_0_4_2/hf_weight_loader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/third_party/vllm/vllm_v_0_4_2/hf_weight_loader.py -------------------------------------------------------------------------------- /rl/verl/third_party/vllm/vllm_v_0_4_2/llm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/third_party/vllm/vllm_v_0_4_2/llm.py -------------------------------------------------------------------------------- /rl/verl/third_party/vllm/vllm_v_0_4_2/llm_engine_sp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/third_party/vllm/vllm_v_0_4_2/llm_engine_sp.py -------------------------------------------------------------------------------- /rl/verl/third_party/vllm/vllm_v_0_4_2/megatron_weight_loaders.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/third_party/vllm/vllm_v_0_4_2/megatron_weight_loaders.py -------------------------------------------------------------------------------- /rl/verl/third_party/vllm/vllm_v_0_4_2/model_loader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/third_party/vllm/vllm_v_0_4_2/model_loader.py -------------------------------------------------------------------------------- /rl/verl/third_party/vllm/vllm_v_0_4_2/model_runner.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/third_party/vllm/vllm_v_0_4_2/model_runner.py -------------------------------------------------------------------------------- /rl/verl/third_party/vllm/vllm_v_0_4_2/parallel_state.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/third_party/vllm/vllm_v_0_4_2/parallel_state.py -------------------------------------------------------------------------------- /rl/verl/third_party/vllm/vllm_v_0_4_2/spmd_gpu_executor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/third_party/vllm/vllm_v_0_4_2/spmd_gpu_executor.py -------------------------------------------------------------------------------- /rl/verl/third_party/vllm/vllm_v_0_4_2/tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/third_party/vllm/vllm_v_0_4_2/tokenizer.py -------------------------------------------------------------------------------- /rl/verl/third_party/vllm/vllm_v_0_4_2/worker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/third_party/vllm/vllm_v_0_4_2/worker.py -------------------------------------------------------------------------------- /rl/verl/third_party/vllm/vllm_v_0_5_4/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/third_party/vllm/vllm_v_0_5_4/__init__.py -------------------------------------------------------------------------------- /rl/verl/third_party/vllm/vllm_v_0_5_4/arg_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/third_party/vllm/vllm_v_0_5_4/arg_utils.py -------------------------------------------------------------------------------- /rl/verl/third_party/vllm/vllm_v_0_5_4/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/third_party/vllm/vllm_v_0_5_4/config.py -------------------------------------------------------------------------------- /rl/verl/third_party/vllm/vllm_v_0_5_4/dtensor_weight_loaders.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/third_party/vllm/vllm_v_0_5_4/dtensor_weight_loaders.py -------------------------------------------------------------------------------- /rl/verl/third_party/vllm/vllm_v_0_5_4/hf_weight_loader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/third_party/vllm/vllm_v_0_5_4/hf_weight_loader.py -------------------------------------------------------------------------------- /rl/verl/third_party/vllm/vllm_v_0_5_4/llm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/third_party/vllm/vllm_v_0_5_4/llm.py -------------------------------------------------------------------------------- /rl/verl/third_party/vllm/vllm_v_0_5_4/llm_engine_sp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/third_party/vllm/vllm_v_0_5_4/llm_engine_sp.py -------------------------------------------------------------------------------- /rl/verl/third_party/vllm/vllm_v_0_5_4/megatron_weight_loaders.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/third_party/vllm/vllm_v_0_5_4/megatron_weight_loaders.py -------------------------------------------------------------------------------- /rl/verl/third_party/vllm/vllm_v_0_5_4/model_loader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/third_party/vllm/vllm_v_0_5_4/model_loader.py -------------------------------------------------------------------------------- /rl/verl/third_party/vllm/vllm_v_0_5_4/model_runner.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/third_party/vllm/vllm_v_0_5_4/model_runner.py -------------------------------------------------------------------------------- /rl/verl/third_party/vllm/vllm_v_0_5_4/parallel_state.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/third_party/vllm/vllm_v_0_5_4/parallel_state.py -------------------------------------------------------------------------------- /rl/verl/third_party/vllm/vllm_v_0_5_4/spmd_gpu_executor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/third_party/vllm/vllm_v_0_5_4/spmd_gpu_executor.py -------------------------------------------------------------------------------- /rl/verl/third_party/vllm/vllm_v_0_5_4/tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/third_party/vllm/vllm_v_0_5_4/tokenizer.py -------------------------------------------------------------------------------- /rl/verl/third_party/vllm/vllm_v_0_5_4/worker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/third_party/vllm/vllm_v_0_5_4/worker.py -------------------------------------------------------------------------------- /rl/verl/third_party/vllm/vllm_v_0_6_3/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/third_party/vllm/vllm_v_0_6_3/__init__.py -------------------------------------------------------------------------------- /rl/verl/third_party/vllm/vllm_v_0_6_3/arg_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/third_party/vllm/vllm_v_0_6_3/arg_utils.py -------------------------------------------------------------------------------- /rl/verl/third_party/vllm/vllm_v_0_6_3/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/third_party/vllm/vllm_v_0_6_3/config.py -------------------------------------------------------------------------------- /rl/verl/third_party/vllm/vllm_v_0_6_3/dtensor_weight_loaders.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/third_party/vllm/vllm_v_0_6_3/dtensor_weight_loaders.py -------------------------------------------------------------------------------- /rl/verl/third_party/vllm/vllm_v_0_6_3/hf_weight_loader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/third_party/vllm/vllm_v_0_6_3/hf_weight_loader.py -------------------------------------------------------------------------------- /rl/verl/third_party/vllm/vllm_v_0_6_3/llm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/third_party/vllm/vllm_v_0_6_3/llm.py -------------------------------------------------------------------------------- /rl/verl/third_party/vllm/vllm_v_0_6_3/llm_engine_sp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/third_party/vllm/vllm_v_0_6_3/llm_engine_sp.py -------------------------------------------------------------------------------- /rl/verl/third_party/vllm/vllm_v_0_6_3/megatron_weight_loaders.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/third_party/vllm/vllm_v_0_6_3/megatron_weight_loaders.py -------------------------------------------------------------------------------- /rl/verl/third_party/vllm/vllm_v_0_6_3/model_loader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/third_party/vllm/vllm_v_0_6_3/model_loader.py -------------------------------------------------------------------------------- /rl/verl/third_party/vllm/vllm_v_0_6_3/model_runner.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/third_party/vllm/vllm_v_0_6_3/model_runner.py -------------------------------------------------------------------------------- /rl/verl/third_party/vllm/vllm_v_0_6_3/parallel_state.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/third_party/vllm/vllm_v_0_6_3/parallel_state.py -------------------------------------------------------------------------------- /rl/verl/third_party/vllm/vllm_v_0_6_3/spmd_gpu_executor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/third_party/vllm/vllm_v_0_6_3/spmd_gpu_executor.py -------------------------------------------------------------------------------- /rl/verl/third_party/vllm/vllm_v_0_6_3/tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/third_party/vllm/vllm_v_0_6_3/tokenizer.py -------------------------------------------------------------------------------- /rl/verl/third_party/vllm/vllm_v_0_6_3/worker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/third_party/vllm/vllm_v_0_6_3/worker.py -------------------------------------------------------------------------------- /rl/verl/trainer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/trainer/__init__.py -------------------------------------------------------------------------------- /rl/verl/trainer/config/evaluation.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/trainer/config/evaluation.yaml -------------------------------------------------------------------------------- /rl/verl/trainer/config/generation.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/trainer/config/generation.yaml -------------------------------------------------------------------------------- /rl/verl/trainer/config/ppo_megatron_trainer.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/trainer/config/ppo_megatron_trainer.yaml -------------------------------------------------------------------------------- /rl/verl/trainer/config/ppo_trainer.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/trainer/config/ppo_trainer.yaml -------------------------------------------------------------------------------- /rl/verl/trainer/config/sft_trainer.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/trainer/config/sft_trainer.yaml -------------------------------------------------------------------------------- /rl/verl/trainer/fsdp_sft_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/trainer/fsdp_sft_trainer.py -------------------------------------------------------------------------------- /rl/verl/trainer/main_eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/trainer/main_eval.py -------------------------------------------------------------------------------- /rl/verl/trainer/main_generation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/trainer/main_generation.py -------------------------------------------------------------------------------- /rl/verl/trainer/main_ppo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/trainer/main_ppo.py -------------------------------------------------------------------------------- /rl/verl/trainer/ppo/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/trainer/ppo/__init__.py -------------------------------------------------------------------------------- /rl/verl/trainer/ppo/core_algos.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/trainer/ppo/core_algos.py -------------------------------------------------------------------------------- /rl/verl/trainer/ppo/ray_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/trainer/ppo/ray_trainer.py -------------------------------------------------------------------------------- /rl/verl/trainer/runtime_env.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/trainer/runtime_env.yaml -------------------------------------------------------------------------------- /rl/verl/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/utils/__init__.py -------------------------------------------------------------------------------- /rl/verl/utils/checkpoint/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/utils/checkpoint/__init__.py -------------------------------------------------------------------------------- /rl/verl/utils/checkpoint/checkpoint_manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/utils/checkpoint/checkpoint_manager.py -------------------------------------------------------------------------------- /rl/verl/utils/checkpoint/fsdp_checkpoint_manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/utils/checkpoint/fsdp_checkpoint_manager.py -------------------------------------------------------------------------------- /rl/verl/utils/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/utils/config.py -------------------------------------------------------------------------------- /rl/verl/utils/dataset/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/utils/dataset/README.md -------------------------------------------------------------------------------- /rl/verl/utils/dataset/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/utils/dataset/__init__.py -------------------------------------------------------------------------------- /rl/verl/utils/dataset/rl_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/utils/dataset/rl_dataset.py -------------------------------------------------------------------------------- /rl/verl/utils/dataset/rm_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/utils/dataset/rm_dataset.py -------------------------------------------------------------------------------- /rl/verl/utils/dataset/sft_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/utils/dataset/sft_dataset.py -------------------------------------------------------------------------------- /rl/verl/utils/debug/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/utils/debug/__init__.py -------------------------------------------------------------------------------- /rl/verl/utils/debug/performance.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/utils/debug/performance.py -------------------------------------------------------------------------------- /rl/verl/utils/debug/trajectory_tracker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/utils/debug/trajectory_tracker.py -------------------------------------------------------------------------------- /rl/verl/utils/distributed.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/utils/distributed.py -------------------------------------------------------------------------------- /rl/verl/utils/flops_counter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/utils/flops_counter.py -------------------------------------------------------------------------------- /rl/verl/utils/fs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/utils/fs.py -------------------------------------------------------------------------------- /rl/verl/utils/fsdp_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/utils/fsdp_utils.py -------------------------------------------------------------------------------- /rl/verl/utils/hdfs_io.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/utils/hdfs_io.py -------------------------------------------------------------------------------- /rl/verl/utils/import_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/utils/import_utils.py -------------------------------------------------------------------------------- /rl/verl/utils/logger/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/utils/logger/__init__.py -------------------------------------------------------------------------------- /rl/verl/utils/logger/aggregate_logger.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/utils/logger/aggregate_logger.py -------------------------------------------------------------------------------- /rl/verl/utils/logging_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/utils/logging_utils.py -------------------------------------------------------------------------------- /rl/verl/utils/megatron/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/utils/megatron/__init__.py -------------------------------------------------------------------------------- /rl/verl/utils/megatron/memory.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/utils/megatron/memory.py -------------------------------------------------------------------------------- /rl/verl/utils/megatron/optimizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/utils/megatron/optimizer.py -------------------------------------------------------------------------------- /rl/verl/utils/megatron/pipeline_parallel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/utils/megatron/pipeline_parallel.py -------------------------------------------------------------------------------- /rl/verl/utils/megatron/sequence_parallel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/utils/megatron/sequence_parallel.py -------------------------------------------------------------------------------- /rl/verl/utils/megatron/tensor_parallel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/utils/megatron/tensor_parallel.py -------------------------------------------------------------------------------- /rl/verl/utils/megatron_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/utils/megatron_utils.py -------------------------------------------------------------------------------- /rl/verl/utils/memory_buffer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/utils/memory_buffer.py -------------------------------------------------------------------------------- /rl/verl/utils/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/utils/model.py -------------------------------------------------------------------------------- /rl/verl/utils/py_functional.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/utils/py_functional.py -------------------------------------------------------------------------------- /rl/verl/utils/ray_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/utils/ray_utils.py -------------------------------------------------------------------------------- /rl/verl/utils/rendezvous/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/utils/rendezvous/__init__.py -------------------------------------------------------------------------------- /rl/verl/utils/rendezvous/ray_backend.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/utils/rendezvous/ray_backend.py -------------------------------------------------------------------------------- /rl/verl/utils/reward_score/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/utils/reward_score/__init__.py -------------------------------------------------------------------------------- /rl/verl/utils/reward_score/eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/utils/reward_score/eval.py -------------------------------------------------------------------------------- /rl/verl/utils/reward_score/geo3k.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/utils/reward_score/geo3k.py -------------------------------------------------------------------------------- /rl/verl/utils/reward_score/gsm8k.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/utils/reward_score/gsm8k.py -------------------------------------------------------------------------------- /rl/verl/utils/reward_score/math.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/utils/reward_score/math.py -------------------------------------------------------------------------------- /rl/verl/utils/reward_score/math_verifier.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/utils/reward_score/math_verifier.py -------------------------------------------------------------------------------- /rl/verl/utils/reward_score/prime_code/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/utils/reward_score/prime_code/__init__.py -------------------------------------------------------------------------------- /rl/verl/utils/reward_score/prime_code/testing_util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/utils/reward_score/prime_code/testing_util.py -------------------------------------------------------------------------------- /rl/verl/utils/reward_score/prime_code/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/utils/reward_score/prime_code/utils.py -------------------------------------------------------------------------------- /rl/verl/utils/reward_score/prime_math/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/utils/reward_score/prime_math/__init__.py -------------------------------------------------------------------------------- /rl/verl/utils/reward_score/prime_math/grader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/utils/reward_score/prime_math/grader.py -------------------------------------------------------------------------------- /rl/verl/utils/reward_score/prime_math/math_normalize.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/utils/reward_score/prime_math/math_normalize.py -------------------------------------------------------------------------------- /rl/verl/utils/seqlen_balancing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/utils/seqlen_balancing.py -------------------------------------------------------------------------------- /rl/verl/utils/tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/utils/tokenizer.py -------------------------------------------------------------------------------- /rl/verl/utils/torch_dtypes.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/utils/torch_dtypes.py -------------------------------------------------------------------------------- /rl/verl/utils/torch_functional.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/utils/torch_functional.py -------------------------------------------------------------------------------- /rl/verl/utils/tracking.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/utils/tracking.py -------------------------------------------------------------------------------- /rl/verl/utils/ulysses.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/utils/ulysses.py -------------------------------------------------------------------------------- /rl/verl/utils/vllm_scheduler/vllm_params_manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/utils/vllm_scheduler/vllm_params_manager.py -------------------------------------------------------------------------------- /rl/verl/version/version: -------------------------------------------------------------------------------- 1 | 0.2.0.dev 2 | -------------------------------------------------------------------------------- /rl/verl/workers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/workers/__init__.py -------------------------------------------------------------------------------- /rl/verl/workers/actor/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/workers/actor/__init__.py -------------------------------------------------------------------------------- /rl/verl/workers/actor/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/workers/actor/base.py -------------------------------------------------------------------------------- /rl/verl/workers/actor/dp_actor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/workers/actor/dp_actor.py -------------------------------------------------------------------------------- /rl/verl/workers/actor/megatron_actor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/workers/actor/megatron_actor.py -------------------------------------------------------------------------------- /rl/verl/workers/critic/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/workers/critic/__init__.py -------------------------------------------------------------------------------- /rl/verl/workers/critic/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/workers/critic/base.py -------------------------------------------------------------------------------- /rl/verl/workers/critic/dp_critic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/workers/critic/dp_critic.py -------------------------------------------------------------------------------- /rl/verl/workers/critic/megatron_critic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/workers/critic/megatron_critic.py -------------------------------------------------------------------------------- /rl/verl/workers/fsdp_workers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/workers/fsdp_workers.py -------------------------------------------------------------------------------- /rl/verl/workers/megatron_workers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/workers/megatron_workers.py -------------------------------------------------------------------------------- /rl/verl/workers/reward_manager/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/workers/reward_manager/__init__.py -------------------------------------------------------------------------------- /rl/verl/workers/reward_manager/naive.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/workers/reward_manager/naive.py -------------------------------------------------------------------------------- /rl/verl/workers/reward_manager/prime.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/workers/reward_manager/prime.py -------------------------------------------------------------------------------- /rl/verl/workers/reward_model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/workers/reward_model/__init__.py -------------------------------------------------------------------------------- /rl/verl/workers/reward_model/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/workers/reward_model/base.py -------------------------------------------------------------------------------- /rl/verl/workers/reward_model/megatron/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/workers/reward_model/megatron/__init__.py -------------------------------------------------------------------------------- /rl/verl/workers/reward_model/megatron/reward_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/workers/reward_model/megatron/reward_model.py -------------------------------------------------------------------------------- /rl/verl/workers/rollout/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/workers/rollout/__init__.py -------------------------------------------------------------------------------- /rl/verl/workers/rollout/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/workers/rollout/base.py -------------------------------------------------------------------------------- /rl/verl/workers/rollout/hf_rollout.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/workers/rollout/hf_rollout.py -------------------------------------------------------------------------------- /rl/verl/workers/rollout/naive/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/workers/rollout/naive/__init__.py -------------------------------------------------------------------------------- /rl/verl/workers/rollout/naive/naive_rollout.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/workers/rollout/naive/naive_rollout.py -------------------------------------------------------------------------------- /rl/verl/workers/rollout/tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/workers/rollout/tokenizer.py -------------------------------------------------------------------------------- /rl/verl/workers/rollout/vllm_rollout/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/workers/rollout/vllm_rollout/__init__.py -------------------------------------------------------------------------------- /rl/verl/workers/rollout/vllm_rollout/fire_vllm_rollout.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/workers/rollout/vllm_rollout/fire_vllm_rollout.py -------------------------------------------------------------------------------- /rl/verl/workers/rollout/vllm_rollout/vllm_rollout.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/workers/rollout/vllm_rollout/vllm_rollout.py -------------------------------------------------------------------------------- /rl/verl/workers/rollout/vllm_rollout/vllm_rollout_spmd.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/workers/rollout/vllm_rollout/vllm_rollout_spmd.py -------------------------------------------------------------------------------- /rl/verl/workers/sharding_manager/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/workers/sharding_manager/__init__.py -------------------------------------------------------------------------------- /rl/verl/workers/sharding_manager/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/workers/sharding_manager/base.py -------------------------------------------------------------------------------- /rl/verl/workers/sharding_manager/fsdp_ulysses.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/workers/sharding_manager/fsdp_ulysses.py -------------------------------------------------------------------------------- /rl/verl/workers/sharding_manager/fsdp_vllm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/workers/sharding_manager/fsdp_vllm.py -------------------------------------------------------------------------------- /rl/verl/workers/sharding_manager/megatron_vllm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/OctoThinker/HEAD/rl/verl/workers/sharding_manager/megatron_vllm.py --------------------------------------------------------------------------------