├── .gitattributes
├── .github
    └── workflows
    │   └── cpu_ci.yml
├── .gitignore
├── .pre-commit-config.yaml
├── LICENSE
├── README.md
├── assets
    ├── .gitkeep
    ├── cli.png
    └── flow.png
├── examples
    ├── evaluate.ipynb
    └── scoring.ipynb
├── format.sh
├── pyproject.toml
├── recipes
    ├── sky-t1-7b
    │   └── README.md
    ├── sky-t1-flash
    │   └── README.md
    └── sky-t1-preview
    │   ├── README.md
    │   ├── __init__.py
    │   ├── postprocess.py
    │   ├── preprocess.py
    │   ├── prompts.py
    │   └── recipe.py
├── scripts
    ├── __init__.py
    ├── combine_data.py
    ├── convert_format.py
    ├── convert_to_data.py
    ├── label_math_difficulty.py
    ├── prompts.py
    ├── qwen_eval_bon.py
    ├── response_rewrite.py
    └── upload_hub.py
├── skythought
    ├── __init__.py
    ├── evals
    │   ├── README.md
    │   ├── __init__.py
    │   ├── base_instruct_evals.md
    │   ├── batch
    │   │   ├── __init__.py
    │   │   ├── engines
    │   │   │   ├── __init__.py
    │   │   │   ├── base.py
    │   │   │   ├── initializer.py
    │   │   │   └── vllm_engine.py
    │   │   ├── env_config.py
    │   │   ├── logging
    │   │   │   └── __init__.py
    │   │   ├── pipeline.py
    │   │   ├── tokenizer.py
    │   │   ├── utils.py
    │   │   └── workload.py
    │   ├── cli.py
    │   ├── common
    │   │   ├── __init__.py
    │   │   └── entities.py
    │   ├── inference_and_check.py
    │   ├── labeled_numina_difficulty
    │   │   └── README.md
    │   ├── models
    │   │   ├── __init__.py
    │   │   ├── base.py
    │   │   ├── model_configs.yaml
    │   │   └── system_prompts
    │   │   │   └── prime.txt
    │   ├── ray_configs
    │   │   └── ray_config.yaml
    │   ├── scoring
    │   │   ├── __init__.py
    │   │   ├── apps
    │   │   │   ├── __init__.py
    │   │   │   ├── apps_scorer.py
    │   │   │   └── apps_util.py
    │   │   ├── base.py
    │   │   ├── gsm8k
    │   │   │   ├── __init__.py
    │   │   │   └── gsm8k_scorer.py
    │   │   ├── ifeval
    │   │   │   ├── __init__.py
    │   │   │   ├── ifeval_scorer.py
    │   │   │   ├── instructions.py
    │   │   │   ├── instructions_main.py
    │   │   │   ├── instructions_registry.py
    │   │   │   └── instructions_util.py
    │   │   ├── livecodebench
    │   │   │   ├── __init__.py
    │   │   │   ├── livecodebench_scorer.py
    │   │   │   └── livecodebench_util.py
    │   │   ├── math
    │   │   │   ├── __init__.py
    │   │   │   └── math_scorer.py
    │   │   ├── taco
    │   │   │   ├── __init__.py
    │   │   │   ├── taco_scorer.py
    │   │   │   └── taco_util.py
    │   │   └── utils
    │   │   │   ├── __init__.py
    │   │   │   └── pyext2.py
    │   ├── tasks
    │   │   ├── __init__.py
    │   │   ├── aime
    │   │   │   ├── aime24.yaml
    │   │   │   ├── aime24_sky.yaml
    │   │   │   ├── aime25_1.yaml
    │   │   │   ├── aime25_2.yaml
    │   │   │   └── aime_handler.py
    │   │   ├── amc23
    │   │   │   ├── amc23.yaml
    │   │   │   └── amc23_handler.py
    │   │   ├── apps
    │   │   │   ├── apps.yaml
    │   │   │   ├── apps_handler.py
    │   │   │   └── apps_util.py
    │   │   ├── arc
    │   │   │   ├── arc_c.yaml
    │   │   │   └── arc_handler.py
    │   │   ├── base.py
    │   │   ├── gpqa_diamond
    │   │   │   ├── gpqa_diamond.yaml
    │   │   │   └── gpqa_diamond_handler.py
    │   │   ├── gsm8k
    │   │   │   ├── gsm8k.yaml
    │   │   │   └── gsm8k_handler.py
    │   │   ├── liveaops
    │   │   │   ├── liveaops.yaml
    │   │   │   └── liveaops_handler.py
    │   │   ├── livecodebench
    │   │   │   ├── livecodebench.yaml
    │   │   │   ├── livecodebench_easy.yaml
    │   │   │   ├── livecodebench_handler.py
    │   │   │   ├── livecodebench_hard.yaml
    │   │   │   ├── livecodebench_medium.yaml
    │   │   │   └── livecodebench_util.py
    │   │   ├── math
    │   │   │   ├── math500.yaml
    │   │   │   └── math_handler.py
    │   │   ├── minervamath
    │   │   │   ├── minervamath.yaml
    │   │   │   └── minervamath_handler.py
    │   │   ├── mmlu
    │   │   │   ├── mmlu.yaml
    │   │   │   ├── mmlu_handler.py
    │   │   │   └── mmlu_pro.yaml
    │   │   ├── numina
    │   │   │   ├── numina.yaml
    │   │   │   ├── numina_amc_aime.yaml
    │   │   │   ├── numina_handler.py
    │   │   │   ├── numina_math.yaml
    │   │   │   └── numina_olympiads.yaml
    │   │   ├── olympiadbench
    │   │   │   ├── olympiadbench_handler.py
    │   │   │   └── olympiadbench_math_en.yaml
    │   │   ├── omni_math
    │   │   │   ├── omni_handler.py
    │   │   │   └── omni_math.yaml
    │   │   ├── taco
    │   │   │   ├── pyext2.py
    │   │   │   ├── taco.yaml
    │   │   │   ├── taco_handler.py
    │   │   │   └── taco_util.py
    │   │   └── task_util.py
    │   └── util
    │   │   ├── __init__.py
    │   │   ├── cli_util.py
    │   │   ├── common.py
    │   │   ├── math_parsing_util.py
    │   │   ├── metrics.py
    │   │   ├── response.py
    │   │   └── results.py
    ├── skythought-rl
    │   ├── .readthedocs.yaml
    │   ├── .style.yapf
    │   ├── LICENSE
    │   ├── Notice.txt
    │   ├── README.md
    │   ├── data
    │   │   ├── data_prepare_mini.py
    │   │   ├── data_prepare_step2.py
    │   │   ├── data_prepare_step4.py
    │   │   └── data_prepare_zero.py
    │   ├── docker
    │   │   ├── Dockerfile.ngc.vllm
    │   │   └── Dockerfile.vemlp.vllm.te
    │   ├── docs
    │   │   ├── Makefile
    │   │   ├── README.md
    │   │   ├── _static
    │   │   │   └── logo.png
    │   │   ├── advance
    │   │   │   ├── dpo_extension.rst
    │   │   │   ├── fsdp_extension.rst
    │   │   │   ├── megatron_extension.rst
    │   │   │   └── placement.rst
    │   │   ├── conf.py
    │   │   ├── examples
    │   │   │   ├── config.rst
    │   │   │   ├── gsm8k_example.rst
    │   │   │   └── ppo_code_architecture.rst
    │   │   ├── experiment
    │   │   │   └── ppo.rst
    │   │   ├── index.rst
    │   │   ├── preparation
    │   │   │   ├── prepare_data.rst
    │   │   │   └── reward_function.rst
    │   │   ├── requirements-docs.txt
    │   │   ├── start
    │   │   │   ├── install.rst
    │   │   │   └── quickstart.rst
    │   │   └── workers
    │   │   │   ├── fsdp_workers.rst
    │   │   │   ├── megatron_workers.rst
    │   │   │   └── ray_trainer.rst
    │   ├── examples
    │   │   ├── data_preprocess
    │   │   │   ├── full_hh_rlhf.py
    │   │   │   ├── gsm8k.py
    │   │   │   ├── hellaswag.py
    │   │   │   ├── math_dataset.py
    │   │   │   └── taco.py
    │   │   ├── generation
    │   │   │   └── run_deepseek_v2_lite_math.sh
    │   │   ├── ppo_trainer
    │   │   │   ├── run_deepseek7b_llm.sh
    │   │   │   ├── run_deepseek_full_hh_rlhf.sh
    │   │   │   ├── run_deepseek_math_gsm8k_megatron.sh
    │   │   │   ├── run_deepseek_megatron.sh
    │   │   │   ├── run_gemma.sh
    │   │   │   ├── run_prime-7b.sh
    │   │   │   ├── run_qwen2-7b.sh
    │   │   │   ├── run_qwen2-7b_rm.sh
    │   │   │   └── run_qwen2.5-32b.sh
    │   │   ├── ray
    │   │   │   └── tutorial.ipynb
    │   │   ├── sft
    │   │   │   └── gsm8k
    │   │   │   │   ├── run_deepseek_6b7.sh
    │   │   │   │   ├── run_gemma_2b.sh
    │   │   │   │   └── run_gemma_7b.sh
    │   │   ├── sky-t1
    │   │   │   ├── run-sky-t1-7b-step2.sh
    │   │   │   ├── run-sky-t1-7b-step4.sh
    │   │   │   ├── run-sky-t1-7b-zero.sh
    │   │   │   └── run-sky-t1-mini.sh
    │   │   └── split_placement
    │   │   │   ├── README.md
    │   │   │   ├── config
    │   │   │       └── ppo_trainer_split.yaml
    │   │   │   ├── main_ppo_split.py
    │   │   │   ├── run_deepseek7b_llm.sh
    │   │   │   └── split_monkey_patch.py
    │   ├── patches
    │   │   └── megatron_v4.patch
    │   ├── pyproject.toml
    │   ├── requirements.txt
    │   ├── setup.py
    │   ├── tests
    │   │   ├── ray
    │   │   │   ├── check_worker_alive
    │   │   │   │   └── main.py
    │   │   │   ├── detached_worker
    │   │   │   │   ├── README.md
    │   │   │   │   ├── client.py
    │   │   │   │   ├── run.sh
    │   │   │   │   └── server.py
    │   │   │   ├── test_check_worker_alive.py
    │   │   │   ├── test_colocated_workers.py
    │   │   │   ├── test_data_transfer.py
    │   │   │   ├── test_driverfunc_to_worker.py
    │   │   │   ├── test_high_level_scheduling_api.py
    │   │   │   ├── test_ray_local_envs.py
    │   │   │   ├── test_remote_api.py
    │   │   │   ├── test_rvdz.py
    │   │   │   ├── test_worker_group_basics.py
    │   │   │   └── test_worker_group_torch.py
    │   │   └── verl
    │   │   │   └── utils
    │   │   │       └── dataset
    │   │   │           ├── test_rl_dataset.py
    │   │   │           ├── test_rm_dataset.py
    │   │   │           └── test_sft_dataset.py
    │   └── verl
    │   │   ├── __init__.py
    │   │   ├── models
    │   │       ├── README.md
    │   │       ├── __init__.py
    │   │       ├── llama
    │   │       │   ├── __init__.py
    │   │       │   └── megatron
    │   │       │   │   ├── __init__.py
    │   │       │   │   ├── checkpoint_utils
    │   │       │   │       ├── __init__.py
    │   │       │   │       ├── llama_loader.py
    │   │       │   │       └── llama_saver.py
    │   │       │   │   ├── layers
    │   │       │   │       ├── __init__.py
    │   │       │   │       ├── parallel_attention.py
    │   │       │   │       ├── parallel_decoder.py
    │   │       │   │       ├── parallel_linear.py
    │   │       │   │       ├── parallel_mlp.py
    │   │       │   │       └── parallel_rmsnorm.py
    │   │       │   │   └── modeling_llama_megatron.py
    │   │       ├── registry.py
    │   │       ├── transformers
    │   │       │   ├── __init__.py
    │   │       │   ├── llama.py
    │   │       │   ├── monkey_patch.py
    │   │       │   └── qwen2.py
    │   │       └── weight_loader_registry.py
    │   │   ├── protocol.py
    │   │   ├── single_controller
    │   │       ├── __init__.py
    │   │       ├── base
    │   │       │   ├── __init__.py
    │   │       │   ├── decorator.py
    │   │       │   ├── megatron
    │   │       │   │   ├── __init__.py
    │   │       │   │   ├── worker.py
    │   │       │   │   └── worker_group.py
    │   │       │   ├── register_center
    │   │       │   │   ├── __init__.py
    │   │       │   │   └── ray.py
    │   │       │   ├── worker.py
    │   │       │   └── worker_group.py
    │   │       └── ray
    │   │       │   ├── __init__.py
    │   │       │   ├── base.py
    │   │       │   └── megatron.py
    │   │   ├── third_party
    │   │       ├── __init__.py
    │   │       └── vllm
    │   │       │   ├── __init__.py
    │   │       │   ├── vllm_v_0_3_1
    │   │       │       ├── __init__.py
    │   │       │       ├── arg_utils.py
    │   │       │       ├── config.py
    │   │       │       ├── llm.py
    │   │       │       ├── llm_engine_sp.py
    │   │       │       ├── model_loader.py
    │   │       │       ├── model_runner.py
    │   │       │       ├── parallel_state.py
    │   │       │       ├── tokenizer.py
    │   │       │       ├── weight_loaders.py
    │   │       │       └── worker.py
    │   │       │   ├── vllm_v_0_4_2
    │   │       │       ├── __init__.py
    │   │       │       ├── arg_utils.py
    │   │       │       ├── config.py
    │   │       │       ├── dtensor_weight_loaders.py
    │   │       │       ├── hf_weight_loader.py
    │   │       │       ├── llm.py
    │   │       │       ├── llm_engine_sp.py
    │   │       │       ├── megatron_weight_loaders.py
    │   │       │       ├── model_loader.py
    │   │       │       ├── model_runner.py
    │   │       │       ├── parallel_state.py
    │   │       │       ├── spmd_gpu_executor.py
    │   │       │       ├── tokenizer.py
    │   │       │       └── worker.py
    │   │       │   ├── vllm_v_0_5_4
    │   │       │       ├── __init__.py
    │   │       │       ├── arg_utils.py
    │   │       │       ├── config.py
    │   │       │       ├── dtensor_weight_loaders.py
    │   │       │       ├── hf_weight_loader.py
    │   │       │       ├── llm.py
    │   │       │       ├── llm_engine_sp.py
    │   │       │       ├── megatron_weight_loaders.py
    │   │       │       ├── model_loader.py
    │   │       │       ├── model_runner.py
    │   │       │       ├── parallel_state.py
    │   │       │       ├── spmd_gpu_executor.py
    │   │       │       ├── tokenizer.py
    │   │       │       └── worker.py
    │   │       │   └── vllm_v_0_6_3
    │   │       │       ├── __init__.py
    │   │       │       ├── arg_utils.py
    │   │       │       ├── config.py
    │   │       │       ├── dtensor_weight_loaders.py
    │   │       │       ├── hf_weight_loader.py
    │   │       │       ├── llm.py
    │   │       │       ├── llm_engine_sp.py
    │   │       │       ├── megatron_weight_loaders.py
    │   │       │       ├── model_loader.py
    │   │       │       ├── model_runner.py
    │   │       │       ├── parallel_state.py
    │   │       │       ├── spmd_gpu_executor.py
    │   │       │       ├── tokenizer.py
    │   │       │       └── worker.py
    │   │   ├── trainer
    │   │       ├── __init__.py
    │   │       ├── config
    │   │       │   ├── evaluation.yaml
    │   │       │   ├── generation.yaml
    │   │       │   ├── ppo_megatron_trainer.yaml
    │   │       │   ├── ppo_trainer.yaml
    │   │       │   └── sft_trainer.yaml
    │   │       ├── fsdp_sft_trainer.py
    │   │       ├── main_eval.py
    │   │       ├── main_generation.py
    │   │       ├── main_ppo.py
    │   │       ├── main_ppo_sky.py
    │   │       ├── ppo
    │   │       │   ├── __init__.py
    │   │       │   ├── core_algos.py
    │   │       │   └── ray_trainer.py
    │   │       └── runtime_env.yaml
    │   │   ├── utils
    │   │       ├── __init__.py
    │   │       ├── config.py
    │   │       ├── dataset
    │   │       │   ├── README.md
    │   │       │   ├── __init__.py
    │   │       │   ├── rl_dataset.py
    │   │       │   ├── rm_dataset.py
    │   │       │   └── sft_dataset.py
    │   │       ├── debug
    │   │       │   ├── __init__.py
    │   │       │   ├── performance.py
    │   │       │   └── trajectory_tracker.py
    │   │       ├── distributed.py
    │   │       ├── flops_counter.py
    │   │       ├── fs.py
    │   │       ├── fsdp_utils.py
    │   │       ├── hdfs_io.py
    │   │       ├── import_utils.py
    │   │       ├── logger
    │   │       │   ├── __init__.py
    │   │       │   └── aggregate_logger.py
    │   │       ├── logging_utils.py
    │   │       ├── megatron
    │   │       │   ├── __init__.py
    │   │       │   ├── memory.py
    │   │       │   ├── optimizer.py
    │   │       │   ├── optimizer_config.py
    │   │       │   ├── pipeline_parallel.py
    │   │       │   ├── sequence_parallel.py
    │   │       │   └── tensor_parallel.py
    │   │       ├── megatron_utils.py
    │   │       ├── memory_buffer.py
    │   │       ├── model.py
    │   │       ├── py_functional.py
    │   │       ├── ray_utils.py
    │   │       ├── rendezvous
    │   │       │   ├── __init__.py
    │   │       │   └── ray_backend.py
    │   │       ├── reward_score
    │   │       │   ├── __init__.py
    │   │       │   ├── evaluation_utils
    │   │       │   │   ├── code_util
    │   │       │   │   │   ├── __init__.py
    │   │       │   │   │   ├── testing_util.py
    │   │       │   │   │   └── utils.py
    │   │       │   │   └── math_util
    │   │       │   │   │   ├── __init__.py
    │   │       │   │   │   ├── grader.py
    │   │       │   │   │   ├── math_normalize.py
    │   │       │   │   │   └── testing_utlis.py
    │   │       │   ├── gsm8k.py
    │   │       │   ├── gt_verifier.py
    │   │       │   └── math.py
    │   │       ├── seqlen_balancing.py
    │   │       ├── tokenizer.py
    │   │       ├── torch_dtypes.py
    │   │       ├── torch_functional.py
    │   │       ├── tracking.py
    │   │       └── ulysses.py
    │   │   ├── version
    │   │       └── version
    │   │   └── workers
    │   │       ├── __init__.py
    │   │       ├── actor
    │   │           ├── __init__.py
    │   │           ├── base.py
    │   │           ├── dp_actor.py
    │   │           └── megatron_actor.py
    │   │       ├── critic
    │   │           ├── __init__.py
    │   │           ├── base.py
    │   │           ├── dp_critic.py
    │   │           └── megatron_critic.py
    │   │       ├── fsdp_workers.py
    │   │       ├── megatron_workers.py
    │   │       ├── reward_model
    │   │           ├── __init__.py
    │   │           ├── base.py
    │   │           └── megatron
    │   │           │   ├── __init__.py
    │   │           │   └── reward_model.py
    │   │       ├── rollout
    │   │           ├── __init__.py
    │   │           ├── base.py
    │   │           ├── hf_rollout.py
    │   │           ├── naive
    │   │           │   ├── __init__.py
    │   │           │   └── naive_rollout.py
    │   │           ├── tokenizer.py
    │   │           └── vllm_rollout
    │   │           │   ├── __init__.py
    │   │           │   └── vllm_rollout.py
    │   │       └── sharding_manager
    │   │           ├── __init__.py
    │   │           ├── base.py
    │   │           ├── fsdp_ulysses.py
    │   │           ├── fsdp_vllm.py
    │   │           └── megatron_vllm.py
    ├── test-time-scaling
    │   ├── README.md
    │   ├── assets
    │   │   └── figure1.png
    │   ├── codecontest_evaluate_multiprocess.py
    │   ├── evaluate_multiprocess.py
    │   ├── live_code_bench_execute.py
    │   ├── live_code_bench_program.py
    │   ├── pattern_icl_map.json
    │   ├── scripts
    │   │   ├── baselines
    │   │   │   ├── 4o_mini.sh
    │   │   │   ├── 4o_mini_cct.sh
    │   │   │   ├── o1_mini.sh
    │   │   │   ├── o1_preview.sh
    │   │   │   ├── o3_mini.sh
    │   │   │   ├── qwen0.5b.sh
    │   │   │   ├── qwen1.5b.sh
    │   │   │   ├── qwen14b.sh
    │   │   │   ├── qwen32b.sh
    │   │   │   ├── qwen3b.sh
    │   │   │   ├── qwen7b.sh
    │   │   │   ├── qwq32b.sh
    │   │   │   ├── r1qwen14b.sh
    │   │   │   ├── r1qwen32b.sh
    │   │   │   └── r1qwen7b.sh
    │   │   ├── baselines_selfdebug
    │   │   │   ├── 4o_mini_n_1_debug_public3_random.sh
    │   │   │   ├── o1_mini_n_1_debug_public3_random.sh
    │   │   │   ├── qwen0.5b_n_1_debug_public3_random.sh
    │   │   │   ├── qwen1.5b_n_1_debug_public3_random.sh
    │   │   │   ├── qwen14b_n_1_debug_public3_random.sh
    │   │   │   ├── qwen32b_n_1_debug_public3_random.sh
    │   │   │   ├── qwen3b_n_1_debug_public3_random.sh
    │   │   │   ├── qwen7b_n_1_debug_public3_random.sh
    │   │   │   ├── qwq32b_n_1_debug_public3_random.sh
    │   │   │   ├── r1qwen14b_n_1_debug_public3_random.sh
    │   │   │   ├── r1qwen32b_n_1_debug_public3_random.sh
    │   │   │   └── r1qwen7b_n_1_debug_public3_random.sh
    │   │   ├── final_first_cached
    │   │   │   ├── 4omini_n_16_debug_public3_select_first_cached.sh
    │   │   │   ├── batch_small_models_first.sh
    │   │   │   ├── gh200_1_batch.sh
    │   │   │   ├── gh200_2_batch.sh
    │   │   │   ├── o1mini_n_16_debug_public3_select_first_cached.sh
    │   │   │   ├── qwen0.5b_n_16_debug_public3_select_first_cached.sh
    │   │   │   ├── qwen1.5b_n_16_debug_public3_select_first_cached.sh
    │   │   │   ├── qwen14b_n_16_debug_public3_select_first_cached.sh
    │   │   │   ├── qwen32b_n_16_debug_public3_select_first_cached.sh
    │   │   │   ├── qwen3b_n_16_debug_public3_select_first_cached.sh
    │   │   │   ├── qwen7b_n_16_debug_public3_select_first_cached.sh
    │   │   │   ├── qwq32b_n_16_debug_public3_select_first_cached.sh
    │   │   │   ├── r1qwen14b_n_16_debug_public3_select_first_cached.sh
    │   │   │   ├── r1qwen32b_n_16_debug_public3_select_first_cached.sh
    │   │   │   └── r1qwen7b_n_16_debug_public3_select_first_cached.sh
    │   │   ├── final_gentest_notimeout_cached
    │   │   │   ├── 4omini_n_16_debug_public3_select_4omini_cached.sh
    │   │   │   ├── batch_small_models_gentest.sh
    │   │   │   ├── o1mini_n_16_debug_public3_select_4omini_cached.sh
    │   │   │   ├── qwen0.5b_n_16_debug_public3_select_4omini_cached.sh
    │   │   │   ├── qwen1.5b_n_16_debug_public3_select_4omini_cached.sh
    │   │   │   ├── qwen14b_n_16_debug_public3_select_4omini_cached.sh
    │   │   │   ├── qwen32b_n_16_debug_public3_select_4omini_cached.sh
    │   │   │   ├── qwen3b_n_16_debug_public3_select_4omini_cached.sh
    │   │   │   ├── qwen7b_n_16_debug_public3_select_4omini_cached.sh
    │   │   │   ├── qwq32b_n_16_debug_public3_select_4omini_cached.sh
    │   │   │   ├── r1qwen14b_n_16_debug_public3_select_4omini_cached.sh
    │   │   │   ├── r1qwen32b_n_16_debug_public3_select_4omini_cached.sh
    │   │   │   └── r1qwen7b_n_16_debug_public3_select_4omini_cached.sh
    │   │   ├── final_oracle
    │   │   │   ├── 4omini_n_16_debug_public3_select_oracle.sh
    │   │   │   ├── o1mini_n_16_debug_public3_select_oracle.sh
    │   │   │   ├── qwen0.5b_n_16_debug_public3_select_oracle.sh
    │   │   │   ├── qwen1.5b_n_32_debug_public3_select_oracle.sh
    │   │   │   ├── qwen14b_n_16_debug_public3_select_oracle.sh
    │   │   │   ├── qwen32b_n_16_debug_public3_select_oracle.sh
    │   │   │   ├── qwen32b_n_16_debug_public3_select_oracle_icl_patterns.sh
    │   │   │   ├── qwen3b_n_32_debug_public3_select_oracle.sh
    │   │   │   ├── qwen7b_n_16_debug_public3_select_oracle.sh
    │   │   │   ├── qwq32b_n_16_debug_public3_select_oracle.sh
    │   │   │   ├── r1qwen14b_n_16_debug_public3_select_oracle.sh
    │   │   │   ├── r1qwen32b_n_16_debug_public3_select_oracle.sh
    │   │   │   └── r1qwen7b_n_16_debug_public3_select_oracle.sh
    │   │   ├── final_random_cached
    │   │   │   ├── 4omini_n_16_debug_public3_select_random_cached.sh
    │   │   │   ├── batch_small_models_random.sh
    │   │   │   ├── o1mini_n_16_debug_public3_select_random_cached.sh
    │   │   │   ├── qwen0.5b_n_16_debug_public3_select_random_cached.sh
    │   │   │   ├── qwen1.5b_n_16_debug_public3_select_random_cached.sh
    │   │   │   ├── qwen14b_n_16_debug_public3_select_random_cached.sh
    │   │   │   ├── qwen32b_n_16_debug_public3_select_random_cached.sh
    │   │   │   ├── qwen3b_n_16_debug_public3_select_random_cached.sh
    │   │   │   ├── qwen7b_n_16_debug_public3_select_random_cached.sh
    │   │   │   ├── qwq32b_n_16_debug_public3_select_random_cached.sh
    │   │   │   ├── r1qwen14b_n_16_debug_public3_select_random_cached.sh
    │   │   │   ├── r1qwen32b_n_16_debug_public3_select_random_cached.sh
    │   │   │   └── r1qwen7b_n_16_debug_public3_select_random_cached.sh
    │   │   ├── final_tool_assisted_cached
    │   │   │   ├── 4omini_n_16_debug_public3_select_tool_assisted_cached.sh
    │   │   │   ├── batch_small_models_tool_assisted.sh
    │   │   │   ├── o1mini_n_16_debug_public3_select_tool_assisted_cached.sh
    │   │   │   ├── qwen0.5b_n_16_debug_public3_select_tool_assisted_cached.sh
    │   │   │   ├── qwen1.5b_n_16_debug_public3_select_tool_assisted_cached.sh
    │   │   │   ├── qwen14b_n_16_debug_public3_select_tool_assisted_cached.sh
    │   │   │   ├── qwen32b_n_16_debug_public3_select_tool_assisted_cached.sh
    │   │   │   ├── qwen3b_n_16_debug_public3_select_tool_assisted_cached.sh
    │   │   │   ├── qwen7b_n_16_debug_public3_select_tool_assisted_cached.sh
    │   │   │   ├── qwq32b_n_16_debug_public3_select_tool_assisted_cached.sh
    │   │   │   ├── r1qwen14b_n_16_debug_public3_select_tool_assisted_cached.sh
    │   │   │   ├── r1qwen32b_n_16_debug_public3_select_tool_assisted_cached.sh
    │   │   │   └── r1qwen7b_n_16_debug_public3_select_tool_assisted_cached.sh
    │   │   ├── majority_baselines
    │   │   │   ├── 4o_mini_n_16_majority.sh
    │   │   │   ├── o1_mini_n_16_majority.sh
    │   │   │   ├── qwen0.5b_n_16_majority.sh
    │   │   │   ├── qwen1.5b_n_16_majority.sh
    │   │   │   ├── qwen14b_n_16_majority.sh
    │   │   │   ├── qwen32b_n_16_majority.sh
    │   │   │   ├── qwen3b_n_16_majority.sh
    │   │   │   ├── qwen7b_n_16_majority.sh
    │   │   │   ├── qwq32b_n_16_majority.sh
    │   │   │   ├── r1qwen14b_n_16_majority.sh
    │   │   │   ├── r1qwen32b_n_16_majority.sh
    │   │   │   └── r1qwen7b_n_16_majority.sh
    │   │   ├── sec4_parallel_sample
    │   │   │   ├── temp02_4o_mini.sh
    │   │   │   ├── temp02_qwen7b.sh
    │   │   │   ├── temp02_qwen_32b.sh
    │   │   │   ├── temp05_4o_mini.sh
    │   │   │   ├── temp05_qwen7b.sh
    │   │   │   ├── temp05_qwen_32b.sh
    │   │   │   ├── temp09_4o_mini.sh
    │   │   │   ├── temp09_qwen7b.sh
    │   │   │   ├── temp09_qwen_32b.sh
    │   │   │   ├── vanilla_4o_mini.sh
    │   │   │   ├── vanilla_qwen_32b.sh
    │   │   │   ├── vanilla_qwen_7b.sh
    │   │   │   ├── vanilla_qwq_32b.sh
    │   │   │   ├── vanilla_qwq_32b_hard.sh
    │   │   │   └── vanilla_qwq_32b_medium.sh
    │   │   ├── sec5_revision
    │   │   │   ├── last_4o_mini.sh
    │   │   │   ├── last_qwen_32b.sh
    │   │   │   ├── last_qwen_7b.sh
    │   │   │   ├── last_qwq_32b_with_4o_debug.sh
    │   │   │   ├── refine_4o_mini.sh
    │   │   │   ├── refine_qwen_32b.sh
    │   │   │   ├── refine_qwen_7b.sh
    │   │   │   ├── vanilla_4o_mini.sh
    │   │   │   ├── vanilla_qwen_32b.sh
    │   │   │   ├── vanilla_qwen_7b.sh
    │   │   │   ├── vanilla_wo_reasoning_qwq_32b_with_4o_debug.sh
    │   │   │   └── vanilla_wo_reasoning_qwq_self_rewrite.sh
    │   │   ├── sec6
    │   │   │   ├── 4o_mini_tool_assisted.sh
    │   │   │   ├── 4o_mini_vanilla_baseline.sh
    │   │   │   ├── 4o_mini_vanilla_with_4omini_generated_and_timeout_test.sh
    │   │   │   ├── qwen_32b_tool_assisted.sh
    │   │   │   ├── qwen_32b_vanilla_baseline.sh
    │   │   │   ├── qwen_32b_with4omini_test_and_timeout_vanilla.sh
    │   │   │   ├── qwen_7b_tool_assisted.sh
    │   │   │   ├── qwen_7b_vanilla_baseline.sh
    │   │   │   ├── qwen_7b_with4omini_test_and_timeout_vanilla.sh
    │   │   │   ├── qwq_32b_with_4omini_test_and_timeout_wo_reasoning_vanilla_4o_debug.sh
    │   │   │   ├── qwq_32b_wo_reasoning_vanilla_4o_debug_baseline.sh
    │   │   │   └── qwq_32b_wo_reasoning_vanilla_4o_debug_tool_assisted.sh
    │   │   ├── sec6_llm_judge_baseline
    │   │   │   ├── 4o_mini_llm_judge_baseline.sh
    │   │   │   ├── qwen_32b_llm_judge_baseline.sh
    │   │   │   ├── qwen_7b_llm_judge_baseline.sh
    │   │   │   └── qwq_32b_wo_reasoning_vanilla_4o_debug_llm_judge_baseline.sh
    │   │   └── sec6_o1_generated
    │   │   │   ├── 4o_mini_vanilla_with_o1_generated_and_timeout_test.sh
    │   │   │   ├── qwen_32b_with_o1__test_and_timeout_vanilla.sh
    │   │   │   ├── qwen_7b_with_o1_test_and_timeout_vanilla.sh
    │   │   │   └── qwq_32b_with_o1_test_and_timeout_wo_reasoning_vanilla_4o_debug.sh
    │   └── util.py
    └── train
    │   ├── LLaMA-Factory
    │       ├── .deepspeed_env
    │       ├── .dockerignore
    │       ├── .env.local
    │       ├── .gitattributes
    │       ├── .github
    │       │   ├── CODE_OF_CONDUCT.md
    │       │   ├── CONTRIBUTING.md
    │       │   ├── ISSUE_TEMPLATE
    │       │   │   └── bug-report.yml
    │       │   ├── PULL_REQUEST_TEMPLATE.md
    │       │   ├── SECURITY.md
    │       │   └── workflows
    │       │   │   ├── label_issue.yml
    │       │   │   ├── publish.yml
    │       │   │   └── tests.yml
    │       ├── .gitignore
    │       ├── .pre-commit-config.yaml
    │       ├── CITATION.cff
    │       ├── LICENSE
    │       ├── MANIFEST.in
    │       ├── Makefile
    │       ├── README.md
    │       ├── README_zh.md
    │       ├── assets
    │       │   ├── benchmark.svg
    │       │   ├── logo.png
    │       │   ├── wechat.jpg
    │       │   └── wechat_npu.jpg
    │       ├── data
    │       │   ├── README.md
    │       │   ├── README_zh.md
    │       │   ├── alpaca_en_demo.json
    │       │   ├── alpaca_zh_demo.json
    │       │   ├── belle_multiturn
    │       │   │   └── belle_multiturn.py
    │       │   ├── c4_demo.json
    │       │   ├── dataset_info.json
    │       │   ├── dpo_en_demo.json
    │       │   ├── dpo_zh_demo.json
    │       │   ├── glaive_toolcall_en_demo.json
    │       │   ├── glaive_toolcall_zh_demo.json
    │       │   ├── hh_rlhf_en
    │       │   │   └── hh_rlhf_en.py
    │       │   ├── identity.json
    │       │   ├── kto_en_demo.json
    │       │   ├── mllm_demo.json
    │       │   ├── mllm_demo_data
    │       │   │   ├── 1.jpg
    │       │   │   ├── 1.mp4
    │       │   │   ├── 2.avi
    │       │   │   ├── 2.jpg
    │       │   │   ├── 3.jpg
    │       │   │   └── 3.mp4
    │       │   ├── mllm_video_demo.json
    │       │   ├── ultra_chat
    │       │   │   └── ultra_chat.py
    │       │   └── wiki_demo.txt
    │       ├── docker
    │       │   ├── docker-cuda
    │       │   │   ├── Dockerfile
    │       │   │   └── docker-compose.yml
    │       │   ├── docker-npu
    │       │   │   ├── Dockerfile
    │       │   │   └── docker-compose.yml
    │       │   └── docker-rocm
    │       │   │   ├── Dockerfile
    │       │   │   └── docker-compose.yml
    │       ├── evaluation
    │       │   ├── ceval
    │       │   │   ├── ceval.py
    │       │   │   ├── ceval.zip
    │       │   │   └── mapping.json
    │       │   ├── cmmlu
    │       │   │   ├── cmmlu.py
    │       │   │   ├── cmmlu.zip
    │       │   │   └── mapping.json
    │       │   └── mmlu
    │       │   │   ├── mapping.json
    │       │   │   ├── mmlu.py
    │       │   │   └── mmlu.zip
    │       ├── examples
    │       │   ├── README.md
    │       │   ├── README_zh.md
    │       │   ├── accelerate
    │       │   │   └── fsdp_config.yaml
    │       │   ├── deepspeed
    │       │   │   ├── ds_z0_config.json
    │       │   │   ├── ds_z2_config.json
    │       │   │   ├── ds_z2_offload_config.json
    │       │   │   ├── ds_z3_config.json
    │       │   │   └── ds_z3_offload_config.json
    │       │   ├── extras
    │       │   │   ├── adam_mini
    │       │   │   │   └── qwen2_full_sft.yaml
    │       │   │   ├── badam
    │       │   │   │   └── llama3_full_sft.yaml
    │       │   │   ├── fsdp_qlora
    │       │   │   │   ├── llama3_lora_sft.yaml
    │       │   │   │   └── train.sh
    │       │   │   ├── galore
    │       │   │   │   └── llama3_full_sft.yaml
    │       │   │   ├── llama_pro
    │       │   │   │   ├── expand.sh
    │       │   │   │   └── llama3_freeze_sft.yaml
    │       │   │   ├── loraplus
    │       │   │   │   └── llama3_lora_sft.yaml
    │       │   │   ├── mod
    │       │   │   │   └── llama3_full_sft.yaml
    │       │   │   ├── nlg_eval
    │       │   │   │   └── llama3_lora_predict.yaml
    │       │   │   └── pissa
    │       │   │   │   ├── init.sh
    │       │   │   │   └── llama3_lora_sft.yaml
    │       │   ├── inference
    │       │   │   ├── llama3.yaml
    │       │   │   ├── llama3_lora_sft.yaml
    │       │   │   ├── llama3_vllm.yaml
    │       │   │   ├── llava1_5.yaml
    │       │   │   └── qwen2_vl.yaml
    │       │   ├── merge_lora
    │       │   │   ├── llama3_gptq.yaml
    │       │   │   ├── llama3_lora_sft.yaml
    │       │   │   └── qwen2vl_lora_sft.yaml
    │       │   ├── train_full
    │       │   │   ├── llama3_full_sft.yaml
    │       │   │   ├── qwen2_full_sft.yaml
    │       │   │   ├── qwen2_full_simpo.yaml
    │       │   │   └── qwen2vl_full_sft.yaml
    │       │   ├── train_lora
    │       │   │   ├── llama3_lora_dpo.yaml
    │       │   │   ├── llama3_lora_eval.yaml
    │       │   │   ├── llama3_lora_kto.yaml
    │       │   │   ├── llama3_lora_ppo.yaml
    │       │   │   ├── llama3_lora_pretrain.yaml
    │       │   │   ├── llama3_lora_reward.yaml
    │       │   │   ├── llama3_lora_sft.yaml
    │       │   │   ├── llama3_lora_sft_ds3.yaml
    │       │   │   ├── llama3_preprocess.yaml
    │       │   │   ├── llava1_5_lora_sft.yaml
    │       │   │   ├── qwen2_lora_config.yaml
    │       │   │   ├── qwen2vl_lora_dpo.yaml
    │       │   │   └── qwen2vl_lora_sft.yaml
    │       │   └── train_qlora
    │       │   │   ├── llama3_lora_sft_aqlm.yaml
    │       │   │   ├── llama3_lora_sft_awq.yaml
    │       │   │   ├── llama3_lora_sft_gptq.yaml
    │       │   │   └── llama3_lora_sft_otfq.yaml
    │       ├── launch_3_nodes.sh
    │       ├── pyproject.toml
    │       ├── requirements.txt
    │       ├── scripts
    │       │   ├── api_example
    │       │   │   ├── test_image.py
    │       │   │   └── test_toolcall.py
    │       │   ├── convert_ckpt
    │       │   │   ├── llamafy_baichuan2.py
    │       │   │   └── llamafy_qwen.py
    │       │   ├── llama_pro.py
    │       │   ├── loftq_init.py
    │       │   ├── pissa_init.py
    │       │   ├── stat_utils
    │       │   │   ├── cal_flops.py
    │       │   │   ├── cal_lr.py
    │       │   │   ├── cal_mfu.py
    │       │   │   ├── cal_ppl.py
    │       │   │   └── length_cdf.py
    │       │   └── vllm_infer.py
    │       ├── setup.py
    │       ├── single_node.sh
    │       ├── src
    │       │   ├── api.py
    │       │   ├── llamafactory
    │       │   │   ├── __init__.py
    │       │   │   ├── api
    │       │   │   │   ├── __init__.py
    │       │   │   │   ├── app.py
    │       │   │   │   ├── chat.py
    │       │   │   │   ├── common.py
    │       │   │   │   └── protocol.py
    │       │   │   ├── chat
    │       │   │   │   ├── __init__.py
    │       │   │   │   ├── base_engine.py
    │       │   │   │   ├── chat_model.py
    │       │   │   │   ├── hf_engine.py
    │       │   │   │   └── vllm_engine.py
    │       │   │   ├── cli.py
    │       │   │   ├── data
    │       │   │   │   ├── __init__.py
    │       │   │   │   ├── aligner.py
    │       │   │   │   ├── collator.py
    │       │   │   │   ├── data_utils.py
    │       │   │   │   ├── formatter.py
    │       │   │   │   ├── loader.py
    │       │   │   │   ├── mm_plugin.py
    │       │   │   │   ├── parser.py
    │       │   │   │   ├── preprocess.py
    │       │   │   │   ├── processors
    │       │   │   │   │   ├── __init__.py
    │       │   │   │   │   ├── feedback.py
    │       │   │   │   │   ├── pairwise.py
    │       │   │   │   │   ├── pretrain.py
    │       │   │   │   │   ├── processor_utils.py
    │       │   │   │   │   ├── supervised.py
    │       │   │   │   │   └── unsupervised.py
    │       │   │   │   ├── template.py
    │       │   │   │   └── tool_utils.py
    │       │   │   ├── eval
    │       │   │   │   ├── __init__.py
    │       │   │   │   ├── evaluator.py
    │       │   │   │   └── template.py
    │       │   │   ├── extras
    │       │   │   │   ├── __init__.py
    │       │   │   │   ├── constants.py
    │       │   │   │   ├── env.py
    │       │   │   │   ├── logging.py
    │       │   │   │   ├── misc.py
    │       │   │   │   ├── packages.py
    │       │   │   │   └── ploting.py
    │       │   │   ├── hparams
    │       │   │   │   ├── __init__.py
    │       │   │   │   ├── data_args.py
    │       │   │   │   ├── evaluation_args.py
    │       │   │   │   ├── finetuning_args.py
    │       │   │   │   ├── generating_args.py
    │       │   │   │   ├── model_args.py
    │       │   │   │   └── parser.py
    │       │   │   ├── launcher.py
    │       │   │   ├── model
    │       │   │   │   ├── __init__.py
    │       │   │   │   ├── adapter.py
    │       │   │   │   ├── loader.py
    │       │   │   │   ├── model_utils
    │       │   │   │   │   ├── __init__.py
    │       │   │   │   │   ├── attention.py
    │       │   │   │   │   ├── checkpointing.py
    │       │   │   │   │   ├── embedding.py
    │       │   │   │   │   ├── liger_kernel.py
    │       │   │   │   │   ├── longlora.py
    │       │   │   │   │   ├── misc.py
    │       │   │   │   │   ├── mod.py
    │       │   │   │   │   ├── moe.py
    │       │   │   │   │   ├── packing.py
    │       │   │   │   │   ├── quantization.py
    │       │   │   │   │   ├── rope.py
    │       │   │   │   │   ├── unsloth.py
    │       │   │   │   │   ├── valuehead.py
    │       │   │   │   │   └── visual.py
    │       │   │   │   └── patcher.py
    │       │   │   ├── train
    │       │   │   │   ├── __init__.py
    │       │   │   │   ├── callbacks.py
    │       │   │   │   ├── dpo
    │       │   │   │   │   ├── __init__.py
    │       │   │   │   │   ├── trainer.py
    │       │   │   │   │   └── workflow.py
    │       │   │   │   ├── kto
    │       │   │   │   │   ├── __init__.py
    │       │   │   │   │   ├── trainer.py
    │       │   │   │   │   └── workflow.py
    │       │   │   │   ├── ppo
    │       │   │   │   │   ├── __init__.py
    │       │   │   │   │   ├── ppo_utils.py
    │       │   │   │   │   ├── trainer.py
    │       │   │   │   │   └── workflow.py
    │       │   │   │   ├── pt
    │       │   │   │   │   ├── __init__.py
    │       │   │   │   │   ├── trainer.py
    │       │   │   │   │   └── workflow.py
    │       │   │   │   ├── rm
    │       │   │   │   │   ├── __init__.py
    │       │   │   │   │   ├── metric.py
    │       │   │   │   │   ├── trainer.py
    │       │   │   │   │   └── workflow.py
    │       │   │   │   ├── sft
    │       │   │   │   │   ├── __init__.py
    │       │   │   │   │   ├── metric.py
    │       │   │   │   │   ├── trainer.py
    │       │   │   │   │   └── workflow.py
    │       │   │   │   ├── test_utils.py
    │       │   │   │   ├── trainer_utils.py
    │       │   │   │   └── tuner.py
    │       │   │   └── webui
    │       │   │   │   ├── __init__.py
    │       │   │   │   ├── chatter.py
    │       │   │   │   ├── common.py
    │       │   │   │   ├── components
    │       │   │   │       ├── __init__.py
    │       │   │   │       ├── chatbot.py
    │       │   │   │       ├── data.py
    │       │   │   │       ├── eval.py
    │       │   │   │       ├── export.py
    │       │   │   │       ├── infer.py
    │       │   │   │       ├── top.py
    │       │   │   │       └── train.py
    │       │   │   │   ├── css.py
    │       │   │   │   ├── engine.py
    │       │   │   │   ├── interface.py
    │       │   │   │   ├── locales.py
    │       │   │   │   ├── manager.py
    │       │   │   │   ├── runner.py
    │       │   │   │   └── utils.py
    │       │   ├── train.py
    │       │   └── webui.py
    │       ├── tests
    │       │   ├── data
    │       │   │   ├── processors
    │       │   │   │   ├── test_feedback.py
    │       │   │   │   ├── test_pairwise.py
    │       │   │   │   ├── test_processor_utils.py
    │       │   │   │   ├── test_supervised.py
    │       │   │   │   └── test_unsupervised.py
    │       │   │   ├── test_collator.py
    │       │   │   ├── test_formatter.py
    │       │   │   ├── test_mm_plugin.py
    │       │   │   └── test_template.py
    │       │   ├── e2e
    │       │   │   ├── test_chat.py
    │       │   │   └── test_train.py
    │       │   ├── eval
    │       │   │   └── test_eval_template.py
    │       │   └── model
    │       │   │   ├── model_utils
    │       │   │       ├── test_attention.py
    │       │   │       ├── test_checkpointing.py
    │       │   │       └── test_packing.py
    │       │   │   ├── test_base.py
    │       │   │   ├── test_freeze.py
    │       │   │   ├── test_full.py
    │       │   │   ├── test_lora.py
    │       │   │   └── test_pissa.py
    │       ├── train.sh
    │       └── zero3_config.json
    │   └── README.md
├── tests
    ├── __init__.py
    └── evals
    │   ├── __init__.py
    │   ├── scoring
    │       ├── __init__.py
    │       ├── apps
    │       │   └── test_apps.py
    │       ├── taco
    │       │   └── test_taco.py
    │       └── test_base.py
    │   ├── tasks
    │       ├── test_aime.py
    │       ├── test_amc.py
    │       ├── test_math.py
    │       ├── test_mmlu.py
    │       ├── test_mmlu_pro.py
    │       └── test_preprocessing.py
    │   ├── test_cli.py
    │   └── util
    │       ├── test_cli_util.py
    │       ├── test_common.py
    │       └── test_math_parsing.py
└── uv.lock


/.gitattributes:
--------------------------------------------------------------------------------
1 | data/Sky-T1_data_17k.json filter=lfs diff=lfs merge=lfs -text
2 | 


--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
 1 | repos:
 2 |   - repo: https://github.com/astral-sh/ruff-pre-commit
 3 |     rev: v0.9.3
 4 |     hooks:
 5 |       - id: ruff
 6 |         args: [ --fix, --exit-non-zero-on-fix ]
 7 |         # NOTE (sumanthrh): Many of the files excluded here are used for validating code generation, and linters do not recognize some of the logic in these files. skythought/train is excluded for now because it's a fork of Llamafactory
 8 |         exclude: (^skythought/train/.*|^skythought/skythought-rl/.*|pyext2\.py|taco_util\.py|apps_util\.py|scripts/prompts\.py|skythought/test-time-scaling/.*)$
 9 | 
10 | 
11 |   # Black needs to be run after ruff with --fix
12 |   - repo: https://github.com/psf/black
13 |     rev: 24.10.0
14 |     hooks:
15 |       - id: black
16 |         exclude: (^skythought/train/.*|^skythought/skythought-rl/.*|pyext2\.py|skythought/test-time-scaling/.*)$
17 | 


--------------------------------------------------------------------------------
/assets/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NovaSky-AI/SkyThought/cf3b941afcd439cde56f47789579822398a7789d/assets/.gitkeep


--------------------------------------------------------------------------------
/assets/cli.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NovaSky-AI/SkyThought/cf3b941afcd439cde56f47789579822398a7789d/assets/cli.png


--------------------------------------------------------------------------------
/assets/flow.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NovaSky-AI/SkyThought/cf3b941afcd439cde56f47789579822398a7789d/assets/flow.png


--------------------------------------------------------------------------------
/format.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | # Install pre-commit and run every configured hook over the repository.
 3 | set -e
 4 | 
 5 | # Prefer uv for the install when it is on PATH; otherwise use plain pip.
 6 | if command -v uv >/dev/null 2>&1; then
 7 |     uv pip install -q pre-commit
 8 | else
 9 |     pip install -q pre-commit
10 | fi
11 | 
12 | # pre-commit run --all-files always runs from the root directory. Paths that are skipped are listed under `exclude` in .pre-commit-config.yaml.
13 | pre-commit run --all-files --config .pre-commit-config.yaml
14 | 


--------------------------------------------------------------------------------
/recipes/sky-t1-preview/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NovaSky-AI/SkyThought/cf3b941afcd439cde56f47789579822398a7789d/recipes/sky-t1-preview/__init__.py


--------------------------------------------------------------------------------
/scripts/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NovaSky-AI/SkyThought/cf3b941afcd439cde56f47789579822398a7789d/scripts/__init__.py


--------------------------------------------------------------------------------
/skythought/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NovaSky-AI/SkyThought/cf3b941afcd439cde56f47789579822398a7789d/skythought/__init__.py


--------------------------------------------------------------------------------
/skythought/evals/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NovaSky-AI/SkyThought/cf3b941afcd439cde56f47789579822398a7789d/skythought/evals/__init__.py


--------------------------------------------------------------------------------
/skythought/evals/batch/__init__.py:
--------------------------------------------------------------------------------
 1 | __all__ = []
 2 | 
 3 | from .engines import init_engine_from_config
 4 | from .pipeline import Pipeline
 5 | from .workload import (
 6 |     EvalWorkload,
 7 | )
 8 | 
 9 | __all__ = [
10 |     "Pipeline",
11 |     "init_engine_from_config",
12 |     "EvalWorkload",
13 | ]
14 | 


--------------------------------------------------------------------------------
/skythought/evals/batch/engines/__init__.py:
--------------------------------------------------------------------------------
 1 | """LLM Engines."""
 2 | 
 3 | __all__ = []
 4 | 
 5 | from .initializer import EngineInitializerBase, init_engine_from_config
 6 | 
 7 | __all__ = [
 8 |     "EngineInitializerBase",
 9 |     "init_engine_from_config",
10 | ]
11 | 


--------------------------------------------------------------------------------
/skythought/evals/batch/engines/base.py:
--------------------------------------------------------------------------------
 1 | """Engine base."""
 2 | 
 3 | from typing import Any, AsyncGenerator, Dict
 4 | 
 5 | import numpy as np
 6 | 
 7 | 
 8 | class EngineBase:
 9 |     """Base class for engines; ``__call__`` here only raises."""
10 | 
11 |     async def __call__(
12 |         self, batch: Dict[str, np.ndarray]
13 |     ) -> AsyncGenerator[Dict[str, Any], None]:
14 |         """Call the LLM engine asynchronously to process a Ray Data batch.
15 | 
16 |         Args:
17 |             batch: The batch, as a mapping from string keys to numpy arrays.
18 | 
19 |         Yields:
20 |             The output, one dictionary per yielded item.
21 |         """
22 |         raise NotImplementedError
23 | 


--------------------------------------------------------------------------------
/skythought/evals/common/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NovaSky-AI/SkyThought/cf3b941afcd439cde56f47789579822398a7789d/skythought/evals/common/__init__.py


--------------------------------------------------------------------------------
/skythought/evals/labeled_numina_difficulty/README.md:
--------------------------------------------------------------------------------
1 | # Labeled NUMINA Difficulty Data 
2 | 
3 | We also provide NUMINA data labeled with difficulty in the following files: `labeled_amc_aime_0_-1.json`, `labeled_math_0_-1.json`, `labeled_olympiads_0_-1.json`. These files can be downloaded from [HuggingFace](https://huggingface.co/datasets/NovaSky-AI/labeled_numina_difficulty).


--------------------------------------------------------------------------------
/skythought/evals/models/__init__.py:
--------------------------------------------------------------------------------
1 | from .base import ModelConfig, get_system_prompt_keys
2 | 
3 | __all__ = ["ModelConfig", "get_system_prompt_keys"]
4 | 


--------------------------------------------------------------------------------
/skythought/evals/scoring/__init__.py:
--------------------------------------------------------------------------------
1 | from .base import Scorer
2 | from .gsm8k import GSM8KScorer
3 | from .math import MathEqualScorer, MathVerifyScorer
4 | 
5 | __all__ = ["Scorer", "MathEqualScorer", "MathVerifyScorer", "GSM8KScorer"]
6 | 


--------------------------------------------------------------------------------
/skythought/evals/scoring/apps/__init__.py:
--------------------------------------------------------------------------------
1 | from .apps_scorer import APPSScorer
2 | 
3 | __all__ = ["APPSScorer"]
4 | 


--------------------------------------------------------------------------------
/skythought/evals/scoring/gsm8k/__init__.py:
--------------------------------------------------------------------------------
1 | from .gsm8k_scorer import GSM8KScorer
2 | 
3 | __all__ = ["GSM8KScorer"]
4 | 


--------------------------------------------------------------------------------
/skythought/evals/scoring/ifeval/__init__.py:
--------------------------------------------------------------------------------
1 | from .ifeval_scorer import IfEvalScorer
2 | 
3 | __all__ = ["IfEvalScorer"]
4 | 


--------------------------------------------------------------------------------
/skythought/evals/scoring/livecodebench/__init__.py:
--------------------------------------------------------------------------------
1 | from .livecodebench_scorer import LiveCodeBenchScorer
2 | 
3 | __all__ = ["LiveCodeBenchScorer"]
4 | 


--------------------------------------------------------------------------------
/skythought/evals/scoring/math/__init__.py:
--------------------------------------------------------------------------------
1 | from .math_scorer import MathEqualScorer, MathVerifyScorer
2 | 
3 | __all__ = ["MathVerifyScorer", "MathEqualScorer"]
4 | 


--------------------------------------------------------------------------------
/skythought/evals/scoring/taco/__init__.py:
--------------------------------------------------------------------------------
1 | from .taco_scorer import TACOScorer
2 | 
3 | __all__ = ["TACOScorer"]
4 | 


--------------------------------------------------------------------------------
/skythought/evals/scoring/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NovaSky-AI/SkyThought/cf3b941afcd439cde56f47789579822398a7789d/skythought/evals/scoring/utils/__init__.py


--------------------------------------------------------------------------------
/skythought/evals/tasks/aime/aime24.yaml:
--------------------------------------------------------------------------------
 1 | handler: aime
 2 | dataset_path: AI-MO/aimo-validation-aime
 3 | dataset_split: train
 4 | question_key: problem
 5 | answer_key: answer
 6 | templating_parameters:
 7 |   template: "Return your final response within \\boxed{{}}. {prompt}"
 8 | preprocess_config:
 9 |   url: "2024"
10 | 


--------------------------------------------------------------------------------
/skythought/evals/tasks/aime/aime24_sky.yaml:
--------------------------------------------------------------------------------
1 | handler: aime
2 | dataset_path: AI-MO/aimo-validation-aime
3 | dataset_split: train
4 | question_key: problem
5 | answer_key: answer
6 | templating_parameters:
7 |   template: "{prompt}\nReturn your final response within \\boxed{{}}"
8 | preprocess_config:
9 |   url: "2024"


--------------------------------------------------------------------------------
/skythought/evals/tasks/aime/aime25_1.yaml:
--------------------------------------------------------------------------------
 1 | handler: aime
 2 | dataset_path: opencompass/AIME2025
 3 | dataset_subset: AIME2025-I
 4 | dataset_split: test
 5 | question_key: question
 6 | answer_key: answer
 7 | templating_parameters:
 8 |   template: "{prompt}\nReturn your final response within \\boxed{{}}"
 9 | 
10 | 


--------------------------------------------------------------------------------
/skythought/evals/tasks/aime/aime25_2.yaml:
--------------------------------------------------------------------------------
 1 | handler: aime
 2 | dataset_path: opencompass/AIME2025
 3 | dataset_subset: AIME2025-II
 4 | dataset_split: test
 5 | question_key: question
 6 | answer_key: answer
 7 | templating_parameters:
 8 |   template: "{prompt}\nReturn your final response within \\boxed{{}}"
 9 | 
10 | 


--------------------------------------------------------------------------------
/skythought/evals/tasks/amc23/amc23.yaml:
--------------------------------------------------------------------------------
 1 | handler: amc23
 2 | dataset_path: AI-MO/aimo-validation-amc
 3 | dataset_kwargs:
 4 |   trust_remote_code: true
 5 | dataset_split: train
 6 | question_key: problem
 7 | answer_key: answer
 8 | # Optionally, you can filter the dataset by difficulty
 9 | # preprocess_config:
10 | #   difficulty: easy
11 | templating_parameters: 
12 |   template: "Return your final response within \\boxed{{}}. {problem}"
13 | 


--------------------------------------------------------------------------------
/skythought/evals/tasks/amc23/amc23_handler.py:
--------------------------------------------------------------------------------
 1 | from ..math.math_handler import MathTaskHandler
 2 | 
 3 | 
 4 | class AMC23TaskHandler(MathTaskHandler):
 5 |     def load_and_filter_dataset(
 6 |         self, start, end, split=None, subset=None, difficulty=None
 7 |     ):
 8 |         train_data = self.load_dataset(subset=subset, split=split).to_pandas()
 9 |         filtered_data = train_data[train_data["url"].str.contains("2023", na=False)]
10 |         return filtered_data.iloc[start:end] if end > 0 else filtered_data.iloc[start:]
11 | 


--------------------------------------------------------------------------------
/skythought/evals/tasks/apps/apps.yaml:
--------------------------------------------------------------------------------
 1 | handler: apps
 2 | dataset_path: codeparrot/apps
 3 | dataset_subset: all
 4 | dataset_kwargs:
 5 |   trust_remote_code: true
 6 | dataset_split: test
 7 | question_key: question
 8 | answer_key: null
 9 | # preprocess_config:
10 | #   difficulty: null
11 | templating_parameters:
12 |   with_fn_name_template: "Generate an executable Python function generated from the given prompt. The function should take stdin as input and print the output. Simply call the function after the definition. {prompt}" 
13 |   without_fn_name_template: "Generate an executable Python function generated from the given prompt. Return the function body without invoking it at the final solution. {prompt}"
14 |   # Add starter code on top of the initial template
15 |   with_starter_code_template: "{input}\n{starter_code}"
16 | # Optionally, you can filter the dataset by difficulty
17 | # preprocess_config:
18 | #   difficulty: easy
19 | 


--------------------------------------------------------------------------------
/skythought/evals/tasks/arc/arc_c.yaml:
--------------------------------------------------------------------------------
1 | handler: arc_c
2 | dataset_path: allenai/ai2_arc
3 | dataset_subset: ARC-Challenge
4 | dataset_split: train
5 | question_key: question
6 | answer_key: answerKey
7 | templating_parameters:
8 |   # We combine the choices for a question into a `choices_text` entry in the dataset
9 |   template: "Given the following question and four candidate answers (A, B, C and D), choose the best answer. Your response should end with \"The best answer is [the_answer_letter]\" where [the_answer_letter] is one of the four letter choice (A, B, C, or D).\n{question}\n{choices_text}"


--------------------------------------------------------------------------------
/skythought/evals/tasks/gpqa_diamond/gpqa_diamond.yaml:
--------------------------------------------------------------------------------
1 | handler: gpqa_diamond
2 | dataset_path: Idavidrein/gpqa
3 | dataset_subset: gpqa_diamond
4 | dataset_split: train
5 | question_key: Question
6 | answer_key: Answer
7 | templating_parameters:
8 |   # For GPQA, we combine the Question key and the multiple choice answers into a single `prompt` entry
9 |   template:  "Return your final response within \\boxed{{}} and only include the letter choice (A, B, C, or D) as your final response. {prompt}" 


--------------------------------------------------------------------------------
/skythought/evals/tasks/gsm8k/gsm8k.yaml:
--------------------------------------------------------------------------------
 1 | handler: gsm8k
 2 | dataset_path: "openai/gsm8k"
 3 | dataset_subset: main 
 4 | dataset_split: test
 5 | question_key: question
 6 | answer_key: answer
 7 | templating_parameters:
 8 |   template: "Given the following problem, reason and give a final answer to the problem.\nProblem: {question}\nYour response should end with \"The final answer is [answer]\" where [answer] is the response to the problem."
 9 | 
10 | 


--------------------------------------------------------------------------------
/skythought/evals/tasks/liveaops/liveaops.yaml:
--------------------------------------------------------------------------------
1 | handler: liveaops
2 | dataset_path: https://livemathbench.github.io/data/LiveAoPSBench-2024.jsonl
3 | dataset_subset: null # which subset on huggingface. Not applicable for a URL dataset
4 | dataset_split: null # Rule based evaluation
5 | question_key: question
6 | answer_key: answer
7 | templating_parameters: 
8 |   template: "Return your final response within \\boxed{{}}. {question}"
9 | 


--------------------------------------------------------------------------------
/skythought/evals/tasks/livecodebench/livecodebench.yaml:
--------------------------------------------------------------------------------
 1 | handler: livecodebench
 2 | dataset_path: "livecodebench/code_generation_lite"  # repo ID in huggingface
 3 | dataset_subset: null 
 4 | dataset_split: test
 5 | dataset_kwargs:
 6 |   version_tag: release_v2
 7 |   trust_remote_code: true
 8 | question_key: task_id
 9 | answer_key: null
10 | templating_parameters: 
11 |   stdin_template: "Generate an executable Python function generated from the given prompt. The function should take stdin as input and print the output. Simply call the function after the definition. {prompt}"
12 |   non_stdin_template: "Generate an executable Python function generated from the given prompt. Return the function body without invoking it at the final solution. {prompt}"
13 | # Optionally, you can filter the dataset by difficulty
14 | # preprocess_config:
15 | #   difficulty: easy
16 | 


--------------------------------------------------------------------------------
/skythought/evals/tasks/livecodebench/livecodebench_easy.yaml:
--------------------------------------------------------------------------------
 1 | handler: livecodebench
 2 | dataset_path: "livecodebench/code_generation_lite"  # repo ID in huggingface
 3 | dataset_subset: null 
 4 | dataset_split: test
 5 | dataset_kwargs:
 6 |   version_tag: release_v2
 7 |   trust_remote_code: true
 8 | question_key: task_id
 9 | answer_key: null
10 | templating_parameters: 
11 |   stdin_template: "Generate an executable Python function generated from the given prompt. The function should take stdin as input and print the output. Simply call the function after the definition. {prompt}"
12 |   non_stdin_template: "Generate an executable Python function generated from the given prompt. Return the function body without invoking it at the final solution. {prompt}"
13 | preprocess_config:
14 |   difficulty: easy
15 | 


--------------------------------------------------------------------------------
/skythought/evals/tasks/livecodebench/livecodebench_hard.yaml:
--------------------------------------------------------------------------------
 1 | handler: livecodebench
 2 | dataset_path: "livecodebench/code_generation_lite"  # repo ID in huggingface
 3 | dataset_subset: null 
 4 | dataset_split: test
 5 | dataset_kwargs:
 6 |   version_tag: release_v2
 7 |   trust_remote_code: true
 8 | question_key: task_id
 9 | answer_key: null
10 | templating_parameters: 
11 |   stdin_template: "Generate an executable Python function generated from the given prompt. The function should take stdin as input and print the output. Simply call the function after the definition. {prompt}"
12 |   non_stdin_template: "Generate an executable Python function generated from the given prompt. Return the function body without invoking it at the final solution. {prompt}"
13 | preprocess_config:
14 |   difficulty: hard
15 | 


--------------------------------------------------------------------------------
/skythought/evals/tasks/livecodebench/livecodebench_medium.yaml:
--------------------------------------------------------------------------------
 1 | handler: livecodebench
 2 | dataset_path: "livecodebench/code_generation_lite"  # repo ID in huggingface
 3 | dataset_subset: null 
 4 | dataset_split: test
 5 | dataset_kwargs:
 6 |   version_tag: release_v2
 7 |   trust_remote_code: true
 8 | question_key: task_id
 9 | answer_key: null
10 | templating_parameters: 
11 |   stdin_template: "Generate an executable Python function generated from the given prompt. The function should take stdin as input and print the output. Simply call the function after the definition. {prompt}"
12 |   non_stdin_template: "Generate an executable Python function generated from the given prompt. Return the function body without invoking it at the final solution. {prompt}"
13 | preprocess_config:
14 |   difficulty: medium
15 | 


--------------------------------------------------------------------------------
/skythought/evals/tasks/math/math500.yaml:
--------------------------------------------------------------------------------
 1 | handler: math
 2 | dataset_path: "qq8933/MATH500"  # repo ID in huggingface
 3 | dataset_subset: null # which subset on huggingface
 4 | question_key: problem
 5 | answer_key: answer
 6 | dataset_split: test
 7 | templating_parameters: 
 8 |   template: "Return your final response within \\boxed{{}}. {problem}"
 9 | # optional. Not supported yet. 
10 | # fewshot_config:
11 | #   - question: ...
12 | #   - target:  ...
13 | # num_fewshot: 0
14 | 


--------------------------------------------------------------------------------
/skythought/evals/tasks/minervamath/minervamath.yaml:
--------------------------------------------------------------------------------
1 | handler: math
2 | dataset_path: "svc-huggingface/minerva-math" # repo ID in huggingface
3 | dataset_subset: null # which subset on huggingface
4 | question_key: problem
5 | answer_key: solution
6 | dataset_split: test
7 | templating_parameters: 
8 |   template: "Return your final response within \\boxed{{}}. {problem}"


--------------------------------------------------------------------------------
/skythought/evals/tasks/minervamath/minervamath_handler.py:
--------------------------------------------------------------------------------
 1 | from skythought.evals.util.math_parsing_util import (
 2 |     extract_answer,
 3 |     math_equal,
 4 |     strip_answer_string,
 5 | )
 6 | 
 7 | from ..math.math_handler import MathTaskHandler
 8 | 
 9 | 
10 | class MinervaMathTaskHandler(MathTaskHandler):
11 | 
12 |     def check_correctness(self, problem, generation):
13 |         answer = extract_answer(problem[self.task_config.answer_key])
14 |         answer = strip_answer_string(answer)
15 | 
16 |         pred = extract_answer(generation)
17 |         pred = strip_answer_string(pred)
18 |         return math_equal(pred, answer)
19 | 


--------------------------------------------------------------------------------
/skythought/evals/tasks/mmlu/mmlu.yaml:
--------------------------------------------------------------------------------
1 | handler: mmlu
2 | dataset_path: cais/mmlu
3 | dataset_subset: all
4 | dataset_split: test
5 | question_key: question
6 | answer_key: answer
7 | templating_parameters:
8 |   template: "Return your final response within \\boxed{{}}. {prompt}"
9 | 


--------------------------------------------------------------------------------
/skythought/evals/tasks/mmlu/mmlu_pro.yaml:
--------------------------------------------------------------------------------
1 | handler: mmlu_pro
2 | dataset_path: TIGER-Lab/MMLU-Pro
3 | dataset_subset: default
4 | dataset_split: test
5 | question_key: question
6 | answer_key: answer
7 | templating_parameters:
8 |   template: "Return your final response within \\boxed{{}}. {prompt}"
9 | 


--------------------------------------------------------------------------------
/skythought/evals/tasks/numina/numina.yaml:
--------------------------------------------------------------------------------
 1 | handler: numina
 2 | dataset_path: "AI-MO/NuminaMath-CoT"
 3 | dataset_subset: null
 4 | dataset_split: train
 5 | question_key: problem
 6 | answer_key: solution
 7 | templating_parameters:
 8 |   template: "Return your final response within \\boxed{{}}. {prompt}"
 9 | # Optionally, you can filter the dataset by difficulty
10 | # preprocess_config:
11 | #   filter_difficulty: true
12 | #   math_difficulty_lower_bound: 4
13 | #   math_difficulty_upper_bound: 9
14 | #   source: math
15 | 


--------------------------------------------------------------------------------
/skythought/evals/tasks/numina/numina_amc_aime.yaml:
--------------------------------------------------------------------------------
 1 | handler: numina
 2 | dataset_path: "AI-MO/NuminaMath-CoT"
 3 | dataset_subset: null
 4 | dataset_split: train
 5 | question_key: problem
 6 | answer_key: solution
 7 | templating_parameters:
 8 |   template: "Return your final response within \\boxed{{}}. {prompt}"
 9 | preprocess_config:
10 |   filter_difficulty: true
11 |   math_difficulty_lower_bound: 1
12 |   math_difficulty_upper_bound: 9
13 |   source: amc_aime
14 | 


--------------------------------------------------------------------------------
/skythought/evals/tasks/numina/numina_math.yaml:
--------------------------------------------------------------------------------
 1 | handler: numina
 2 | dataset_path: "AI-MO/NuminaMath-CoT"
 3 | dataset_subset: null
 4 | dataset_split: train
 5 | question_key: problem
 6 | answer_key: solution
 7 | templating_parameters:
 8 |   template: "Return your final response within \\boxed{{}}. {prompt}"
 9 | preprocess_config:
10 |   filter_difficulty: true
11 |   math_difficulty_lower_bound: 4
12 |   math_difficulty_upper_bound: 9
13 |   source: math
14 | 


--------------------------------------------------------------------------------
/skythought/evals/tasks/numina/numina_olympiads.yaml:
--------------------------------------------------------------------------------
 1 | handler: numina
 2 | dataset_path: "AI-MO/NuminaMath-CoT"
 3 | dataset_subset: null
 4 | dataset_split: train
 5 | question_key: problem
 6 | answer_key: solution
 7 | templating_parameters:
 8 |   template: "Return your final response within \\boxed{{}}. {prompt}"
 9 | preprocess_config:
10 |   filter_difficulty: true
11 |   math_difficulty_lower_bound: 9
12 |   math_difficulty_upper_bound: 9
13 |   source: olympiads
14 | 


--------------------------------------------------------------------------------
/skythought/evals/tasks/olympiadbench/olympiadbench_handler.py:
--------------------------------------------------------------------------------
 1 | from skythought.evals.util.math_parsing_util import (
 2 |     extract_answer,
 3 |     math_equal,
 4 |     strip_answer_string,
 5 | )
 6 | 
 7 | from ..math.math_handler import MathTaskHandler
 8 | 
 9 | 
class OlympiadBenchMathTaskHandler(MathTaskHandler):
    """Math task handler for problems whose final answer is stored in a list."""

    def check_correctness(self, problem, generation):
        """Compare the extracted prediction with the first listed answer.

        Only element ``[0]`` of the ``task_config.answer_key`` field is used as
        the reference; it is cleaned with ``strip_answer_string``. The
        generation goes through ``extract_answer`` and ``strip_answer_string``
        before ``math_equal`` decides the result.
        """
        # every problem carries its final answer inside a list
        reference = strip_answer_string(problem[self.task_config.answer_key][0])
        prediction = strip_answer_string(extract_answer(generation))
        return math_equal(prediction, reference)
17 | 


--------------------------------------------------------------------------------
/skythought/evals/tasks/olympiadbench/olympiadbench_math_en.yaml:
--------------------------------------------------------------------------------
1 | handler: olympiadbench_math
2 | dataset_path: Hothan/OlympiadBench
3 | dataset_subset: OE_TO_maths_en_COMP
4 | dataset_split: train
5 | question_key: question
6 | answer_key: final_answer
7 | templating_parameters:
8 |   template: "Return your final response within \\boxed{{}}. {question}"
9 | 


--------------------------------------------------------------------------------
/skythought/evals/tasks/omni_math/omni_handler.py:
--------------------------------------------------------------------------------
 1 | from skythought.evals.util.math_parsing_util import (
 2 |     extract_answer,
 3 |     math_equal,
 4 |     strip_answer_string,
 5 | )
 6 | 
 7 | from ..math.math_handler import MathTaskHandler
 8 | 
 9 | 
class OMNIMathTaskHandler(MathTaskHandler):
    """Math task handler that formats prompts from the whole problem record."""

    def generate_prompt(self, problem):
        """Fill the configured ``template`` using every key of ``problem``."""
        template = self.task_config.templating_parameters["template"]
        return template.format(**problem)

    def check_correctness(self, problem, generation):
        """Return the ``math_equal`` verdict for one problem/generation pair.

        The reference is taken verbatim from the ``task_config.answer_key``
        field; only the generation is run through ``extract_answer`` and
        ``strip_answer_string``.
        """
        # the stored answer is used as-is, without preprocessing
        reference = problem[self.task_config.answer_key]
        prediction = strip_answer_string(extract_answer(generation))
        return math_equal(prediction, reference)
20 | 


--------------------------------------------------------------------------------
/skythought/evals/tasks/omni_math/omni_math.yaml:
--------------------------------------------------------------------------------
 1 | handler: omni_math
 2 | dataset_path: "KbsdJames/Omni-MATH"  # repo ID in huggingface
 3 | dataset_subset: null # which subset on huggingface
 4 | dataset_split: test_rule_based # Rule based evaluation
 5 | dataset_kwargs:
 6 |   # NOTE: This is using the subset for rule-based evaluation in the below PR
 7 |   revision: refs/pr/2 
 8 | question_key: problem
 9 | answer_key: answer
10 | templating_parameters: 
11 |   template: "Return your final response within \\boxed{{}}. {problem}"


--------------------------------------------------------------------------------
/skythought/evals/tasks/taco/taco.yaml:
--------------------------------------------------------------------------------
 1 | handler: taco
 2 | dataset_path: "BAAI/TACO"
 3 | dataset_subset: MEDIUM
 4 | dataset_split: train
 5 | dataset_kwargs:
 6 |   trust_remote_code: true
 7 | question_key: question
 8 | answer_key: null
 9 | templating_parameters:
10 |   initial_template: "\nQUESTION:\n{prompt}"
11 |   # Add starter code to initial template
12 |   starter_code_template: "{input}\n{starter_code}"
13 |   # stdin template is used when there is no starter code or fn_name
14 |   stdin_template: "{input}\nUse Standard Input format\nANSWER:\n"
15 |   # call template is used when there is starter code or fn_name
16 |   call_template: "{input}\nUse Call-Based format\nANSWER:\n"
17 | # Optionally, you can filter the dataset by difficulty
18 | # preprocess_config:
19 | #   difficulty: easy
20 | 
21 | 


--------------------------------------------------------------------------------
/skythought/evals/tasks/task_util.py:
--------------------------------------------------------------------------------
 1 | import glob
 2 | import os
 3 | from typing import Dict
 4 | 
 5 | 
 6 | def get_tasks(task_root_dir: str) -> Dict[str, str]:
 7 |     """Returns a dictionary of task names and their corresponding yaml file paths"""
 8 |     # list all yamls in subdirectories
 9 |     name_to_yaml = {}
10 |     for yaml_file in glob.glob(
11 |         os.path.join(task_root_dir, "**", "*.yaml"), recursive=True
12 |     ):
13 |         # arc.yaml -> arc
14 |         name = os.path.basename(yaml_file).split(".")[0]
15 | 
16 |         name_to_yaml[name] = yaml_file
17 | 
18 |     return name_to_yaml
19 | 


--------------------------------------------------------------------------------
/skythought/evals/util/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NovaSky-AI/SkyThought/cf3b941afcd439cde56f47789579822398a7789d/skythought/evals/util/__init__.py


--------------------------------------------------------------------------------
/skythought/evals/util/results.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | from dataclasses import asdict, dataclass
 3 | from pathlib import Path
 4 | from typing import Any, Dict, Optional
 5 | 
 6 | 
 7 | @dataclass
 8 | class SummaryResults:
 9 |     configuration: Dict[str, Any]
10 |     total_completion_tokens: int = 0
11 |     avg_completion_tokens: float = 0
12 |     total_prompt_tokens: int = 0
13 |     avg_prompt_tokens: float = 0
14 |     accuracy: float = 0.0
15 |     pass_at_k: Optional[Dict[str, float]] = None
16 | 
17 |     def to_json_dict(self) -> Dict[str, Any]:
18 |         """Convert to a JSON-compatible dictionary."""
19 |         return asdict(self)
20 | 
21 | 
def save_summary(summary_path: Path, summary: SummaryResults) -> None:
    """Write ``summary.to_json_dict()`` to ``summary_path`` as JSON.

    The file is opened for writing with UTF-8 encoding and the JSON is
    indented by four spaces.
    """
    with open(summary_path, mode="w", encoding="utf-8") as out_file:
        payload = summary.to_json_dict()
        json.dump(payload, out_file, indent=4)
25 | 


--------------------------------------------------------------------------------
/skythought/skythought-rl/.readthedocs.yaml:
--------------------------------------------------------------------------------
 1 | # Read the Docs configuration file
 2 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
 3 | 
 4 | version: 2
 5 | 
 6 | build:
 7 |   os: ubuntu-22.04
 8 |   tools:
 9 |     python: "3.8"
10 | 
11 | sphinx:
12 |   configuration: docs/conf.py
13 | 
14 | python:
15 |   install:
16 |     - requirements: docs/requirements-docs.txt


--------------------------------------------------------------------------------
/skythought/skythought-rl/.style.yapf:
--------------------------------------------------------------------------------
1 | [style]
2 | based_on_style = google
3 | column_limit = 120
4 | indent_width = 4
5 | split_arguments_when_comma_terminated: true


--------------------------------------------------------------------------------
/skythought/skythought-rl/Notice.txt:
--------------------------------------------------------------------------------
1 | Copyright 2023-2024 Bytedance Ltd. and/or its affiliates 


--------------------------------------------------------------------------------
/skythought/skythought-rl/README.md:
--------------------------------------------------------------------------------
 1 | ### Install veRL:
 2 | 1. Create a conda environment:
 3 | 
 4 | ```bash
 5 | conda create -n verl python==3.9
 6 | conda activate verl
 7 | pip install -r requirements.txt
 8 | ```
 9 | 
10 | 2. Install common dependencies (required for all backends)
11 | 
12 | ```bash
13 | pip3 install vllm==0.6.3 # or you can install 0.5.4, 0.4.2 and 0.3.1
14 | pip3 install ray
15 | 
16 | # flash attention 2
17 | pip3 install flash-attn --no-build-isolation
18 | ```
19 | 
20 | 3. Install veRL
21 | 
22 | ```bash
23 | pip3 install -e .
24 | ```
25 | 
26 | ### Prepare the data
27 | `python data/data_prepare_*.py --output {corresponding path}`
28 | 
29 | ### Launch the training
30 | ```bash
31 | cd examples/sky-t1
32 | bash ./run-sky-t1-7b-zero.sh
33 | ```
34 | 
35 | 
36 | ### Acknowledgement
37 | This repo is built on top of [VeRL](https://github.com/volcengine/verl) and [PRIME](https://github.com/PRIME-RL/PRIME).
38 | 


--------------------------------------------------------------------------------
/skythought/skythought-rl/docs/Makefile:
--------------------------------------------------------------------------------
 1 | # Minimal makefile for Sphinx documentation
 2 | #
 3 | 
 4 | # You can set these variables from the command line.
 5 | SPHINXOPTS    =
 6 | SPHINXBUILD   = sphinx-build
 7 | SPHINXPROJ    = verl
 8 | SOURCEDIR     = .
 9 | BUILDDIR      = _build
10 | 
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | 	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 | 
15 | .PHONY: help Makefile
16 | 
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | 	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
21 | 


--------------------------------------------------------------------------------
/skythought/skythought-rl/docs/README.md:
--------------------------------------------------------------------------------
 1 | # veRL documents
 2 | 
 3 | ## Build the docs
 4 | 
 5 | ```bash
 6 | # Install dependencies.
 7 | pip install -r requirements-docs.txt
 8 | 
 9 | # Build the docs.
10 | make clean
11 | make html
12 | ```
13 | 
14 | ## Open the docs with your browser
15 | 
16 | ```bash
17 | python -m http.server -d _build/html/
18 | ```
19 | Launch your browser and open localhost:8000.


--------------------------------------------------------------------------------
/skythought/skythought-rl/docs/_static/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NovaSky-AI/SkyThought/cf3b941afcd439cde56f47789579822398a7789d/skythought/skythought-rl/docs/_static/logo.png


--------------------------------------------------------------------------------
/skythought/skythought-rl/docs/advance/placement.rst:
--------------------------------------------------------------------------------
 1 | Ray API Design Tutorial
 2 | =======================================
 3 | 
 4 | We provide a tutorial for our Ray API design, including:
 5 | 
 6 | - Ray basic concepts
 7 | - Resource Pool and RayWorkerGroup
 8 | - Data Dispatch, Execution and Collection
 9 | - Initialize the RayWorkerGroup and execute the distributed computation in the given Resource Pool
10 | 
11 | See details in `tutorial.ipynb <https://github.com/volcengine/verl/blob/main/examples/ray/tutorial.ipynb>`_.


--------------------------------------------------------------------------------
/skythought/skythought-rl/docs/requirements-docs.txt:
--------------------------------------------------------------------------------
1 | # markdown support
2 | recommonmark
3 | # markdown table support
4 | sphinx-markdown-tables
5 | 
6 | # theme default rtd
7 | 
8 | # crate-docs-theme
9 | sphinx-rtd-theme


--------------------------------------------------------------------------------
/skythought/skythought-rl/examples/generation/run_deepseek_v2_lite_math.sh:
--------------------------------------------------------------------------------
 1 | python3 -m verl.trainer.main_generation \
 2 |     trainer.nnodes=1 \
 3 |     trainer.n_gpus_per_node=8 \
 4 |     data.path=~/data/rlhf/gsm8k/test.parquet \
 5 |     data.prompt_key=prompt \
 6 |     data.n_samples=1 \
 7 |     data.output_path=~/data/rlhf/math/deepseek_v2_lite_gen_test.parquet \
 8 |     model.path=deepseek-ai/deepseek-llm-7b-chat \
 9 |     +model.trust_remote_code=True \
10 |     rollout.temperature=1.0 \
11 |     rollout.top_k=50 \
12 |     rollout.top_p=0.7 \
13 |     rollout.prompt_length=2048 \
14 |     rollout.response_length=1024 \
15 |     rollout.tensor_model_parallel_size=2 \
16 |     rollout.gpu_memory_utilization=0.8
17 | 


--------------------------------------------------------------------------------
/skythought/skythought-rl/examples/sft/gsm8k/run_deepseek_6b7.sh:
--------------------------------------------------------------------------------
 1 | set -x
 2 | 
 3 | hdfs_path=hdfs://user/verl/experiments/gsm8k/deepseek-coder-6.7b-instruct/ # replace to your own hdfs/local path
 4 | 
 5 | nproc_per_node=$1
 6 | 
 7 | torchrun --standalone --nnodes=1 --nproc_per_node=$nproc_per_node \
 8 |      -m verl.trainer.fsdp_sft_trainer \
 9 |     data.train_files=$HOME/data/gsm8k/train.parquet \
10 |     data.val_files=$HOME/data/gsm8k/test.parquet \
11 |     data.prompt_key=prompt \
12 |     data.response_key=answer \
13 |     data.micro_batch_size=8 \
14 |     model.partial_pretrain=deepseek-ai/deepseek-coder-6.7b-instruct \
15 |     trainer.default_hdfs_dir=$hdfs_path \
16 |     trainer.project_name=gsm8k-sft \
17 |     trainer.experiment_name=gsm8k-sft-deepseek-coder-6.7b-instruct \
18 |     trainer.total_epochs=4 \
19 |     trainer.logger=['console','wandb']


--------------------------------------------------------------------------------
/skythought/skythought-rl/examples/sft/gsm8k/run_gemma_7b.sh:
--------------------------------------------------------------------------------
 1 | set -x
 2 | 
 3 | hdfs_path=hdfs://user/verl/experiments/gsm8k/gemma-1.1-7b-it/ # replace to your own hdfs/local path
 4 | 
 5 | nproc_per_node=$1
 6 | 
 7 | torchrun --standalone --nnodes=1 --nproc_per_node=$nproc_per_node \
 8 |      -m verl.trainer.fsdp_sft_trainer \
 9 |     data.train_files=$HOME/data/gsm8k/train.parquet \
10 |     data.val_files=$HOME/data/gsm8k/test.parquet \
11 |     data.prompt_key=prompt \
12 |     data.response_key=answer \
13 |     data.micro_batch_size=8 \
14 |     model.partial_pretrain=google/gemma-1.1-7b-it \
15 |     trainer.default_hdfs_dir=$hdfs_path \
16 |     trainer.project_name=gsm8k-sft \
17 |     trainer.experiment_name=gsm8k-sft-gemma-1.1-7b-it \
18 |     trainer.total_epochs=4 \
19 |     trainer.logger=['console','wandb']


--------------------------------------------------------------------------------
/skythought/skythought-rl/requirements.txt:
--------------------------------------------------------------------------------
 1 | accelerate
 2 | codetiming
 3 | datasets
 4 | dill
 5 | hydra-core
 6 | numpy
 7 | pybind11
 8 | ray
 9 | tensordict<0.6
10 | transformers
11 | vllm<=0.6.3
12 | wandb
13 | pyext
14 | word2number
15 | pylatexenc


--------------------------------------------------------------------------------
/skythought/skythought-rl/tests/ray/detached_worker/README.md:
--------------------------------------------------------------------------------
 1 | # Detached Worker
 2 | ## How to run (Only on a single node)
 3 | - Start a local ray cluster: 
 4 | ```bash
 5 | ray start --head --port=6379
 6 | ```
 7 | - Run the server
 8 | ```bash
 9 | python3 server.py
10 | ```
11 | - In another terminal, run the client
12 | ```bash
13 | python3 client.py
14 | ```
15 | 


--------------------------------------------------------------------------------
/skythought/skythought-rl/tests/ray/detached_worker/run.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | ray start --head --port=6379
3 | python3 server.py
4 | python3 client.py
5 | ray stop --force


--------------------------------------------------------------------------------
/skythought/skythought-rl/verl/models/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/skythought/skythought-rl/verl/models/llama/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/skythought/skythought-rl/verl/models/llama/megatron/checkpoint_utils/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/skythought/skythought-rl/verl/models/transformers/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/skythought/skythought-rl/verl/single_controller/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import os
16 | 
# Directory that contains this __init__.py.
version_folder = os.path.dirname(os.path.abspath(__file__))

# The version string is read from ../version/version relative to that
# directory, with surrounding whitespace removed.
with open(os.path.join(version_folder, os.pardir, 'version/version')) as f:
    __version__ = f.read().strip()
21 | 


--------------------------------------------------------------------------------
/skythought/skythought-rl/verl/single_controller/base/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .worker import Worker
16 | from .worker_group import WorkerGroup, ClassWithInitArgs, ResourcePool
17 | 


--------------------------------------------------------------------------------
/skythought/skythought-rl/verl/single_controller/base/megatron/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/skythought/skythought-rl/verl/single_controller/base/register_center/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/skythought/skythought-rl/verl/single_controller/ray/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .base import RayResourcePool, RayClassWithInitArgs, RayWorkerGroup, create_colocated_worker_cls
16 | from .megatron import (MegatronRayWorkerGroup, DistRankInfo, DistGlobalInfo)


--------------------------------------------------------------------------------
/skythought/skythought-rl/verl/third_party/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/skythought/skythought-rl/verl/third_party/vllm/vllm_v_0_3_1/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/skythought/skythought-rl/verl/third_party/vllm/vllm_v_0_4_2/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/skythought/skythought-rl/verl/third_party/vllm/vllm_v_0_5_4/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/skythought/skythought-rl/verl/third_party/vllm/vllm_v_0_6_3/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/skythought/skythought-rl/verl/trainer/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/skythought/skythought-rl/verl/trainer/config/evaluation.yaml:
--------------------------------------------------------------------------------
1 | data:  # all settings in this file live under the single `data` mapping
2 |   path: /tmp/math_Qwen2-7B-Instruct.parquet  # parquet file to load; NOTE(review): default points into /tmp — presumably overridden per run, confirm
3 |   prompt_key: prompt  # NOTE(review): presumably the column name holding prompts — confirm against the consumer
4 |   response_key: responses  # NOTE(review): presumably the column name holding generated responses — confirm
5 |   data_source_key: data_source  # NOTE(review): presumably the column naming each row's source dataset — confirm
6 |   reward_model_key: reward_model  # NOTE(review): presumably the column with reward-model / ground-truth info — confirm


--------------------------------------------------------------------------------
/skythought/skythought-rl/verl/trainer/ppo/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/skythought/skythought-rl/verl/trainer/runtime_env.yaml:
--------------------------------------------------------------------------------
1 | working_dir: ./  # NOTE(review): field names match Ray's runtime_env schema — confirm this file is handed to Ray
2 | excludes: ["/.git/"]  # single exclusion pattern: the .git directory
3 | env_vars:  # environment variables declared for the runtime
4 |   TORCH_NCCL_AVOID_RECORD_STREAMS: "1"
5 |   VLLM_ATTENTION_BACKEND: "XFORMERS"  # NOTE(review): presumably pins vLLM to its xformers attention backend — confirm


--------------------------------------------------------------------------------
/skythought/skythought-rl/verl/utils/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from . import tokenizer
16 | from .tokenizer import *
17 | 
18 | __all__ = tokenizer.__all__


--------------------------------------------------------------------------------
/skythought/skythought-rl/verl/utils/dataset/README.md:
--------------------------------------------------------------------------------
 1 | # Dataset Format
 2 | ## RLHF dataset
 3 | We combine all the data sources into a single parquet file. We organize the prompt directly in the chat format so that multi-turn chats can be easily incorporated. In the prompt, we may add instruction-following text to guide the model to output the answer in a particular format so that we can extract it.
 4 | 
 5 | Math problems
 6 | ```json
 7 | {
 8 |     "data_source": "openai/gsm8k",
 9 |     "prompt": [{"role": "user", "content": "Natalia sold clips to 48 of her friends in April, and then she sold half as many clips in May. How many clips did Natalia sell altogether in April and May? Let's think step by step and output the final answer after \"####\""}],
10 |     "ability": "math",
11 |     "reward_model": {
12 |         "style": "rule",
13 |         "ground_truth": ["72"]
14 |     }
15 | }
16 | ```
17 | 


--------------------------------------------------------------------------------
/skythought/skythought-rl/verl/utils/dataset/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .rl_dataset import RLHFDataset
16 | from .rm_dataset import RMDataset
17 | from .sft_dataset import SFTDataset
18 | 


--------------------------------------------------------------------------------
/skythought/skythought-rl/verl/utils/debug/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .performance import log_gpu_memory_usage


--------------------------------------------------------------------------------
/skythought/skythought-rl/verl/utils/logger/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/skythought/skythought-rl/verl/utils/megatron/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/skythought/skythought-rl/verl/utils/rendezvous/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/skythought/skythought-rl/verl/utils/reward_score/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/skythought/skythought-rl/verl/version/version:
--------------------------------------------------------------------------------
1 | 0.1


--------------------------------------------------------------------------------
/skythought/skythought-rl/verl/workers/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/skythought/skythought-rl/verl/workers/actor/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .base import BasePPOActor
16 | from .dp_actor import DataParallelPPOActor
17 | 
18 | __all__ = ["BasePPOActor", "DataParallelPPOActor"]
19 | 


--------------------------------------------------------------------------------
/skythought/skythought-rl/verl/workers/critic/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .base import BasePPOCritic
16 | from .dp_critic import DataParallelPPOCritic
17 | 
18 | __all__ = ["BasePPOCritic", "DataParallelPPOCritic"]
19 | 


--------------------------------------------------------------------------------
/skythought/skythought-rl/verl/workers/reward_model/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .base import BasePPORewardModel
16 | 


--------------------------------------------------------------------------------
/skythought/skythought-rl/verl/workers/reward_model/megatron/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .reward_model import MegatronRewardModel
16 | 


--------------------------------------------------------------------------------
/skythought/skythought-rl/verl/workers/rollout/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .base import BaseRollout
16 | from .naive import NaiveRollout
17 | from .hf_rollout import HFRollout
18 | 
19 | __all__ = ["BaseRollout", "NaiveRollout", "HFRollout"]
20 | 


--------------------------------------------------------------------------------
/skythought/skythought-rl/verl/workers/rollout/naive/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .naive_rollout import NaiveRollout
16 | 


--------------------------------------------------------------------------------
/skythought/skythought-rl/verl/workers/rollout/vllm_rollout/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .vllm_rollout import vLLMRollout


--------------------------------------------------------------------------------
/skythought/test-time-scaling/assets/figure1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NovaSky-AI/SkyThought/cf3b941afcd439cde56f47789579822398a7789d/skythought/test-time-scaling/assets/figure1.png


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/baselines/4o_mini.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Baseline run: calls evaluate_multiprocess.py with the naive_nodspy method
 3 | # once per difficulty level, passing a per-difficulty --result_json_path.
 4 | # No --generator flag is passed here.
 5 | # The last argument line has no trailing backslash, so `done` can never be
 6 | # folded into the python command line.
 7 | 
 8 | for difficulty in easy medium hard
 9 | do
10 |     python evaluate_multiprocess.py \
11 |         --difficulty="${difficulty}" \
12 |         --temperature=0.7 \
13 |         --num_threads=16 \
14 |         --method naive_nodspy \
15 |         --lcb_version release_v2 \
16 |         --result_json_path="results/baselines_4o_mini_${difficulty}.json"
17 | done
18 | 


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/baselines/4o_mini_cct.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | source /root/miniconda3/etc/profile.d/conda.sh
 4 | conda activate sstar
 5 | 
 6 | python codecontest_evaluate_multiprocess.py \
 7 |     --temperature=0.7 \
 8 |     --num_threads=16 \
 9 |     --method naive_nodspy \
10 |     --generator 4o \
11 |     --result_json_path="results/baselines_4o_codecontest.json"
12 | 
13 | 
14 | 


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/baselines/o1_mini.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Baseline run: o1-mini generator with the naive_nodspy method, invoked
 3 | # once per difficulty level with a per-difficulty --result_json_path.
 4 | # The last argument line has no trailing backslash, so `done` can never be
 5 | # folded into the python command line.
 6 | 
 7 | for difficulty in easy medium hard
 8 | do
 9 |     python evaluate_multiprocess.py \
10 |         --difficulty="${difficulty}" \
11 |         --temperature=0.7 \
12 |         --num_threads=16 \
13 |         --generator o1-mini \
14 |         --method naive_nodspy \
15 |         --lcb_version release_v2 \
16 |         --result_json_path="results/baselines_o1_mini_${difficulty}.json"
17 | done
18 | 


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/baselines/o1_preview.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Baseline run: o1-preview generator with the naive_nodspy method, invoked
 3 | # once per difficulty level with a per-difficulty --result_json_path.
 4 | # The last argument line has no trailing backslash, so `done` can never be
 5 | # folded into the python command line.
 6 | 
 7 | for difficulty in easy medium hard
 8 | do
 9 |     python evaluate_multiprocess.py \
10 |         --difficulty="${difficulty}" \
11 |         --temperature=0.7 \
12 |         --num_threads=16 \
13 |         --generator o1-preview \
14 |         --method naive_nodspy \
15 |         --lcb_version release_v2 \
16 |         --result_json_path="results/baselines_o1_preview_${difficulty}.json"
17 | done
18 | 


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/baselines/o3_mini.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Baseline run: o3-mini generator with the naive_nodspy method, invoked
 3 | # once per difficulty level with a per-difficulty --result_json_path.
 4 | # The last argument line has no trailing backslash, so `done` can never be
 5 | # folded into the python command line.
 6 | 
 7 | for difficulty in easy medium hard
 8 | do
 9 |     python evaluate_multiprocess.py \
10 |         --difficulty="${difficulty}" \
11 |         --temperature=0.7 \
12 |         --num_threads=16 \
13 |         --generator o3-mini \
14 |         --method naive_nodspy \
15 |         --lcb_version release_v2 \
16 |         --result_json_path="results/baselines_o3_mini_${difficulty}.json"
17 | done
18 | 


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/baselines/qwen0.5b.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Baseline run: Qwen2.5-Coder-0.5B-Instruct reached through the API at
 3 | # http://localhost:8000/v1, naive_nodspy method, invoked once per
 4 | # difficulty level with a per-difficulty --result_json_path.
 5 | # The last argument line has no trailing backslash, so `done` can never be
 6 | # folded into the python command line.
 7 | 
 8 | for difficulty in easy medium hard
 9 | do
10 |     python evaluate_multiprocess.py \
11 |         --difficulty="${difficulty}" \
12 |         --temperature=0.7 \
13 |         --num_threads=16 \
14 |         --generator qwen0.5b \
15 |         --api_name Qwen/Qwen2.5-Coder-0.5B-Instruct \
16 |         --api_base http://localhost:8000/v1 \
17 |         --method naive_nodspy \
18 |         --lcb_version release_v2 \
19 |         --result_json_path="results/baselines_qwen0.5b_${difficulty}.json"
20 | done
21 | 


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/baselines/qwen1.5b.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Baseline run: Qwen2.5-Coder-1.5B-Instruct reached through the API at
 3 | # http://localhost:8000/v1, naive_nodspy method, invoked once per
 4 | # difficulty level with a per-difficulty --result_json_path.
 5 | # The last argument line has no trailing backslash, so `done` can never be
 6 | # folded into the python command line.
 7 | 
 8 | for difficulty in easy medium hard
 9 | do
10 |     python evaluate_multiprocess.py \
11 |         --difficulty="${difficulty}" \
12 |         --temperature=0.7 \
13 |         --num_threads=16 \
14 |         --generator qwen1.5b \
15 |         --api_name Qwen/Qwen2.5-Coder-1.5B-Instruct \
16 |         --api_base http://localhost:8000/v1 \
17 |         --method naive_nodspy \
18 |         --lcb_version release_v2 \
19 |         --result_json_path="results/baselines_qwen1.5b_${difficulty}.json"
20 | done
21 | 


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/baselines/qwen14b.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Baseline run: Qwen2.5-Coder-14B-Instruct reached through the API at
 3 | # http://localhost:8000/v1, naive_nodspy method, invoked once per
 4 | # difficulty level with a per-difficulty --result_json_path.
 5 | # The last argument line has no trailing backslash, so `done` can never be
 6 | # folded into the python command line.
 7 | 
 8 | for difficulty in easy medium hard
 9 | do
10 |     python evaluate_multiprocess.py \
11 |         --difficulty="${difficulty}" \
12 |         --temperature=0.7 \
13 |         --num_threads=16 \
14 |         --generator qwen14b \
15 |         --api_name Qwen/Qwen2.5-Coder-14B-Instruct \
16 |         --api_base http://localhost:8000/v1 \
17 |         --method naive_nodspy \
18 |         --lcb_version release_v2 \
19 |         --result_json_path="results/baselines_qwen14b_${difficulty}.json"
20 | done
21 | 


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/baselines/qwen32b.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Baseline run: Qwen2.5-Coder-32B-Instruct reached through the API at
 3 | # http://localhost:8000/v1, naive_nodspy method, invoked once per
 4 | # difficulty level with a per-difficulty --result_json_path.
 5 | # Uses the interpreter at /root/miniconda3/envs/sstar/bin/python directly.
 6 | # The last argument line has no trailing backslash, so `done` can never be
 7 | # folded into the python command line.
 8 | 
 9 | for difficulty in easy medium hard
10 | do
11 |     /root/miniconda3/envs/sstar/bin/python evaluate_multiprocess.py \
12 |         --difficulty="${difficulty}" \
13 |         --temperature=0.7 \
14 |         --num_threads=16 \
15 |         --generator qwen32b \
16 |         --api_name Qwen/Qwen2.5-Coder-32B-Instruct \
17 |         --api_base http://localhost:8000/v1 \
18 |         --method naive_nodspy \
19 |         --lcb_version release_v2 \
20 |         --result_json_path="results/baselines_qwen32b_${difficulty}.json"
21 | done
22 | 


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/baselines/qwen3b.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Baseline run: Qwen2.5-Coder-3B-Instruct reached through the API at
 3 | # http://localhost:8000/v1, naive_nodspy method, invoked once per
 4 | # difficulty level with a per-difficulty --result_json_path.
 5 | # The last argument line has no trailing backslash, so `done` can never be
 6 | # folded into the python command line.
 7 | 
 8 | for difficulty in easy medium hard
 9 | do
10 |     python evaluate_multiprocess.py \
11 |         --difficulty="${difficulty}" \
12 |         --temperature=0.7 \
13 |         --num_threads=16 \
14 |         --generator qwen3b \
15 |         --api_name Qwen/Qwen2.5-Coder-3B-Instruct \
16 |         --api_base http://localhost:8000/v1 \
17 |         --method naive_nodspy \
18 |         --lcb_version release_v2 \
19 |         --result_json_path="results/baselines_qwen3b_${difficulty}.json"
20 | done
21 | 


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/baselines/qwen7b.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Baseline run: Qwen2.5-Coder-7B-Instruct reached through the API at
 3 | # http://localhost:8000/v1, naive_nodspy method, invoked once per
 4 | # difficulty level with a per-difficulty --result_json_path.
 5 | # The last argument line has no trailing backslash, so `done` can never be
 6 | # folded into the python command line.
 7 | 
 8 | for difficulty in easy medium hard
 9 | do
10 |     python evaluate_multiprocess.py \
11 |         --difficulty="${difficulty}" \
12 |         --temperature=0.7 \
13 |         --num_threads=16 \
14 |         --generator qwen7b \
15 |         --api_name Qwen/Qwen2.5-Coder-7B-Instruct \
16 |         --api_base http://localhost:8000/v1 \
17 |         --method naive_nodspy \
18 |         --lcb_version release_v2 \
19 |         --result_json_path="results/baselines_qwen7b_${difficulty}.json"
20 | done
21 | 


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/baselines/qwq32b.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Baseline run: Qwen/QwQ-32B-Preview reached through the API at
 3 | # http://localhost:8000/v1, with --no_dspy_gen and the naive_nodspy method,
 4 | # invoked once per difficulty level with a per-difficulty
 5 | # --result_json_path.
 6 | # NOTE(review): --generator is `qwen32b` although --api_name is the QwQ
 7 | # model — confirm this is the intended generator key.
 8 | # The last argument line has no trailing backslash, so `done` can never be
 9 | # folded into the python command line.
10 | 
11 | for difficulty in easy medium hard
12 | do
13 |     /root/miniconda3/envs/sstar/bin/python evaluate_multiprocess.py \
14 |         --difficulty="${difficulty}" \
15 |         --temperature=0.7 \
16 |         --num_threads=16 \
17 |         --generator qwen32b \
18 |         --api_name Qwen/QwQ-32B-Preview \
19 |         --api_base http://localhost:8000/v1 \
20 |         --no_dspy_gen \
21 |         --method naive_nodspy \
22 |         --lcb_version release_v2 \
23 |         --result_json_path="results/baselines_qwq32b_${difficulty}.json"
24 | done
25 | 


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/baselines/r1qwen14b.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Baseline run: deepseek-ai/DeepSeek-R1-Distill-Qwen-14B reached through
 3 | # the API at http://localhost:8000/v1, with --no_dspy_gen and the
 4 | # naive_nodspy method, invoked once per difficulty level with a
 5 | # per-difficulty --result_json_path.
 6 | # NOTE(review): --generator is `r1qwen32b` although --api_name is the 14B
 7 | # model — confirm this is the intended generator key.
 8 | # The last argument line has no trailing backslash, so `done` can never be
 9 | # folded into the python command line.
10 | 
11 | for difficulty in easy medium hard
12 | do
13 |     /root/miniconda3/envs/sstar/bin/python evaluate_multiprocess.py \
14 |         --difficulty="${difficulty}" \
15 |         --temperature=0.7 \
16 |         --num_threads=16 \
17 |         --generator r1qwen32b \
18 |         --api_name deepseek-ai/DeepSeek-R1-Distill-Qwen-14B \
19 |         --api_base http://localhost:8000/v1 \
20 |         --no_dspy_gen \
21 |         --method naive_nodspy \
22 |         --lcb_version release_v2 \
23 |         --result_json_path="results/baselines_r1qwen14b_${difficulty}.json"
24 | done
25 | 


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/baselines/r1qwen32b.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Baseline run: deepseek-ai/DeepSeek-R1-Distill-Qwen-32B reached through
 3 | # the API at http://localhost:8000/v1, with --no_dspy_gen and the
 4 | # naive_nodspy method, invoked once per difficulty level with a
 5 | # per-difficulty --result_json_path.
 6 | # The last argument line has no trailing backslash, so `done` can never be
 7 | # folded into the python command line.
 8 | 
 9 | for difficulty in easy medium hard
10 | do
11 |     /root/miniconda3/envs/sstar/bin/python evaluate_multiprocess.py \
12 |         --difficulty="${difficulty}" \
13 |         --temperature=0.7 \
14 |         --num_threads=16 \
15 |         --generator r1qwen32b \
16 |         --api_name deepseek-ai/DeepSeek-R1-Distill-Qwen-32B \
17 |         --api_base http://localhost:8000/v1 \
18 |         --no_dspy_gen \
19 |         --method naive_nodspy \
20 |         --lcb_version release_v2 \
21 |         --result_json_path="results/baselines_r1qwen32b_${difficulty}.json"
22 | done
23 | 


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/baselines/r1qwen7b.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Baseline run: deepseek-ai/DeepSeek-R1-Distill-Qwen-7B reached through
 3 | # the API at http://localhost:8000/v1, with --no_dspy_gen and the
 4 | # naive_nodspy method, invoked once per difficulty level with a
 5 | # per-difficulty --result_json_path.
 6 | # NOTE(review): --generator is `r1qwen32b` although --api_name is the 7B
 7 | # model — confirm this is the intended generator key.
 8 | # The last argument line has no trailing backslash, so `done` can never be
 9 | # folded into the python command line.
10 | 
11 | for difficulty in easy medium hard
12 | do
13 |     /root/miniconda3/envs/sstar/bin/python evaluate_multiprocess.py \
14 |         --difficulty="${difficulty}" \
15 |         --temperature=0.7 \
16 |         --num_threads=16 \
17 |         --generator r1qwen32b \
18 |         --api_name deepseek-ai/DeepSeek-R1-Distill-Qwen-7B \
19 |         --api_base http://localhost:8000/v1 \
20 |         --no_dspy_gen \
21 |         --method naive_nodspy \
22 |         --lcb_version release_v2 \
23 |         --result_json_path="results/baselines_r1qwen7b_${difficulty}.json"
24 | done
25 | 


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/baselines_selfdebug/4o_mini_n_1_debug_public3_random.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | MAX_ROUND=3
 4 | for difficulty in easy medium hard
 5 | do
 6 |     python evaluate_multiprocess.py \
 7 |         --difficulty=${difficulty} \
 8 |         --temperature=1.0 \
 9 |         --num_threads=16 \
10 |         --n=1 \
11 |         --selection=random \
12 |         --lcb_version release_v2 \
13 |         --ablation_qwq_vanilla_without_reasoning \
14 |         --ablation_qwq_debug_with_4o_mini \
15 |         --num_round ${MAX_ROUND} \
16 |         --result_json_path="results/final_4o_mini_n_1_debug_public3_select_random_${difficulty}.json" 
17 | 
18 | done
19 | 


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/baselines_selfdebug/o1_mini_n_1_debug_public3_random.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Self-debug run: o1-mini generator with --no_dspy_gen, n=1, random
 3 | # selection and both ablation flags, invoked once per difficulty level with
 4 | # MAX_ROUND passed as --num_round.
 5 | # The last argument line has no trailing backslash, so `done` can never be
 6 | # folded into the python command line.
 7 | 
 8 | MAX_ROUND=3
 9 | for difficulty in easy medium hard
10 | do
11 |     python evaluate_multiprocess.py \
12 |         --difficulty="${difficulty}" \
13 |         --temperature=0.7 \
14 |         --num_threads=16 \
15 |         --n=1 \
16 |         --no_dspy_gen \
17 |         --generator=o1-mini \
18 |         --selection=random \
19 |         --lcb_version release_v2 \
20 |         --ablation_qwq_vanilla_without_reasoning \
21 |         --ablation_qwq_debug_with_4o_mini \
22 |         --num_round "${MAX_ROUND}" \
23 |         --result_json_path="results/final_o1_mini_n_1_debug_public3_select_random_${difficulty}.json"
24 | done
25 | 


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/baselines_selfdebug/qwen0.5b_n_1_debug_public3_random.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | MAX_ROUND=3
 4 | for difficulty in easy medium hard
 5 | do
 6 |     python evaluate_multiprocess.py \
 7 |         --difficulty=${difficulty} \
 8 |         --temperature=0.7 \
 9 |         --num_threads=16 \
10 |         --n=1 \
11 |         --selection=random \
12 |         --api_name openai/Qwen/Qwen2.5-Coder-0.5B-Instruct \
13 |         --api_base http://localhost:8000/v1 \
14 |         --lcb_version release_v2 \
15 |         --no_dspy_gen \
16 |         --num_round ${MAX_ROUND} \
17 |         --result_json_path="results/final_qwen0.5b_n_1_debug_public3_select_random_${difficulty}.json"
18 | done
19 | 


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/baselines_selfdebug/qwen1.5b_n_1_debug_public3_random.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | MAX_ROUND=3
 4 | for difficulty in easy medium hard
 5 | do
 6 |     python evaluate_multiprocess.py \
 7 |         --difficulty=${difficulty} \
 8 |         --temperature=0.7 \
 9 |         --num_threads=16 \
10 |         --n=1 \
11 |         --selection=random \
12 |         --api_name openai/Qwen/Qwen2.5-Coder-1.5B-Instruct \
13 |         --api_base http://localhost:8000/v1 \
14 |         --lcb_version release_v2 \
15 |         --no_dspy_gen \
16 |         --num_round ${MAX_ROUND} \
17 |         --result_json_path="results/final_qwen1.5b_n_1_debug_public3_select_random_${difficulty}.json"
18 | done
19 | 


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/baselines_selfdebug/qwen14b_n_1_debug_public3_random.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | MAX_ROUND=3
 4 | for difficulty in easy medium hard
 5 | do
 6 |     python evaluate_multiprocess.py \
 7 |         --difficulty=${difficulty} \
 8 |         --temperature=0.7 \
 9 |         --num_threads=16 \
10 |         --n=1 \
11 |         --selection=random \
12 |         --api_name openai/Qwen/Qwen2.5-Coder-14B-Instruct \
13 |         --api_base http://localhost:8000/v1 \
14 |         --lcb_version release_v2 \
15 |         --num_round ${MAX_ROUND} \
16 |         --result_json_path="results/final_qwen14b_n_1_debug_public3_select_random_${difficulty}.json"
17 | done
18 | 


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/baselines_selfdebug/qwen32b_n_1_debug_public3_random.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | MAX_ROUND=3
 4 | for difficulty in easy medium hard
 5 | do
 6 |     python evaluate_multiprocess.py \
 7 |         --difficulty=${difficulty} \
 8 |         --temperature=0.7 \
 9 |         --num_threads=16 \
10 |         --n=1 \
11 |         --selection=random \
12 |         --api_name openai/Qwen/Qwen2.5-Coder-32B-Instruct \
13 |         --api_base http://localhost:8000/v1 \
14 |         --lcb_version release_v2 \
15 |         --num_round ${MAX_ROUND} \
16 |         --result_json_path="results/final_qwen32b_n_1_debug_public3_select_random_${difficulty}.json"
17 | done
18 | 


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/baselines_selfdebug/qwen3b_n_1_debug_public3_random.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | MAX_ROUND=3
 4 | for difficulty in easy medium hard
 5 | do
 6 |     python evaluate_multiprocess.py \
 7 |         --difficulty=${difficulty} \
 8 |         --temperature=0.7 \
 9 |         --num_threads=16 \
10 |         --n=1 \
11 |         --selection=random \
12 |         --api_name openai/Qwen/Qwen2.5-Coder-3B-Instruct \
13 |         --api_base http://localhost:8000/v1 \
14 |         --lcb_version release_v2 \
15 |         --no_dspy_gen \
16 |         --num_round ${MAX_ROUND} \
17 |         --result_json_path="results/final_qwen3b_n_1_debug_public3_select_random_${difficulty}.json"
18 | done
19 | 


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/baselines_selfdebug/qwen7b_n_1_debug_public3_random.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | MAX_ROUND=3
 4 | for difficulty in easy medium hard
 5 | do
 6 |     python evaluate_multiprocess.py \
 7 |         --difficulty=${difficulty} \
 8 |         --temperature=0.7 \
 9 |         --num_threads=16 \
10 |         --n=1 \
11 |         --selection=random \
12 |         --api_name openai/Qwen/Qwen2.5-Coder-7B-Instruct \
13 |         --api_base http://localhost:8000/v1 \
14 |         --lcb_version release_v2 \
15 |         --num_round ${MAX_ROUND} \
16 |         --result_json_path="results/final_qwen7b_n_1_debug_public3_select_random_${difficulty}.json"
17 | done
18 | 


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/baselines_selfdebug/qwq32b_n_1_debug_public3_random.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | MAX_ROUND=3
 4 | for difficulty in easy medium hard
 5 | do
 6 |     /root/miniconda3/envs/sstar/bin/python evaluate_multiprocess.py \
 7 |         --difficulty=${difficulty} \
 8 |         --temperature=0.7 \
 9 |         --num_threads=16 \
10 |         --n=1 \
11 |         --selection=random \
12 |         --api_name Qwen/QwQ-32B-Preview \
13 |         --api_base http://localhost:8000/v1 \
14 |         --lcb_version release_v2 \
15 |         --num_round ${MAX_ROUND} \
16 |         --no_dspy_gen \
17 |         --ablation_qwq_vanilla_without_reasoning \
18 |         --ablation_qwq_debug_with_4o_mini \
19 |         --result_json_path="results/final_qwq32b_n_1_debug_public3_select_random_${difficulty}.json"
20 | done
21 | 


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/baselines_selfdebug/r1qwen14b_n_1_debug_public3_random.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | MAX_ROUND=3
 4 | for difficulty in easy medium hard
 5 | do
 6 |     /root/miniconda3/envs/sstar/bin/python evaluate_multiprocess.py \
 7 |         --difficulty=${difficulty} \
 8 |         --temperature=0.7 \
 9 |         --num_threads=16 \
10 |         --n=1 \
11 |         --selection=random \
12 |         --api_name deepseek-ai/DeepSeek-R1-Distill-Qwen-14B \
13 |         --api_base http://localhost:8000/v1 \
14 |         --lcb_version release_v2 \
15 |         --num_round ${MAX_ROUND} \
16 | 	    --no_dspy_gen \
17 |         --ablation_qwq_vanilla_without_reasoning \
18 |         --ablation_qwq_debug_with_4o_mini \
19 |         --result_json_path="results/final_r1qwen14b_n_1_debug_public3_select_random_${difficulty}.json"
20 | done
21 | 


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/baselines_selfdebug/r1qwen32b_n_1_debug_public3_random.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | MAX_ROUND=3
 4 | for difficulty in easy medium hard
 5 | do
 6 |     /root/miniconda3/envs/sstar/bin/python evaluate_multiprocess.py \
 7 |         --difficulty=${difficulty} \
 8 |         --temperature=0.7 \
 9 |         --num_threads=16 \
10 |         --n=1 \
11 |         --selection=random \
12 |         --api_name deepseek-ai/DeepSeek-R1-Distill-Qwen-32B \
13 |         --api_base http://localhost:8000/v1 \
14 |         --lcb_version release_v2 \
15 |         --num_round ${MAX_ROUND} \
16 | 	    --no_dspy_gen \
17 |         --ablation_qwq_vanilla_without_reasoning \
18 |         --ablation_qwq_debug_with_4o_mini \
19 |         --result_json_path="results/final_r1qwen32b_n_1_debug_public3_select_random_${difficulty}.json"
20 | done


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/baselines_selfdebug/r1qwen7b_n_1_debug_public3_random.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | MAX_ROUND=3
 4 | for difficulty in easy medium hard
 5 | do
 6 |     /root/miniconda3/envs/sstar/bin/python evaluate_multiprocess.py \
 7 |         --difficulty=${difficulty} \
 8 |         --temperature=0.7 \
 9 |         --num_threads=16 \
10 |         --n=1 \
11 |         --selection=random \
12 |         --api_name deepseek-ai/DeepSeek-R1-Distill-Qwen-7B \
13 |         --api_base http://localhost:8000/v1 \
14 |         --lcb_version release_v2 \
15 |         --num_round ${MAX_ROUND} \
16 | 	    --no_dspy_gen \
17 |         --ablation_qwq_vanilla_without_reasoning \
18 |         --ablation_qwq_debug_with_4o_mini \
19 |         --result_json_path="results/final_r1qwen7b_n_1_debug_public3_select_random_${difficulty}.json"
20 | done
21 | 


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/final_first_cached/4omini_n_16_debug_public3_select_first_cached.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | MAX_ROUND=3
 4 | for difficulty in easy medium hard
 5 | do
 6 |     python evaluate_multiprocess.py \
 7 |         --difficulty=${difficulty} \
 8 |         --temperature=0.7 \
 9 |         --num_threads=32 \
10 |         --n=16 \
11 |         --selection=first \
12 |         --lcb_version release_v2 \
13 |         --num_round ${MAX_ROUND} \
14 |         --result_json_path="results/final_4omini_n_16_debug_public3_select_first_cached_${difficulty}.json" \
15 |         --load_cached_preds \
16 |         --cached_preds_path="results/final_4omini_n_16_debug_public3_select_oracle_${difficulty}.json"
17 | done
18 | 


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/final_first_cached/batch_small_models_first.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | echo "Starting Qwen-0.5B evaluation..."
 4 | bash scripts/final_first_cached/qwen0.5b_n_16_debug_public3_select_first_cached.sh
 5 | 
 6 | echo "Starting Qwen-1.5B evaluation..."
 7 | bash scripts/final_first_cached/qwen1.5b_n_16_debug_public3_select_first_cached.sh
 8 | 
 9 | echo "Starting Qwen-3B evaluation..."
10 | bash scripts/final_first_cached/qwen3b_n_16_debug_public3_select_first_cached.sh
11 | 
12 | echo "All evaluations completed!"
13 | 


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/final_first_cached/o1mini_n_16_debug_public3_select_first_cached.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | MAX_ROUND=3
 4 | for difficulty in easy medium hard
 5 | do
 6 |     python evaluate_multiprocess.py \
 7 |         --difficulty=${difficulty} \
 8 |         --temperature=0.7 \
 9 |         --num_threads=32 \
10 |         --n=16 \
11 |         --selection=first \
12 |         --lcb_version release_v2 \
13 |         --num_round ${MAX_ROUND} \
14 |         --result_json_path="results/final_o1mini_n_16_debug_public3_select_first_cached_${difficulty}.json" \
15 |         --load_cached_preds \
16 |         --cached_preds_path="results/final_o1mini_n_16_debug_public3_select_oracle_${difficulty}.json"
17 | done
18 | 
19 | 


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/final_first_cached/qwen0.5b_n_16_debug_public3_select_first_cached.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | MAX_ROUND=3
 4 | for difficulty in easy medium hard
 5 | do
 6 |     python evaluate_multiprocess.py \
 7 |         --difficulty=${difficulty} \
 8 |         --temperature=0.7 \
 9 |         --num_threads=32 \
10 |         --n=16 \
11 |         --selection=first \
12 |         --api_name openai/Qwen/Qwen2.5-Coder-0.5B-Instruct \
13 |         --api_base http://localhost:8000/v1 \
14 |         --lcb_version release_v2 \
15 |         --num_round ${MAX_ROUND} \
16 |         --result_json_path="results/final_qwen0.5b_n_16_debug_public3_select_first_cached_${difficulty}.json" \
17 |         --load_cached_preds \
18 |         --cached_preds_path="results/final_qwen0.5b_n_16_debug_public3_select_oracle_${difficulty}.json"
19 | done


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/final_first_cached/qwen1.5b_n_16_debug_public3_select_first_cached.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | MAX_ROUND=3
 4 | for difficulty in easy medium hard
 5 | do
 6 |     python evaluate_multiprocess.py \
 7 |         --difficulty=${difficulty} \
 8 |         --temperature=0.7 \
 9 |         --num_threads=32 \
10 |         --n=16 \
11 |         --selection=first \
12 |         --api_name openai/Qwen/Qwen2.5-Coder-1.5B-Instruct \
13 |         --api_base http://localhost:8000/v1 \
14 |         --lcb_version release_v2 \
15 |         --num_round ${MAX_ROUND} \
16 |         --result_json_path="results/final_qwen1.5b_n_16_debug_public3_select_first_cached_${difficulty}.json" \
17 |         --load_cached_preds \
18 |         --cached_preds_path="results/final_qwen1.5b_n_16_debug_public3_select_oracle_${difficulty}.json"
19 | done


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/final_first_cached/qwen14b_n_16_debug_public3_select_first_cached.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | MAX_ROUND=3
 4 | for difficulty in easy medium hard
 5 | do
 6 |     python evaluate_multiprocess.py \
 7 |         --difficulty=${difficulty} \
 8 |         --temperature=0.7 \
 9 |         --num_threads=32 \
10 |         --n=16 \
11 |         --selection=first \
12 |         --api_name openai/Qwen/Qwen2.5-Coder-14B-Instruct \
13 |         --api_base http://localhost:8000/v1 \
14 |         --lcb_version release_v2 \
15 |         --num_round ${MAX_ROUND} \
16 |         --result_json_path="results/final_qwen14b_n_16_debug_public3_select_first_cached_${difficulty}.json" \
17 |         --load_cached_preds \
18 |         --cached_preds_path="results/final_qwen14b_n_16_debug_public3_select_oracle_${difficulty}.json"
19 | done
20 | 
21 | 


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/final_first_cached/qwen32b_n_16_debug_public3_select_first_cached.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | MAX_ROUND=3
 4 | for difficulty in easy medium hard
 5 | do
 6 |     python evaluate_multiprocess.py \
 7 |         --difficulty=${difficulty} \
 8 |         --temperature=0.7 \
 9 |         --num_threads=32 \
10 |         --n=16 \
11 |         --selection=first \
12 |         --api_name openai/Qwen/Qwen2.5-Coder-32B-Instruct \
13 |         --api_base http://localhost:8000/v1 \
14 |         --lcb_version release_v2 \
15 |         --num_round ${MAX_ROUND} \
16 |         --result_json_path="results/final_qwen32b_n_16_debug_public3_select_first_cached_${difficulty}.json" \
17 |         --load_cached_preds \
18 |         --cached_preds_path="results/final_qwen32b_n_16_debug_public3_select_oracle_${difficulty}.json"
19 | done
20 | 


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/final_first_cached/qwen3b_n_16_debug_public3_select_first_cached.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | MAX_ROUND=3
 4 | for difficulty in easy medium hard
 5 | do
 6 |     python evaluate_multiprocess.py \
 7 |         --difficulty=${difficulty} \
 8 |         --temperature=0.7 \
 9 |         --num_threads=32 \
10 |         --n=16 \
11 |         --selection=first \
12 |         --api_name openai/Qwen/Qwen2.5-Coder-3B-Instruct \
13 |         --api_base http://localhost:8000/v1 \
14 |         --lcb_version release_v2 \
15 |         --num_round ${MAX_ROUND} \
16 |         --result_json_path="results/final_qwen3b_n_16_debug_public3_select_first_cached_${difficulty}.json" \
17 |         --load_cached_preds \
18 |         --cached_preds_path="results/final_qwen3b_n_16_debug_public3_select_oracle_${difficulty}.json"
19 | done


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/final_first_cached/qwen7b_n_16_debug_public3_select_first_cached.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | MAX_ROUND=3
 4 | for difficulty in easy medium hard
 5 | do
 6 |     python evaluate_multiprocess.py \
 7 |         --difficulty=${difficulty} \
 8 |         --temperature=0.7 \
 9 |         --num_threads=32 \
10 |         --n=16 \
11 |         --selection=first \
12 |         --api_name openai/Qwen/Qwen2.5-Coder-7B-Instruct \
13 |         --api_base http://localhost:8000/v1 \
14 |         --lcb_version release_v2 \
15 |         --num_round ${MAX_ROUND} \
16 |         --result_json_path="results/final_qwen7b_n_16_debug_public3_select_first_cached_${difficulty}.json" \
17 |         --load_cached_preds \
18 |         --cached_preds_path="results/final_qwen7b_n_16_debug_public3_select_oracle_${difficulty}.json"
19 | done
20 | 


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/final_first_cached/r1qwen14b_n_16_debug_public3_select_first_cached.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | MAX_ROUND=3
 4 | for difficulty in easy medium hard
 5 | do
 6 |     python evaluate_multiprocess.py \
 7 |         --difficulty=${difficulty} \
 8 |         --temperature=0.7 \
 9 |         --num_threads=32 \
10 |         --n=16 \
11 |         --selection=first \
12 |         --api_name deepseek-ai/DeepSeek-R1-Distill-Qwen-14B \
13 |         --api_base http://localhost:8000/v1 \
14 |         --lcb_version release_v2 \
15 |         --num_round ${MAX_ROUND} \
16 |         --result_json_path="results/final_r1qwen14b_n_16_debug_public3_select_first_cached_${difficulty}.json" \
17 |         --load_cached_preds \
18 |         --cached_preds_path="results/final_r1qwen14b_n_16_debug_public3_select_oracle_${difficulty}.json"
19 | done
20 | 
21 | 


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/final_first_cached/r1qwen32b_n_16_debug_public3_select_first_cached.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | MAX_ROUND=3
 4 | for difficulty in easy medium hard
 5 | do
 6 |     python evaluate_multiprocess.py \
 7 |         --difficulty=${difficulty} \
 8 |         --temperature=0.7 \
 9 |         --num_threads=32 \
10 |         --n=16 \
11 |         --selection=first \
12 |         --api_name deepseek-ai/DeepSeek-R1-Distill-Qwen-32B \
13 |         --api_base http://localhost:8000/v1 \
14 |         --lcb_version release_v2 \
15 |         --num_round ${MAX_ROUND} \
16 |         --result_json_path="results/final_r1qwen32b_n_16_debug_public3_select_first_cached_${difficulty}.json" \
17 |         --load_cached_preds \
18 |         --cached_preds_path="results/final_r1qwen32b_n_16_debug_public3_select_oracle_${difficulty}.json"
19 | done
20 | 
21 | 


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/final_first_cached/r1qwen7b_n_16_debug_public3_select_first_cached.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | MAX_ROUND=3
 4 | for difficulty in easy medium hard
 5 | do
 6 |     python evaluate_multiprocess.py \
 7 |         --difficulty=${difficulty} \
 8 |         --temperature=0.7 \
 9 |         --num_threads=32 \
10 |         --n=16 \
11 |         --selection=first \
12 |         --api_name deepseek-ai/DeepSeek-R1-Distill-Qwen-7B \
13 |         --api_base http://localhost:8000/v1 \
14 |         --lcb_version release_v2 \
15 |         --num_round ${MAX_ROUND} \
16 |         --result_json_path="results/final_r1qwen7b_n_16_debug_public3_select_first_cached_${difficulty}.json" \
17 |         --load_cached_preds \
18 |         --cached_preds_path="results/final_r1qwen7b_n_16_debug_public3_select_oracle_${difficulty}.json"
19 | done
20 | 
21 | 


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/final_gentest_notimeout_cached/4omini_n_16_debug_public3_select_4omini_cached.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # 4o-mini run with n=16, selecting via generated tests with 4o-mini as the
 3 | # --test_generator, once per difficulty level. Passes --load_cached_preds and
 4 | # points --cached_preds_path at the select_oracle result file.
 5 | #
 6 | # NOTE(review): --selection was spelled generated_tests_notimeout here, while
 7 | # every other script in final_gentest_notimeout_cached passes
 8 | # generated_tests_no_timeout. Aligned with the majority spelling; confirm
 9 | # against the choices accepted by evaluate_multiprocess.py.
10 | 
11 | MAX_ROUND=3
12 | for difficulty in easy medium hard
13 | do
14 |     python evaluate_multiprocess.py \
15 |         --difficulty=${difficulty} \
16 |         --temperature=0.7 \
17 |         --num_threads=32 \
18 |         --n=16 \
19 |         --selection=generated_tests_no_timeout \
20 |         --test_generator 4o-mini \
21 |         --lcb_version release_v2 \
22 |         --num_round ${MAX_ROUND} \
23 |         --result_json_path="results/final_4omini_n_16_debug_public3_select_4omini_cached_${difficulty}.json" \
24 |         --load_cached_preds \
25 |         --cached_preds_path="results/final_4omini_n_16_debug_public3_select_oracle_${difficulty}.json"
26 | done
27 | 


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/final_gentest_notimeout_cached/batch_small_models_gentest.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | echo "Starting Qwen-0.5B evaluation..."
 4 | bash scripts/final_gentest_notimeout_cached/qwen0.5b_n_16_debug_public3_select_4omini_cached.sh
 5 | 
 6 | echo "Starting Qwen-1.5B evaluation..."
 7 | bash scripts/final_gentest_notimeout_cached/qwen1.5b_n_16_debug_public3_select_4omini_cached.sh
 8 | 
 9 | echo "Starting Qwen-3B evaluation..."
10 | bash scripts/final_gentest_notimeout_cached/qwen3b_n_16_debug_public3_select_4omini_cached.sh
11 | 
12 | echo "All evaluations completed!"


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/final_gentest_notimeout_cached/o1mini_n_16_debug_public3_select_4omini_cached.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | MAX_ROUND=3
 4 | for difficulty in easy medium hard
 5 | do
 6 |     python evaluate_multiprocess.py \
 7 |         --difficulty=${difficulty} \
 8 |         --temperature=0.7 \
 9 |         --num_threads=32 \
10 |         --n=16 \
11 |         --selection=generated_tests_no_timeout \
12 |         --test_generator 4o-mini \
13 |         --lcb_version release_v2 \
14 |         --num_round ${MAX_ROUND} \
15 |         --result_json_path="results/final_o1mini_n_16_debug_public3_select_4omini_cached_${difficulty}.json" \
16 |         --load_cached_preds \
17 |         --cached_preds_path="results/final_o1mini_n_16_debug_public3_select_oracle_${difficulty}.json"
18 | done
19 | 
20 | 


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/final_gentest_notimeout_cached/qwen0.5b_n_16_debug_public3_select_4omini_cached.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | MAX_ROUND=3
 4 | for difficulty in easy medium hard
 5 | do
 6 |     python evaluate_multiprocess.py \
 7 |         --difficulty=${difficulty} \
 8 |         --temperature=0.7 \
 9 |         --num_threads=32 \
10 |         --n=16 \
11 |         --selection=generated_tests_no_timeout \
12 |         --api_name openai/Qwen/Qwen2.5-Coder-0.5B-Instruct \
13 |         --api_base http://localhost:8000/v1 \
14 |         --test_generator 4o-mini \
15 |         --lcb_version release_v2 \
16 |         --num_round ${MAX_ROUND} \
17 |         --result_json_path="results/final_qwen0.5b_n_16_debug_public3_select_4omini_cached_${difficulty}.json" \
18 |         --load_cached_preds \
19 |         --cached_preds_path="results/final_qwen0.5b_n_16_debug_public3_select_oracle_${difficulty}.json"
20 | done


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/final_gentest_notimeout_cached/qwen1.5b_n_16_debug_public3_select_4omini_cached.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | MAX_ROUND=3
 4 | for difficulty in easy medium hard
 5 | do
 6 |     python evaluate_multiprocess.py \
 7 |         --difficulty=${difficulty} \
 8 |         --temperature=0.7 \
 9 |         --num_threads=32 \
10 |         --n=16 \
11 |         --selection=generated_tests_no_timeout \
12 |         --api_name openai/Qwen/Qwen2.5-Coder-1.5B-Instruct \
13 |         --api_base http://localhost:8000/v1 \
14 |         --test_generator 4o-mini \
15 |         --lcb_version release_v2 \
16 |         --num_round ${MAX_ROUND} \
17 |         --result_json_path="results/final_qwen1.5b_n_16_debug_public3_select_4omini_cached_${difficulty}.json" \
18 |         --load_cached_preds \
19 |         --cached_preds_path="results/final_qwen1.5b_n_16_debug_public3_select_oracle_${difficulty}.json"
20 | done


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/final_gentest_notimeout_cached/qwen14b_n_16_debug_public3_select_4omini_cached.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | MAX_ROUND=3
 4 | for difficulty in easy medium hard
 5 | do
 6 |     python evaluate_multiprocess.py \
 7 |         --difficulty=${difficulty} \
 8 |         --temperature=0.7 \
 9 |         --num_threads=32 \
10 |         --n=16 \
11 |         --selection=generated_tests_no_timeout \
12 |         --api_name openai/Qwen/Qwen2.5-Coder-14B-Instruct \
13 |         --api_base http://localhost:8000/v1 \
14 |         --test_generator 4o-mini \
15 |         --lcb_version release_v2 \
16 |         --num_round ${MAX_ROUND} \
17 |         --result_json_path="results/final_qwen14b_n_16_debug_public3_select_4omini_cached_${difficulty}.json" \
18 |         --load_cached_preds \
19 |         --cached_preds_path="results/final_qwen14b_n_16_debug_public3_select_oracle_${difficulty}.json"
20 | done
21 | 
22 | 


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/final_gentest_notimeout_cached/qwen32b_n_16_debug_public3_select_4omini_cached.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | MAX_ROUND=3
 4 | for difficulty in easy medium hard
 5 | do
 6 |     python evaluate_multiprocess.py \
 7 |         --difficulty=${difficulty} \
 8 |         --temperature=0.7 \
 9 |         --num_threads=32 \
10 |         --n=16 \
11 |         --selection=generated_tests_no_timeout \
12 |         --api_name openai/Qwen/Qwen2.5-Coder-32B-Instruct \
13 |         --api_base http://localhost:8000/v1 \
14 |         --test_generator 4o-mini \
15 |         --lcb_version release_v2 \
16 |         --num_round ${MAX_ROUND} \
17 |         --result_json_path="results/final_qwen32b_n_16_debug_public3_select_4omini_cached_${difficulty}.json" \
18 |         --load_cached_preds \
19 |         --cached_preds_path="results/final_qwen32b_n_16_debug_public3_select_oracle_${difficulty}.json"
20 | done
21 | 


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/final_gentest_notimeout_cached/qwen3b_n_16_debug_public3_select_4omini_cached.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | MAX_ROUND=3
 4 | for difficulty in easy medium hard
 5 | do
 6 |     python evaluate_multiprocess.py \
 7 |         --difficulty=${difficulty} \
 8 |         --temperature=0.7 \
 9 |         --num_threads=32 \
10 |         --n=16 \
11 |         --selection=generated_tests_no_timeout \
12 |         --api_name openai/Qwen/Qwen2.5-Coder-3B-Instruct \
13 |         --api_base http://localhost:8000/v1 \
14 |         --test_generator 4o-mini \
15 |         --lcb_version release_v2 \
16 |         --num_round ${MAX_ROUND} \
17 |         --result_json_path="results/final_qwen3b_n_16_debug_public3_select_4omini_cached_${difficulty}.json" \
18 |         --load_cached_preds \
19 |         --cached_preds_path="results/final_qwen3b_n_16_debug_public3_select_oracle_${difficulty}.json"
20 | done


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/final_gentest_notimeout_cached/qwen7b_n_16_debug_public3_select_4omini_cached.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | MAX_ROUND=3
 4 | for difficulty in easy medium hard
 5 | do
 6 |     python evaluate_multiprocess.py \
 7 |         --difficulty=${difficulty} \
 8 |         --temperature=0.7 \
 9 |         --num_threads=32 \
10 |         --n=16 \
11 |         --selection=generated_tests_no_timeout \
12 |         --api_name openai/Qwen/Qwen2.5-Coder-7B-Instruct \
13 |         --api_base http://localhost:8000/v1 \
14 |         --test_generator 4o-mini \
15 |         --lcb_version release_v2 \
16 |         --num_round ${MAX_ROUND} \
17 |         --result_json_path="results/final_qwen7b_n_16_debug_public3_select_4omini_cached_${difficulty}.json" \
18 |         --load_cached_preds \
19 |         --cached_preds_path="results/final_qwen7b_n_16_debug_public3_select_oracle_${difficulty}.json"
20 | done
21 | 


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/final_gentest_notimeout_cached/r1qwen14b_n_16_debug_public3_select_4omini_cached.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | MAX_ROUND=3
 4 | for difficulty in easy medium hard
 5 | do
 6 |     python evaluate_multiprocess.py \
 7 |         --difficulty=${difficulty} \
 8 |         --temperature=0.7 \
 9 |         --num_threads=32 \
10 |         --n=16 \
11 |         --selection=generated_tests_no_timeout \
12 |         --api_name deepseek-ai/DeepSeek-R1-Distill-Qwen-14B \
13 |         --api_base http://localhost:8000/v1 \
14 |         --test_generator 4o-mini \
15 |         --lcb_version release_v2 \
16 |         --num_round ${MAX_ROUND} \
17 |         --result_json_path="results/final_r1qwen14b_n_16_debug_public3_select_4omini_cached_${difficulty}.json" \
18 |         --load_cached_preds \
19 |         --cached_preds_path="results/final_r1qwen14b_n_16_debug_public3_select_oracle_${difficulty}.json"
20 | done
21 | 
22 | 


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/final_gentest_notimeout_cached/r1qwen32b_n_16_debug_public3_select_4omini_cached.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # DeepSeek-R1-Distill-Qwen-32B run with n=16 and
 3 | # --selection=generated_tests_no_timeout, using 4o-mini as --test_generator,
 4 | # once per difficulty level. Passes --load_cached_preds with the
 5 | # select_oracle result file as the cache path.
 6 | #
 7 | # The cache path previously read ..._public3_oracle_..., dropping the
 8 | # "select_" segment that the first_cached script for this same model and all
 9 | # other cached scripts use (..._public3_select_oracle_...); it now matches.
10 | # NOTE(review): confirm against the file name the r1qwen32b oracle run writes.
11 | 
12 | MAX_ROUND=3
13 | for difficulty in easy medium hard
14 | do
15 |     python evaluate_multiprocess.py \
16 |         --difficulty=${difficulty} \
17 |         --temperature=0.7 \
18 |         --num_threads=32 \
19 |         --n=16 \
20 |         --selection=generated_tests_no_timeout \
21 |         --api_name deepseek-ai/DeepSeek-R1-Distill-Qwen-32B \
22 |         --api_base http://localhost:8000/v1 \
23 |         --test_generator 4o-mini \
24 |         --lcb_version release_v2 \
25 |         --num_round ${MAX_ROUND} \
26 |         --result_json_path="results/final_r1qwen32b_n_16_debug_public3_select_4omini_cached_${difficulty}.json" \
27 |         --load_cached_preds \
28 |         --cached_preds_path="results/final_r1qwen32b_n_16_debug_public3_select_oracle_${difficulty}.json"
29 | done
30 | 
31 | 


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/final_gentest_notimeout_cached/r1qwen7b_n_16_debug_public3_select_4omini_cached.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | MAX_ROUND=3
 4 | for difficulty in easy medium hard
 5 | do
 6 |     python evaluate_multiprocess.py \
 7 |         --difficulty=${difficulty} \
 8 |         --temperature=0.7 \
 9 |         --num_threads=32 \
10 |         --n=16 \
11 |         --selection=generated_tests_no_timeout \
12 |         --api_name deepseek-ai/DeepSeek-R1-Distill-Qwen-7B \
13 |         --api_base http://localhost:8000/v1 \
14 |         --test_generator 4o-mini \
15 |         --lcb_version release_v2 \
16 |         --num_round ${MAX_ROUND} \
17 |         --result_json_path="results/final_r1qwen7b_n_16_debug_public3_select_4omini_cached_${difficulty}.json" \
18 |         --load_cached_preds \
19 |         --cached_preds_path="results/final_r1qwen7b_n_16_debug_public3_select_oracle_${difficulty}.json"
20 | done
21 | 
22 | 


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/final_oracle/4omini_n_16_debug_public3_select_oracle.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | MAX_ROUND=3
 4 | for difficulty in easy medium hard
 5 | do
 6 |     python evaluate_multiprocess.py \
 7 |         --difficulty=${difficulty} \
 8 |         --temperature=0.7 \
 9 |         --num_threads=32 \
10 |         --n=16 \
11 |         --selection oracle_all_rounds \
12 |         --lcb_version release_v2 \
13 |         --num_round ${MAX_ROUND} \
14 |         --result_json_path="results/final_4omini_n_16_debug_public3_select_oracle_${difficulty}.json"
15 | done
16 | 


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/final_oracle/o1mini_n_16_debug_public3_select_oracle.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | MAX_ROUND=3
 4 | for difficulty in easy medium hard
 5 | do
 6 |     python evaluate_multiprocess.py \
 7 |         --difficulty=${difficulty} \
 8 |         --temperature=1.0 \
 9 |         --num_threads=32 \
10 |         --n=16 \
11 |         --generator o1-mini \
12 |         --selection oracle_all_rounds \
13 |         --lcb_version release_v2 \
14 |         --num_round ${MAX_ROUND} \
15 |         --ablation_qwq_vanilla_without_reasoning \
16 |         --ablation_qwq_debug_with_4o_mini \
17 |         --no_dspy_gen \
18 |         --result_json_path="results/final_o1mini_n_16_debug_public3_select_oracle_${difficulty}.json"
19 | done
20 | 


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/final_oracle/qwen0.5b_n_16_debug_public3_select_oracle.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | MAX_ROUND=3
 4 | for difficulty in easy medium hard
 5 | do
 6 |     python evaluate_multiprocess.py \
 7 |         --difficulty=${difficulty} \
 8 |         --temperature=0.7 \
 9 |         --num_threads=32 \
10 |         --n=16 \
11 |         --no_dspy_gen \
12 |         --api_name Qwen/Qwen2.5-Coder-0.5B-Instruct \
13 |         --api_base http://localhost:8000/v1 \
14 |         --selection oracle_all_rounds \
15 |         --lcb_version release_v2 \
16 |         --num_round ${MAX_ROUND} \
17 |         --result_json_path="results/final_qwen0.5b_n_16_debug_public3_select_oracle_${difficulty}.json"
18 | done
19 | 


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/final_oracle/qwen1.5b_n_32_debug_public3_select_oracle.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | MAX_ROUND=3
 4 | for difficulty in easy medium hard
 5 | do
 6 |     python evaluate_multiprocess.py \
 7 |         --difficulty=${difficulty} \
 8 |         --temperature=0.7 \
 9 |         --num_threads=32 \
10 |         --n=16 \
11 |         --no_dspy_gen \
12 |         --api_name Qwen/Qwen2.5-Coder-1.5B-Instruct \
13 |         --api_base http://localhost:8000/v1 \
14 |         --selection oracle_all_rounds \
15 |         --lcb_version release_v2 \
16 |         --num_round ${MAX_ROUND} \
17 |         --result_json_path="results/final_qwen1.5b_n_16_debug_public3_select_oracle_${difficulty}.json"
18 | done
19 |     


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/final_oracle/qwen14b_n_16_debug_public3_select_oracle.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | MAX_ROUND=3
 4 | for difficulty in easy medium hard
 5 | do
 6 |     python evaluate_multiprocess.py \
 7 |         --difficulty=${difficulty} \
 8 |         --temperature=0.7 \
 9 |         --num_threads=32 \
10 |         --n=16 \
11 |         --api_name openai/Qwen/Qwen2.5-Coder-14B-Instruct \
12 |         --api_base http://localhost:8000/v1 \
13 |         --selection oracle_all_rounds \
14 |         --lcb_version release_v2 \
15 |         --num_round ${MAX_ROUND} \
16 |         --result_json_path="results/final_qwen14b_n_16_debug_public3_select_oracle_${difficulty}.json"
17 | done
18 | 


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/final_oracle/qwen32b_n_16_debug_public3_select_oracle.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | MAX_ROUND=3
 4 | for difficulty in easy medium hard
 5 | do
 6 |     python evaluate_multiprocess.py \
 7 |         --difficulty=${difficulty} \
 8 |         --temperature=0.7 \
 9 |         --num_threads=16 \
10 |         --n=16 \
11 |         --api_name openai/Qwen/Qwen2.5-Coder-32B-Instruct \
12 |         --api_base http://localhost:8000/v1 \
13 |         --selection oracle_all_rounds \
14 |         --lcb_version release_v2 \
15 |         --num_round ${MAX_ROUND} \
16 |         --result_json_path="results/final_qwen32b_n_16_debug_public3_select_oracle_${difficulty}.json"
17 | done
18 | 


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/final_oracle/qwen32b_n_16_debug_public3_select_oracle_icl_patterns.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | MAX_ROUND=3
 4 | for difficulty in medium
 5 | do
 6 |     num_icl=1
 7 |     python evaluate_multiprocess.py \
 8 |         --difficulty=${difficulty} \
 9 |         --temperature=0.7 \
10 |         --num_threads=16 \
11 |         --n=16 \
12 |         --api_name openai/Qwen/Qwen2.5-Coder-32B-Instruct \
13 |         --api_base http://localhost:8000/v1 \
14 |         --selection oracle_all_rounds \
15 |         --lcb_version release_v2 \
16 |         --num_round ${MAX_ROUND} \
17 |         --num_icl_examples ${num_icl} \
18 |         --icl_retriever pattern \
19 |         --result_json_path="results_final/final_qwen32b_n_16_debug_public3_select_oracle_icl_${num_icl}_patterns_${difficulty}.json"
20 | done
21 | 


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/final_oracle/qwen3b_n_32_debug_public3_select_oracle.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | MAX_ROUND=3
 4 | for difficulty in easy medium hard
 5 | do
 6 |     python evaluate_multiprocess.py \
 7 |         --difficulty=${difficulty} \
 8 |         --temperature=0.7 \
 9 |         --num_threads=32 \
10 |         --n=16 \
11 |         --no_dspy_gen \
12 |         --api_name Qwen/Qwen2.5-Coder-3B-Instruct \
13 |         --api_base http://localhost:8000/v1 \
14 |         --selection oracle_all_rounds \
15 |         --lcb_version release_v2 \
16 |         --num_round ${MAX_ROUND} \
17 |         --result_json_path="results/final_qwen3b_n_16_debug_public3_select_oracle_${difficulty}.json"
18 | done
19 | 


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/final_oracle/qwen7b_n_16_debug_public3_select_oracle.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | MAX_ROUND=3
 4 | for difficulty in easy medium hard
 5 | do
 6 |     python evaluate_multiprocess.py \
 7 |         --difficulty=${difficulty} \
 8 |         --temperature=0.7 \
 9 |         --num_threads=32 \
10 |         --n=16 \
11 |         --api_name openai/Qwen/Qwen2.5-Coder-7B-Instruct \
12 |         --api_base http://localhost:8000/v1 \
13 |         --selection oracle_all_rounds \
14 |         --lcb_version release_v2 \
15 |         --num_round ${MAX_ROUND} \
16 |         --result_json_path="results/final_qwen7b_n_16_debug_public3_select_oracle_${difficulty}.json"
17 | done
18 | 


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/final_oracle/qwq32b_n_16_debug_public3_select_oracle.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #
 3 | # Oracle-selection run for Qwen/QwQ-32B-Preview: invokes
 4 | # evaluate_multiprocess.py once per difficulty (easy, medium, hard) with
 5 | # --api_base http://localhost:8000/v1 and --selection oracle_all_rounds.
 6 | #
 7 | # Set PYTHON_BIN to choose the interpreter; when unset, the conda env path
 8 | # this script was written for is used.
 9 | #
10 | # NOTE(review): the result file name uses "_oracle_" where the other oracle
11 | # scripts use "_select_oracle_". Left unchanged here; confirm against
12 | # whatever reads this file before renaming.
13 | 
14 | PYTHON_BIN="${PYTHON_BIN:-/root/miniconda3/envs/sstar/bin/python}"
15 | MAX_ROUND=3
16 | for difficulty in easy medium hard
17 | do
18 |     "${PYTHON_BIN}" evaluate_multiprocess.py \
19 |         --difficulty=${difficulty} \
20 |         --temperature=0.7 \
21 |         --num_threads=16 \
22 |         --n=16 \
23 |         --api_name Qwen/QwQ-32B-Preview \
24 |         --api_base http://localhost:8000/v1 \
25 |         --lcb_version release_v2 \
26 |         --num_round ${MAX_ROUND} \
27 |         --selection oracle_all_rounds \
28 |         --no_dspy_gen \
29 |         --ablation_qwq_vanilla_without_reasoning \
30 |         --ablation_qwq_debug_with_4o_mini \
31 |         --result_json_path="results/final_qwq32b_n_16_debug_public3_oracle_${difficulty}.json"
32 | done
33 | 


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/final_oracle/r1qwen14b_n_16_debug_public3_select_oracle.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #
 3 | # Oracle-selection run for deepseek-ai/DeepSeek-R1-Distill-Qwen-14B: invokes
 4 | # evaluate_multiprocess.py once per difficulty (easy, medium, hard) with
 5 | # --api_base http://localhost:8000/v1 and --selection oracle_all_rounds.
 6 | #
 7 | # Set PYTHON_BIN to choose the interpreter; when unset, the conda env path
 8 | # this script was written for is used.
 9 | 
10 | PYTHON_BIN="${PYTHON_BIN:-/root/miniconda3/envs/sstar/bin/python}"
11 | MAX_ROUND=3
12 | for difficulty in easy medium hard
13 | do
14 |     "${PYTHON_BIN}" evaluate_multiprocess.py \
15 |         --difficulty=${difficulty} \
16 |         --temperature=0.7 \
17 |         --num_threads=32 \
18 |         --n=16 \
19 |         --api_name deepseek-ai/DeepSeek-R1-Distill-Qwen-14B \
20 |         --api_base http://localhost:8000/v1 \
21 |         --lcb_version release_v2 \
22 |         --num_round ${MAX_ROUND} \
23 |         --selection oracle_all_rounds \
24 |         --no_dspy_gen \
25 |         --ablation_qwq_vanilla_without_reasoning \
26 |         --ablation_qwq_debug_with_4o_mini \
27 |         --result_json_path="results/final_r1qwen14b_n_16_debug_public3_select_oracle_${difficulty}.json"
28 | done
29 | 


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/final_oracle/r1qwen32b_n_16_debug_public3_select_oracle.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #
 3 | # Oracle-selection run for deepseek-ai/DeepSeek-R1-Distill-Qwen-32B: invokes
 4 | # evaluate_multiprocess.py once per difficulty (easy, medium, hard) with
 5 | # --api_base http://localhost:8000/v1 and --selection oracle_all_rounds.
 6 | #
 7 | # Set PYTHON_BIN to choose the interpreter; when unset, the conda env path
 8 | # this script was written for is used.
 9 | 
10 | PYTHON_BIN="${PYTHON_BIN:-/root/miniconda3/envs/sstar/bin/python}"
11 | MAX_ROUND=3
12 | for difficulty in easy medium hard
13 | do
14 |     "${PYTHON_BIN}" evaluate_multiprocess.py \
15 |         --difficulty=${difficulty} \
16 |         --temperature=0.7 \
17 |         --num_threads=16 \
18 |         --n=16 \
19 |         --api_name deepseek-ai/DeepSeek-R1-Distill-Qwen-32B \
20 |         --api_base http://localhost:8000/v1 \
21 |         --lcb_version release_v2 \
22 |         --num_round ${MAX_ROUND} \
23 |         --selection oracle_all_rounds \
24 |         --no_dspy_gen \
25 |         --ablation_qwq_vanilla_without_reasoning \
26 |         --ablation_qwq_debug_with_4o_mini \
27 |         --result_json_path="results/final_r1qwen32b_n_16_debug_public3_select_oracle_${difficulty}.json"
28 | done
29 | 


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/final_oracle/r1qwen7b_n_16_debug_public3_select_oracle.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #
 3 | # Oracle-selection run for deepseek-ai/DeepSeek-R1-Distill-Qwen-7B: invokes
 4 | # evaluate_multiprocess.py once per difficulty (easy, medium, hard) with
 5 | # --api_base http://localhost:8000/v1 and --selection oracle_all_rounds.
 6 | #
 7 | # Set PYTHON_BIN to choose the interpreter; when unset, the conda env path
 8 | # this script was written for is used.
 9 | 
10 | PYTHON_BIN="${PYTHON_BIN:-/root/miniconda3/envs/sstar/bin/python}"
11 | MAX_ROUND=3
12 | for difficulty in easy medium hard
13 | do
14 |     "${PYTHON_BIN}" evaluate_multiprocess.py \
15 |         --difficulty=${difficulty} \
16 |         --temperature=0.7 \
17 |         --num_threads=32 \
18 |         --n=16 \
19 |         --api_name deepseek-ai/DeepSeek-R1-Distill-Qwen-7B \
20 |         --api_base http://localhost:8000/v1 \
21 |         --lcb_version release_v2 \
22 |         --num_round ${MAX_ROUND} \
23 |         --selection oracle_all_rounds \
24 |         --no_dspy_gen \
25 |         --ablation_qwq_vanilla_without_reasoning \
26 |         --ablation_qwq_debug_with_4o_mini \
27 |         --result_json_path="results/final_r1qwen7b_n_16_debug_public3_select_oracle_${difficulty}.json"
28 | done
29 | 


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/final_random_cached/4omini_n_16_debug_public3_select_random_cached.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | MAX_ROUND=3
 4 | for difficulty in easy medium hard
 5 | do
 6 |     python evaluate_multiprocess.py \
 7 |         --difficulty=${difficulty} \
 8 |         --temperature=0.7 \
 9 |         --num_threads=32 \
10 |         --n=16 \
11 |         --selection=random \
12 |         --seed=40 \
13 |         --lcb_version release_v2 \
14 |         --num_round ${MAX_ROUND} \
15 |         --result_json_path="results/final_4omini_n_16_debug_public3_select_random_cached_${difficulty}.json" \
16 |         --load_cached_preds \
17 |         --cached_preds_path="results/final_4omini_n_16_debug_public3_select_oracle_${difficulty}.json"
18 | done
19 | 


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/final_random_cached/batch_small_models_random.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | echo "Starting Qwen-0.5B evaluation..."
 4 | bash scripts/final_random_cached/qwen0.5b_n_16_debug_public3_select_random_cached.sh
 5 | 
 6 | echo "Starting Qwen-1.5B evaluation..."
 7 | bash scripts/final_random_cached/qwen1.5b_n_16_debug_public3_select_random_cached.sh
 8 | 
 9 | echo "Starting Qwen-3B evaluation..."
10 | bash scripts/final_random_cached/qwen3b_n_16_debug_public3_select_random_cached.sh
11 | 
12 | echo "All evaluations completed!"


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/final_random_cached/o1mini_n_16_debug_public3_select_random_cached.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | MAX_ROUND=3
 4 | for difficulty in easy medium hard
 5 | do
 6 |     python evaluate_multiprocess.py \
 7 |         --difficulty=${difficulty} \
 8 |         --temperature=0.7 \
 9 |         --num_threads=32 \
10 |         --n=16 \
11 |         --selection=random \
12 |         --seed=40 \
13 |         --test_generator 4o-mini \
14 |         --lcb_version release_v2 \
15 |         --num_round ${MAX_ROUND} \
16 |         --result_json_path="results/final_o1mini_n_16_debug_public3_select_random_cached_${difficulty}.json" \
17 |         --load_cached_preds \
18 |         --cached_preds_path="results/final_o1mini_n_16_debug_public3_select_oracle_${difficulty}.json"
19 | done
20 | 
21 | 


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/final_random_cached/qwen0.5b_n_16_debug_public3_select_random_cached.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | MAX_ROUND=3
 4 | for difficulty in easy medium hard
 5 | do
 6 |     python evaluate_multiprocess.py \
 7 |         --difficulty=${difficulty} \
 8 |         --temperature=0.7 \
 9 |         --num_threads=32 \
10 |         --n=16 \
11 |         --selection=random \
12 |         --seed=40 \
13 |         --api_name openai/Qwen/Qwen2.5-Coder-0.5B-Instruct \
14 |         --api_base http://localhost:8000/v1 \
15 |         --lcb_version release_v2 \
16 |         --num_round ${MAX_ROUND} \
17 |         --result_json_path="results/final_qwen0.5b_n_16_debug_public3_select_random_cached_${difficulty}.json" \
18 |         --load_cached_preds \
19 |         --cached_preds_path="results/final_qwen0.5b_n_16_debug_public3_select_oracle_${difficulty}.json"
20 | done
21 | 


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/final_random_cached/qwen1.5b_n_16_debug_public3_select_random_cached.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | MAX_ROUND=3
 4 | for difficulty in easy medium hard
 5 | do
 6 |     python evaluate_multiprocess.py \
 7 |         --difficulty=${difficulty} \
 8 |         --temperature=0.7 \
 9 |         --num_threads=32 \
10 |         --n=16 \
11 |         --selection=random \
12 |         --seed=40 \
13 |         --api_name openai/Qwen/Qwen2.5-Coder-1.5B-Instruct \
14 |         --api_base http://localhost:8000/v1 \
15 |         --lcb_version release_v2 \
16 |         --num_round ${MAX_ROUND} \
17 |         --result_json_path="results/final_qwen1.5b_n_16_debug_public3_select_random_cached_${difficulty}.json" \
18 |         --load_cached_preds \
19 |         --cached_preds_path="results/final_qwen1.5b_n_16_debug_public3_select_oracle_${difficulty}.json"
20 | done


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/final_random_cached/qwen14b_n_16_debug_public3_select_random_cached.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | MAX_ROUND=3
 4 | for difficulty in easy medium hard
 5 | do
 6 |     python evaluate_multiprocess.py \
 7 |         --difficulty=${difficulty} \
 8 |         --temperature=0.7 \
 9 |         --num_threads=32 \
10 |         --n=16 \
11 |         --selection=random \
12 |         --seed=40 \
13 |         --api_name openai/Qwen/Qwen2.5-Coder-14B-Instruct \
14 |         --api_base http://localhost:8000/v1 \
15 |         --lcb_version release_v2 \
16 |         --num_round ${MAX_ROUND} \
17 |         --result_json_path="results/final_qwen14b_n_16_debug_public3_select_random_cached_${difficulty}.json" \
18 |         --load_cached_preds \
19 |         --cached_preds_path="results/final_qwen14b_n_16_debug_public3_select_oracle_${difficulty}.json"
20 | done
21 | 
22 | 


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/final_random_cached/qwen32b_n_16_debug_public3_select_random_cached.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | MAX_ROUND=3
 4 | for difficulty in easy medium hard
 5 | do
 6 |     python evaluate_multiprocess.py \
 7 |         --difficulty=${difficulty} \
 8 |         --temperature=0.7 \
 9 |         --num_threads=32 \
10 |         --n=16 \
11 |         --selection=random \
12 |         --seed=40 \
13 |         --api_name openai/Qwen/Qwen2.5-Coder-32B-Instruct \
14 |         --api_base http://localhost:8000/v1 \
15 |         --lcb_version release_v2 \
16 |         --num_round ${MAX_ROUND} \
17 |         --result_json_path="results/final_qwen32b_n_16_debug_public3_select_random_cached_${difficulty}.json" \
18 |         --load_cached_preds \
19 |         --cached_preds_path="results/final_qwen32b_n_16_debug_public3_select_oracle_${difficulty}.json"
20 | done
21 | 


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/final_random_cached/qwen3b_n_16_debug_public3_select_random_cached.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | MAX_ROUND=3
 4 | for difficulty in easy medium hard
 5 | do
 6 |     python evaluate_multiprocess.py \
 7 |         --difficulty=${difficulty} \
 8 |         --temperature=0.7 \
 9 |         --num_threads=32 \
10 |         --n=16 \
11 |         --selection=random \
12 |         --seed=40 \
13 |         --api_name openai/Qwen/Qwen2.5-Coder-3B-Instruct \
14 |         --api_base http://localhost:8000/v1 \
15 |         --lcb_version release_v2 \
16 |         --num_round ${MAX_ROUND} \
17 |         --result_json_path="results/final_qwen3b_n_16_debug_public3_select_random_cached_${difficulty}.json" \
18 |         --load_cached_preds \
19 |         --cached_preds_path="results/final_qwen3b_n_16_debug_public3_select_oracle_${difficulty}.json"
20 | done


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/final_random_cached/qwen7b_n_16_debug_public3_select_random_cached.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | MAX_ROUND=3
 4 | for difficulty in easy medium hard
 5 | do
 6 |     python evaluate_multiprocess.py \
 7 |         --difficulty=${difficulty} \
 8 |         --temperature=0.7 \
 9 |         --num_threads=32 \
10 |         --n=16 \
11 |         --selection=random \
12 |         --seed=40 \
13 |         --api_name openai/Qwen/Qwen2.5-Coder-7B-Instruct \
14 |         --api_base http://localhost:8000/v1 \
15 |         --lcb_version release_v2 \
16 |         --num_round ${MAX_ROUND} \
17 |         --result_json_path="results/final_qwen7b_n_16_debug_public3_select_random_cached_${difficulty}.json" \
18 |         --load_cached_preds \
19 |         --cached_preds_path="results/final_qwen7b_n_16_debug_public3_select_oracle_${difficulty}.json"
20 | done
21 | 


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/final_random_cached/r1qwen14b_n_16_debug_public3_select_random_cached.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | MAX_ROUND=3
 4 | for difficulty in easy medium hard
 5 | do
 6 |     python evaluate_multiprocess.py \
 7 |         --difficulty=${difficulty} \
 8 |         --temperature=0.7 \
 9 |         --num_threads=32 \
10 |         --n=16 \
11 |         --selection=random \
12 |         --seed=40 \
13 |         --api_name deepseek-ai/DeepSeek-R1-Distill-Qwen-14B \
14 |         --api_base http://localhost:8000/v1 \
15 |         --lcb_version release_v2 \
16 |         --num_round ${MAX_ROUND} \
17 |         --result_json_path="results/final_r1qwen14b_n_16_debug_public3_select_random_cached_${difficulty}.json" \
18 |         --load_cached_preds \
19 |         --cached_preds_path="results/final_r1qwen14b_n_16_debug_public3_select_oracle_${difficulty}.json"
20 | done
21 | 
22 | 


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/final_random_cached/r1qwen32b_n_16_debug_public3_select_random_cached.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | MAX_ROUND=3
 4 | for difficulty in easy medium hard
 5 | do
 6 |     python evaluate_multiprocess.py \
 7 |         --difficulty=${difficulty} \
 8 |         --temperature=0.7 \
 9 |         --num_threads=32 \
10 |         --n=16 \
11 |         --selection=random \
12 |         --seed=40 \
13 |         --api_name deepseek-ai/DeepSeek-R1-Distill-Qwen-32B \
14 |         --api_base http://localhost:8000/v1 \
15 |         --lcb_version release_v2 \
16 |         --num_round ${MAX_ROUND} \
17 |         --result_json_path="results/final_r1qwen32b_n_16_debug_public3_select_random_cached_${difficulty}.json" \
18 |         --load_cached_preds \
19 |         --cached_preds_path="results/final_r1qwen32b_n_16_debug_public3_select_oracle_${difficulty}.json"
20 | done
21 | 
22 | 


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/final_random_cached/r1qwen7b_n_16_debug_public3_select_random_cached.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | MAX_ROUND=3
 4 | for difficulty in easy medium hard
 5 | do
 6 |     python evaluate_multiprocess.py \
 7 |         --difficulty=${difficulty} \
 8 |         --temperature=0.7 \
 9 |         --num_threads=32 \
10 |         --n=16 \
11 |         --selection=random \
12 |         --seed=40 \
13 |         --api_name deepseek-ai/DeepSeek-R1-Distill-Qwen-7B \
14 |         --api_base http://localhost:8000/v1 \
15 |         --lcb_version release_v2 \
16 |         --num_round ${MAX_ROUND} \
17 |         --result_json_path="results/final_r1qwen7b_n_16_debug_public3_select_random_cached_${difficulty}.json" \
18 |         --load_cached_preds \
19 |         --cached_preds_path="results/final_r1qwen7b_n_16_debug_public3_select_oracle_${difficulty}.json"
20 | done
21 | 
22 | 


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/final_tool_assisted_cached/4omini_n_16_debug_public3_select_tool_assisted_cached.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | MAX_ROUND=3
 4 | for difficulty in easy medium hard
 5 | do
 6 |     python evaluate_multiprocess.py \
 7 |         --difficulty=${difficulty} \
 8 |         --temperature=0.7 \
 9 |         --num_threads=32 \
10 |         --n=16 \
11 |         --selection=generated_tests_tool_assisted \
12 |         --test_generator 4o-mini \
13 |         --lcb_version release_v2 \
14 |         --num_round ${MAX_ROUND} \
15 |         --result_json_path="results/final_4omini_n_16_debug_public3_select_tool_assisted_cached_${difficulty}.json" \
16 |         --load_cached_preds \
17 |         --cached_preds_path="results/final_4omini_n_16_debug_public3_select_oracle_${difficulty}.json"
18 | done
19 | 


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/final_tool_assisted_cached/batch_small_models_tool_assisted.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | echo "Starting Qwen-0.5B evaluation..."
 4 | bash scripts/final_tool_assisted_cached/qwen0.5b_n_16_debug_public3_select_tool_assisted_cached.sh
 5 | 
 6 | echo "Starting Qwen-1.5B evaluation..."
 7 | bash scripts/final_tool_assisted_cached/qwen1.5b_n_16_debug_public3_select_tool_assisted_cached.sh
 8 | 
 9 | echo "Starting Qwen-3B evaluation..."
10 | bash scripts/final_tool_assisted_cached/qwen3b_n_16_debug_public3_select_tool_assisted_cached.sh
11 | 
12 | echo "All evaluations completed!"


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/final_tool_assisted_cached/o1mini_n_16_debug_public3_select_tool_assisted_cached.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | MAX_ROUND=3
 4 | for difficulty in easy medium hard
 5 | do
 6 |     python evaluate_multiprocess.py \
 7 |         --difficulty=${difficulty} \
 8 |         --temperature=0.7 \
 9 |         --num_threads=16 \
10 |         --n=16 \
11 |         --selection=generated_tests_tool_assisted \
12 |         --test_generator 4o-mini \
13 |         --lcb_version release_v2 \
14 |         --num_round ${MAX_ROUND} \
15 |         --result_json_path="results/final_o1mini_n_16_debug_public3_select_tool_assisted_cached_${difficulty}.json" \
16 |         --load_cached_preds \
17 |         --cached_preds_path="results/final_o1mini_n_16_debug_public3_select_oracle_${difficulty}.json"
18 | done
19 | 
20 | 


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/final_tool_assisted_cached/qwen0.5b_n_16_debug_public3_select_tool_assisted_cached.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | MAX_ROUND=3
 4 | for difficulty in easy medium hard
 5 | do
 6 |     python evaluate_multiprocess.py \
 7 |         --difficulty=${difficulty} \
 8 |         --temperature=0.7 \
 9 |         --num_threads=32 \
10 |         --n=16 \
11 |         --selection=generated_tests_tool_assisted \
12 |         --api_name openai/Qwen/Qwen2.5-Coder-0.5B-Instruct \
13 |         --api_base http://localhost:8000/v1 \
14 |         --test_generator 4o-mini \
15 |         --lcb_version release_v2 \
16 |         --num_round ${MAX_ROUND} \
17 |         --result_json_path="results/final_qwen0.5b_n_16_debug_public3_select_tool_assisted_cached_${difficulty}.json" \
18 |         --load_cached_preds \
19 |         --cached_preds_path="results/final_qwen0.5b_n_16_debug_public3_select_oracle_${difficulty}.json"
20 | done


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/final_tool_assisted_cached/qwen1.5b_n_16_debug_public3_select_tool_assisted_cached.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | MAX_ROUND=3
 4 | for difficulty in easy medium hard
 5 | do
 6 |     python evaluate_multiprocess.py \
 7 |         --difficulty=${difficulty} \
 8 |         --temperature=0.7 \
 9 |         --num_threads=32 \
10 |         --n=16 \
11 |         --selection=generated_tests_tool_assisted \
12 |         --api_name openai/Qwen/Qwen2.5-Coder-1.5B-Instruct \
13 |         --api_base http://localhost:8000/v1 \
14 |         --test_generator 4o-mini \
15 |         --lcb_version release_v2 \
16 |         --num_round ${MAX_ROUND} \
17 |         --result_json_path="results/final_qwen1.5b_n_16_debug_public3_select_tool_assisted_cached_${difficulty}.json" \
18 |         --load_cached_preds \
19 |         --cached_preds_path="results/final_qwen1.5b_n_16_debug_public3_select_oracle_${difficulty}.json"
20 | done


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/final_tool_assisted_cached/qwen14b_n_16_debug_public3_select_tool_assisted_cached.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | MAX_ROUND=3
 4 | for difficulty in easy medium hard
 5 | do
 6 |     python evaluate_multiprocess.py \
 7 |         --difficulty=${difficulty} \
 8 |         --temperature=0.7 \
 9 |         --num_threads=32 \
10 |         --n=16 \
11 |         --selection=generated_tests_tool_assisted \
12 |         --api_name openai/Qwen/Qwen2.5-Coder-14B-Instruct \
13 |         --api_base http://localhost:8000/v1 \
14 |         --test_generator 4o-mini \
15 |         --lcb_version release_v2 \
16 |         --num_round ${MAX_ROUND} \
17 |         --result_json_path="results/final_qwen14b_n_16_debug_public3_select_tool_assisted_cached_${difficulty}.json" \
18 |         --load_cached_preds \
19 |         --cached_preds_path="results/final_qwen14b_n_16_debug_public3_select_oracle_${difficulty}.json"
20 | done
21 | 
22 | 


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/final_tool_assisted_cached/qwen32b_n_16_debug_public3_select_tool_assisted_cached.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Per difficulty (easy/medium/hard): run evaluate_multiprocess.py for Qwen2.5-Coder-32B-Instruct with n=16, ${MAX_ROUND} rounds and generated_tests_tool_assisted selection, passing --load_cached_preds with the matching *_select_oracle_* results file.
 3 | MAX_ROUND=3
 4 | for difficulty in easy medium hard
 5 | do
 6 |     python evaluate_multiprocess.py \
 7 |         --difficulty=${difficulty} \
 8 |         --temperature=0.7 \
 9 |         --num_threads=32 \
10 |         --n=16 \
11 |         --selection=generated_tests_tool_assisted \
12 |         --api_name openai/Qwen/Qwen2.5-Coder-32B-Instruct \
13 |         --api_base http://localhost:8000/v1 \
14 |         --test_generator 4o-mini \
15 |         --lcb_version release_v2 \
16 |         --num_round ${MAX_ROUND} \
17 |         --result_json_path="results/final_qwen32b_n_16_debug_public3_select_tool_assisted_cached_${difficulty}.json" \
18 |         --load_cached_preds \
19 |         --cached_preds_path="results/final_qwen32b_n_16_debug_public3_select_oracle_${difficulty}.json"
20 | done
21 | 


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/final_tool_assisted_cached/qwen3b_n_16_debug_public3_select_tool_assisted_cached.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Per difficulty (easy/medium/hard): run evaluate_multiprocess.py for Qwen2.5-Coder-3B-Instruct with n=16, ${MAX_ROUND} rounds and generated_tests_tool_assisted selection, passing --load_cached_preds with the matching *_select_oracle_* results file.
 3 | MAX_ROUND=3
 4 | for difficulty in easy medium hard
 5 | do
 6 |     python evaluate_multiprocess.py \
 7 |         --difficulty=${difficulty} \
 8 |         --temperature=0.7 \
 9 |         --num_threads=32 \
10 |         --n=16 \
11 |         --selection=generated_tests_tool_assisted \
12 |         --api_name openai/Qwen/Qwen2.5-Coder-3B-Instruct \
13 |         --api_base http://localhost:8000/v1 \
14 |         --test_generator 4o-mini \
15 |         --lcb_version release_v2 \
16 |         --num_round ${MAX_ROUND} \
17 |         --result_json_path="results/final_qwen3b_n_16_debug_public3_select_tool_assisted_cached_${difficulty}.json" \
18 |         --load_cached_preds \
19 |         --cached_preds_path="results/final_qwen3b_n_16_debug_public3_select_oracle_${difficulty}.json"
20 | done


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/final_tool_assisted_cached/qwen7b_n_16_debug_public3_select_tool_assisted_cached.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Per difficulty (easy/medium/hard): run evaluate_multiprocess.py for Qwen2.5-Coder-7B-Instruct with n=16, ${MAX_ROUND} rounds and generated_tests_tool_assisted selection, passing --load_cached_preds with the matching *_select_oracle_* results file.
 3 | MAX_ROUND=3
 4 | for difficulty in easy medium hard
 5 | do
 6 |     python evaluate_multiprocess.py \
 7 |         --difficulty=${difficulty} \
 8 |         --temperature=0.7 \
 9 |         --num_threads=32 \
10 |         --n=16 \
11 |         --selection=generated_tests_tool_assisted \
12 |         --api_name openai/Qwen/Qwen2.5-Coder-7B-Instruct \
13 |         --api_base http://localhost:8000/v1 \
14 |         --test_generator 4o-mini \
15 |         --lcb_version release_v2 \
16 |         --num_round ${MAX_ROUND} \
17 |         --result_json_path="results/final_qwen7b_n_16_debug_public3_select_tool_assisted_cached_${difficulty}.json" \
18 |         --load_cached_preds \
19 |         --cached_preds_path="results/final_qwen7b_n_16_debug_public3_select_oracle_${difficulty}.json"
20 | done
21 | 


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/final_tool_assisted_cached/r1qwen14b_n_16_debug_public3_select_tool_assisted_cached.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Per difficulty (easy/medium/hard): run evaluate_multiprocess.py for DeepSeek-R1-Distill-Qwen-14B with n=16, 16 threads, ${MAX_ROUND} rounds and generated_tests_tool_assisted selection, passing --load_cached_preds with the matching *_select_oracle_* results file. NOTE(review): --api_name has no "openai/" prefix here, unlike the Qwen2.5-Coder scripts — confirm intended.
 3 | MAX_ROUND=3
 4 | for difficulty in easy medium hard
 5 | do
 6 |     python evaluate_multiprocess.py \
 7 |         --difficulty=${difficulty} \
 8 |         --temperature=0.7 \
 9 |         --num_threads=16 \
10 |         --n=16 \
11 |         --selection=generated_tests_tool_assisted \
12 |         --api_name deepseek-ai/DeepSeek-R1-Distill-Qwen-14B \
13 |         --api_base http://localhost:8000/v1 \
14 |         --test_generator 4o-mini \
15 |         --lcb_version release_v2 \
16 |         --num_round ${MAX_ROUND} \
17 |         --result_json_path="results/final_r1qwen14b_n_16_debug_public3_select_tool_assisted_cached_${difficulty}.json" \
18 |         --load_cached_preds \
19 |         --cached_preds_path="results/final_r1qwen14b_n_16_debug_public3_select_oracle_${difficulty}.json"
20 | done
21 | 


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/final_tool_assisted_cached/r1qwen7b_n_16_debug_public3_select_tool_assisted_cached.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Per difficulty (easy/medium/hard): run evaluate_multiprocess.py for DeepSeek-R1-Distill-Qwen-7B with n=16, ${MAX_ROUND} rounds and generated_tests_tool_assisted selection, passing --load_cached_preds with the matching *_select_oracle_* results file. NOTE(review): --api_name has no "openai/" prefix here, unlike the Qwen2.5-Coder scripts — confirm intended.
 3 | MAX_ROUND=3
 4 | for difficulty in easy medium hard
 5 | do
 6 |     python evaluate_multiprocess.py \
 7 |         --difficulty=${difficulty} \
 8 |         --temperature=0.7 \
 9 |         --num_threads=32 \
10 |         --n=16 \
11 |         --selection=generated_tests_tool_assisted \
12 |         --api_name deepseek-ai/DeepSeek-R1-Distill-Qwen-7B \
13 |         --api_base http://localhost:8000/v1 \
14 |         --test_generator 4o-mini \
15 |         --lcb_version release_v2 \
16 |         --num_round ${MAX_ROUND} \
17 |         --result_json_path="results/final_r1qwen7b_n_16_debug_public3_select_tool_assisted_cached_${difficulty}.json" \
18 |         --load_cached_preds \
19 |         --cached_preds_path="results/final_r1qwen7b_n_16_debug_public3_select_oracle_${difficulty}.json"
20 | done
21 | 


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/majority_baselines/4o_mini_n_16_majority.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Majority baseline: one evaluate_multiprocess.py run per difficulty with n=16 and generated_tests_majority_no_public_tests selection.
 3 | # No --generator/--api_name is passed, so the generator is the script's default (presumably 4o-mini per the file name — confirm).
 4 | for difficulty in easy medium hard
 5 | do
 6 |     python evaluate_multiprocess.py \
 7 |         --difficulty=${difficulty} \
 8 |         --temperature=0.7 \
 9 |         --num_threads=16 \
10 |         --n=16 \
11 |         --test_generator o1-mini \
12 |         --lcb_version release_v2 \
13 |         --num_round 1 \
14 |         --no_dspy_gen \
15 |         --selection generated_tests_majority_no_public_tests \
16 |         --result_json_path="results/majority_4o_mini_n_16_${difficulty}.json"
17 | done


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/majority_baselines/o1_mini_n_16_majority.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Majority baseline for --generator o1-mini: one evaluate_multiprocess.py run per difficulty with n=16 and
 3 | # generated_tests_majority_no_public_tests selection; writes results/majority_o1_mini_n_16_<difficulty>.json.
 4 | for difficulty in easy medium hard
 5 | do
 6 |     python evaluate_multiprocess.py \
 7 |         --difficulty=${difficulty} \
 8 |         --temperature=0.7 \
 9 |         --num_threads=16 \
10 |         --n=16 \
11 |         --test_generator o1-mini \
12 |         --lcb_version release_v2 \
13 |         --num_round 1 \
14 |         --no_dspy_gen \
15 |         --generator o1-mini \
16 |         --selection generated_tests_majority_no_public_tests \
17 |         --result_json_path="results/majority_o1_mini_n_16_${difficulty}.json"
18 | done


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/majority_baselines/qwen0.5b_n_16_majority.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Majority baseline for Qwen2.5-Coder-0.5B-Instruct served at http://localhost:8000/v1: one evaluate_multiprocess.py run per
 3 | # difficulty with n=16 and generated_tests_majority_no_public_tests selection; writes results/majority_qwen0.5b_n_16_<difficulty>.json.
 4 | for difficulty in easy medium hard
 5 | do
 6 |     python evaluate_multiprocess.py \
 7 |         --difficulty=${difficulty} \
 8 |         --temperature=0.7 \
 9 |         --num_threads=16 \
10 |         --n=16 \
11 |         --test_generator o1-mini \
12 |         --lcb_version release_v2 \
13 |         --num_round 1 \
14 |         --no_dspy_gen \
15 |         --api_name Qwen/Qwen2.5-Coder-0.5B-Instruct \
16 |         --api_base http://localhost:8000/v1 \
17 |         --selection generated_tests_majority_no_public_tests \
18 |         --result_json_path="results/majority_qwen0.5b_n_16_${difficulty}.json"
19 | done


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/majority_baselines/qwen1.5b_n_16_majority.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Majority baseline for Qwen2.5-Coder-1.5B-Instruct served at http://localhost:8000/v1: one evaluate_multiprocess.py run per
 3 | # difficulty with n=16 and generated_tests_majority_no_public_tests selection; writes results/majority_qwen1.5b_n_16_<difficulty>.json.
 4 | for difficulty in easy medium hard
 5 | do
 6 |     python evaluate_multiprocess.py \
 7 |         --difficulty=${difficulty} \
 8 |         --temperature=0.7 \
 9 |         --num_threads=16 \
10 |         --n=16 \
11 |         --test_generator o1-mini \
12 |         --lcb_version release_v2 \
13 |         --num_round 1 \
14 |         --no_dspy_gen \
15 |         --api_name Qwen/Qwen2.5-Coder-1.5B-Instruct \
16 |         --api_base http://localhost:8000/v1 \
17 |         --selection generated_tests_majority_no_public_tests \
18 |         --result_json_path="results/majority_qwen1.5b_n_16_${difficulty}.json"
19 | done


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/majority_baselines/qwen14b_n_16_majority.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Majority baseline for Qwen2.5-Coder-14B-Instruct served at http://localhost:8000/v1: one evaluate_multiprocess.py run per
 3 | # difficulty with n=16 and generated_tests_majority_no_public_tests selection; writes results/majority_qwen14b_n_16_<difficulty>.json.
 4 | for difficulty in easy medium hard
 5 | do
 6 |     python evaluate_multiprocess.py \
 7 |         --difficulty=${difficulty} \
 8 |         --temperature=0.7 \
 9 |         --num_threads=16 \
10 |         --n=16 \
11 |         --test_generator o1-mini \
12 |         --lcb_version release_v2 \
13 |         --num_round 1 \
14 |         --no_dspy_gen \
15 |         --api_name Qwen/Qwen2.5-Coder-14B-Instruct \
16 |         --api_base http://localhost:8000/v1 \
17 |         --selection generated_tests_majority_no_public_tests \
18 |         --result_json_path="results/majority_qwen14b_n_16_${difficulty}.json"
19 | done


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/majority_baselines/qwen32b_n_16_majority.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Majority baseline for Qwen2.5-Coder-32B-Instruct served at http://localhost:8000/v1: one evaluate_multiprocess.py run per difficulty
 3 | # with n=16 and generated_tests_majority_no_public_tests selection. Set PYTHON to override the interpreter (default: the sstar conda env).
 4 | for difficulty in easy medium hard
 5 | do
 6 |     "${PYTHON:-/root/miniconda3/envs/sstar/bin/python}" evaluate_multiprocess.py \
 7 |         --difficulty=${difficulty} \
 8 |         --temperature=0.7 \
 9 |         --num_threads=128 \
10 |         --n=16 \
11 |         --test_generator 4o-mini \
12 |         --lcb_version release_v2 \
13 |         --num_round 1 \
14 |         --no_dspy_gen \
15 |         --api_name Qwen/Qwen2.5-Coder-32B-Instruct \
16 |         --api_base http://localhost:8000/v1 \
17 |         --selection generated_tests_majority_no_public_tests \
18 |         --result_json_path="results/majority_qwen32b_n_16_${difficulty}.json"
19 | done
20 | 


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/majority_baselines/qwen3b_n_16_majority.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Majority baseline for Qwen2.5-Coder-3B-Instruct served at http://localhost:8000/v1: one evaluate_multiprocess.py run per
 3 | # difficulty with n=16 and generated_tests_majority_no_public_tests selection; writes results/majority_qwen3b_n_16_<difficulty>.json.
 4 | for difficulty in easy medium hard
 5 | do
 6 |     python evaluate_multiprocess.py \
 7 |         --difficulty=${difficulty} \
 8 |         --temperature=0.7 \
 9 |         --num_threads=16 \
10 |         --n=16 \
11 |         --test_generator o1-mini \
12 |         --lcb_version release_v2 \
13 |         --num_round 1 \
14 |         --no_dspy_gen \
15 |         --api_name Qwen/Qwen2.5-Coder-3B-Instruct \
16 |         --api_base http://localhost:8000/v1 \
17 |         --selection generated_tests_majority_no_public_tests \
18 |         --result_json_path="results/majority_qwen3b_n_16_${difficulty}.json"
19 | done
20 | 


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/majority_baselines/qwen7b_n_16_majority.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Majority baseline for Qwen2.5-Coder-7B-Instruct served at http://localhost:8000/v1: one evaluate_multiprocess.py run per
 3 | # difficulty with n=16 and generated_tests_majority_no_public_tests selection; writes results/majority_qwen7b_n_16_<difficulty>.json.
 4 | for difficulty in easy medium hard
 5 | do
 6 |     python evaluate_multiprocess.py \
 7 |         --difficulty=${difficulty} \
 8 |         --temperature=0.7 \
 9 |         --num_threads=16 \
10 |         --n=16 \
11 |         --test_generator o1-mini \
12 |         --lcb_version release_v2 \
13 |         --num_round 1 \
14 |         --no_dspy_gen \
15 |         --api_name Qwen/Qwen2.5-Coder-7B-Instruct \
16 |         --api_base http://localhost:8000/v1 \
17 |         --selection generated_tests_majority_no_public_tests \
18 |         --result_json_path="results/majority_qwen7b_n_16_${difficulty}.json"
19 | done


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/majority_baselines/qwq32b_n_16_majority.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Majority baseline for QwQ-32B-Preview served at http://localhost:8000/v1: runs evaluate_multiprocess.py with n=16 and
 3 | # generated_tests_majority_no_public_tests selection. Only the "easy" difficulty is listed in the loop below.
 4 | for difficulty in easy
 5 | do
 6 |     python evaluate_multiprocess.py \
 7 |         --difficulty=${difficulty} \
 8 |         --temperature=0.7 \
 9 |         --num_threads=32 \
10 |         --n=16 \
11 |         --test_generator o1-mini \
12 |         --lcb_version release_v2 \
13 |         --num_round 1 \
14 |         --no_dspy_gen \
15 |         --api_name Qwen/QwQ-32B-Preview \
16 |         --api_base http://localhost:8000/v1 \
17 |         --selection generated_tests_majority_no_public_tests \
18 |         --result_json_path="results/majority_qwq32b_n_16_${difficulty}.json"
19 | done
20 | 


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/majority_baselines/r1qwen14b_n_16_majority.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Majority baseline for DeepSeek-R1-Distill-Qwen-14B served at http://localhost:8000/v1: one evaluate_multiprocess.py run per difficulty
 3 | # with n=16 and generated_tests_majority_no_public_tests selection. Set PYTHON to override the interpreter (default: the sstar conda env).
 4 | for difficulty in easy medium hard
 5 | do
 6 |     "${PYTHON:-/root/miniconda3/envs/sstar/bin/python}" evaluate_multiprocess.py \
 7 |         --difficulty=${difficulty} \
 8 |         --temperature=0.7 \
 9 |         --num_threads=32 \
10 |         --n=16 \
11 |         --test_generator o1-mini \
12 |         --lcb_version release_v2 \
13 |         --num_round 1 \
14 |         --no_dspy_gen \
15 |         --api_name deepseek-ai/DeepSeek-R1-Distill-Qwen-14B \
16 |         --api_base http://localhost:8000/v1 \
17 |         --selection generated_tests_majority_no_public_tests \
18 |         --result_json_path="results/majority_r1qwen14b_n_16_${difficulty}.json"
19 | done
20 | 


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/majority_baselines/r1qwen32b_n_16_majority.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Majority baseline for DeepSeek-R1-Distill-Qwen-32B served at http://localhost:8000/v1: one evaluate_multiprocess.py run per difficulty
 3 | # with n=16 and generated_tests_majority_no_public_tests selection. Set PYTHON to override the interpreter (default: the sstar conda env).
 4 | for difficulty in easy medium hard
 5 | do
 6 |     "${PYTHON:-/root/miniconda3/envs/sstar/bin/python}" evaluate_multiprocess.py \
 7 |         --difficulty=${difficulty} \
 8 |         --temperature=0.7 \
 9 |         --num_threads=32 \
10 |         --n=16 \
11 |         --test_generator o1-mini \
12 |         --lcb_version release_v2 \
13 |         --num_round 1 \
14 |         --no_dspy_gen \
15 |         --api_name deepseek-ai/DeepSeek-R1-Distill-Qwen-32B \
16 |         --api_base http://localhost:8000/v1 \
17 |         --selection generated_tests_majority_no_public_tests \
18 |         --result_json_path="results/majority_r1qwen32b_n_16_${difficulty}.json"
19 | done
20 | 


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/majority_baselines/r1qwen7b_n_16_majority.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Majority baseline for DeepSeek-R1-Distill-Qwen-7B served at http://localhost:8000/v1: one evaluate_multiprocess.py run per difficulty
 3 | # with n=16 and generated_tests_majority_no_public_tests selection. Set PYTHON to override the interpreter (default: the sstar conda env).
 4 | for difficulty in easy medium hard
 5 | do
 6 |     "${PYTHON:-/root/miniconda3/envs/sstar/bin/python}" evaluate_multiprocess.py \
 7 |         --difficulty=${difficulty} \
 8 |         --temperature=0.7 \
 9 |         --num_threads=32 \
10 |         --n=16 \
11 |         --test_generator o1-mini \
12 |         --lcb_version release_v2 \
13 |         --num_round 1 \
14 |         --no_dspy_gen \
15 |         --api_name deepseek-ai/DeepSeek-R1-Distill-Qwen-7B \
16 |         --api_base http://localhost:8000/v1 \
17 |         --selection generated_tests_majority_no_public_tests \
18 |         --result_json_path="results/majority_r1qwen7b_n_16_${difficulty}.json"
19 | done
20 | 


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/sec4_parallel_sample/temp02_4o_mini.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Parallel-sampling sweep at temperature 0.2: for each difficulty and each n in 1..128 (powers of two), run evaluate_multiprocess.py with oracle selection, --no_refine and one round. No --api_name is passed, so the script's default model is used (presumably 4o-mini per the file name — confirm).
 3 | for difficulty in easy medium hard
 4 | do
 5 |     for n in 1 2 4 8 16 32 64 128
 6 |     do
 7 |         python evaluate_multiprocess.py \
 8 |             --difficulty=${difficulty} \
 9 |             --temperature=0.2 \
10 |             --num_threads=32 \
11 |             --n=${n} \
12 |             --selection=oracle \
13 |             --lcb_version release_v4 \
14 |             --start_date 2024-08-01 \
15 |             --end_date 2024-12-01 \
16 |             --no_refine \
17 |             --num_round 1 \
18 |             --result_json_path="results/sec4_parallel_sample_temp02_4o_mini_${difficulty}_n_${n}.json"
19 |     done
20 | done
21 | 


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/sec4_parallel_sample/temp02_qwen7b.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Parallel-sampling sweep at temperature 0.2 against a local endpoint (http://localhost:8000/v1): for each difficulty and each n in 1..128 (powers of two), run evaluate_multiprocess.py with oracle selection, --no_refine and one round.
 3 | # Server: vllm serve Qwen/Qwen2.5-Coder-7B-Instruct
 4 | 
 5 | for difficulty in easy medium hard
 6 | do
 7 |     for n in 1 2 4 8 16 32 64 128
 8 |     do
 9 |         python evaluate_multiprocess.py \
10 |             --difficulty=${difficulty} \
11 |             --temperature=0.2 \
12 |             --num_threads=32 \
13 |             --n=${n} \
14 |             --selection=oracle \
15 |             --lcb_version release_v4 \
16 |             --start_date 2024-08-01 \
17 |             --end_date 2024-12-01 \
18 |             --no_refine \
19 |             --num_round 1 \
20 |             --api_name openai/Qwen/Qwen2.5-Coder-7B-Instruct \
21 |             --api_base http://localhost:8000/v1 \
22 |             --result_json_path="results/sec4_parallel_sample_temp02_qwen_7b_${difficulty}_n_${n}.json"
23 |     done
24 | done
25 | 


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/sec4_parallel_sample/temp02_qwen_32b.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Parallel-sampling sweep at temperature 0.2 against a local endpoint (http://localhost:8000/v1): for each difficulty and each n in 1..128 (powers of two), run evaluate_multiprocess.py with oracle selection, --no_refine and one round.
 3 | # Server: vllm serve Qwen/Qwen2.5-Coder-32B-Instruct --tensor-parallel-size 8
 4 | 
 5 | for difficulty in easy medium hard
 6 | do
 7 |     for n in 1 2 4 8 16 32 64 128
 8 |     do
 9 |         python evaluate_multiprocess.py \
10 |             --difficulty=${difficulty} \
11 |             --temperature=0.2 \
12 |             --num_threads=32 \
13 |             --n=${n} \
14 |             --selection=oracle \
15 |             --lcb_version release_v4 \
16 |             --start_date 2024-08-01 \
17 |             --end_date 2024-12-01 \
18 |             --no_refine \
19 |             --num_round 1 \
20 |             --api_name openai/Qwen/Qwen2.5-Coder-32B-Instruct \
21 |             --api_base http://localhost:8000/v1 \
22 |             --result_json_path="results/sec4_parallel_sample_temp02_qwen_32b_${difficulty}_n_${n}.json"
23 |     done
24 | done
25 | 


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/sec4_parallel_sample/temp05_4o_mini.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Parallel-sampling sweep at temperature 0.5: for each difficulty and each n in 1..128 (powers of two), run evaluate_multiprocess.py with oracle selection, --no_refine and one round. No --api_name is passed, so the script's default model is used (presumably 4o-mini per the file name — confirm).
 3 | for difficulty in easy medium hard
 4 | do
 5 |     for n in 1 2 4 8 16 32 64 128
 6 |     do
 7 |         python evaluate_multiprocess.py \
 8 |             --difficulty=${difficulty} \
 9 |             --temperature=0.5 \
10 |             --num_threads=32 \
11 |             --n=${n} \
12 |             --selection=oracle \
13 |             --lcb_version release_v4 \
14 |             --start_date 2024-08-01 \
15 |             --end_date 2024-12-01 \
16 |             --no_refine \
17 |             --num_round 1 \
18 |             --result_json_path="results/sec4_parallel_sample_temp05_4o_mini_${difficulty}_n_${n}.json"
19 |     done
20 | done
21 | 


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/sec4_parallel_sample/temp05_qwen7b.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Parallel-sampling sweep at temperature 0.5 against a local endpoint (http://localhost:8000/v1): for each difficulty and each n in 1..128 (powers of two), run evaluate_multiprocess.py with oracle selection, --no_refine and one round.
 3 | # Server: vllm serve Qwen/Qwen2.5-Coder-7B-Instruct
 4 | 
 5 | for difficulty in easy medium hard
 6 | do
 7 |     for n in 1 2 4 8 16 32 64 128
 8 |     do
 9 |         python evaluate_multiprocess.py \
10 |             --difficulty=${difficulty} \
11 |             --temperature=0.5 \
12 |             --num_threads=32 \
13 |             --n=${n} \
14 |             --selection=oracle \
15 |             --lcb_version release_v4 \
16 |             --start_date 2024-08-01 \
17 |             --end_date 2024-12-01 \
18 |             --no_refine \
19 |             --num_round 1 \
20 |             --api_name openai/Qwen/Qwen2.5-Coder-7B-Instruct \
21 |             --api_base http://localhost:8000/v1 \
22 |             --result_json_path="results/sec4_parallel_sample_temp05_qwen_7b_${difficulty}_n_${n}.json"
23 |     done
24 | done
25 | 


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/sec4_parallel_sample/temp05_qwen_32b.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Parallel-sampling sweep at temperature 0.5 against a local endpoint (http://localhost:8000/v1): for each difficulty and each n in 1..128 (powers of two), run evaluate_multiprocess.py with oracle selection, --no_refine and one round.
 3 | # Server: vllm serve Qwen/Qwen2.5-Coder-32B-Instruct --tensor-parallel-size 8
 4 | 
 5 | for difficulty in easy medium hard
 6 | do
 7 |     for n in 1 2 4 8 16 32 64 128
 8 |     do
 9 |         python evaluate_multiprocess.py \
10 |             --difficulty=${difficulty} \
11 |             --temperature=0.5 \
12 |             --num_threads=32 \
13 |             --n=${n} \
14 |             --selection=oracle \
15 |             --lcb_version release_v4 \
16 |             --start_date 2024-08-01 \
17 |             --end_date 2024-12-01 \
18 |             --no_refine \
19 |             --num_round 1 \
20 |             --api_name openai/Qwen/Qwen2.5-Coder-32B-Instruct \
21 |             --api_base http://localhost:8000/v1 \
22 |             --result_json_path="results/sec4_parallel_sample_temp05_qwen_32b_${difficulty}_n_${n}.json"
23 |     done
24 | done
25 | 


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/sec4_parallel_sample/temp09_4o_mini.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Parallel-sampling sweep: for each difficulty and each n in 1..128 (powers of two), run evaluate_multiprocess.py with oracle selection, --no_refine and one round. NOTE(review): file name and result path say "temp09" but --temperature is 0.95 — confirm which is intended.
 3 | for difficulty in easy medium hard
 4 | do
 5 |     for n in 1 2 4 8 16 32 64 128
 6 |     do
 7 |         python evaluate_multiprocess.py \
 8 |             --difficulty=${difficulty} \
 9 |             --temperature=0.95 \
10 |             --num_threads=32 \
11 |             --n=${n} \
12 |             --selection=oracle \
13 |             --lcb_version release_v4 \
14 |             --start_date 2024-08-01 \
15 |             --end_date 2024-12-01 \
16 |             --no_refine \
17 |             --num_round 1 \
18 |             --result_json_path="results/sec4_parallel_sample_temp09_4o_mini_${difficulty}_n_${n}.json"
19 |     done
20 | done
21 | 


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/sec4_parallel_sample/temp09_qwen7b.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Parallel-sampling sweep against a local endpoint: for each difficulty and each n in 1..128 (powers of two), run evaluate_multiprocess.py with oracle selection, --no_refine and one round. NOTE(review): file name and result path say "temp09" but --temperature is 0.95 — confirm which is intended.
 3 | # Server: vllm serve Qwen/Qwen2.5-Coder-7B-Instruct
 4 | 
 5 | for difficulty in easy medium hard
 6 | do
 7 |     for n in 1 2 4 8 16 32 64 128
 8 |     do
 9 |         python evaluate_multiprocess.py \
10 |             --difficulty=${difficulty} \
11 |             --temperature=0.95 \
12 |             --num_threads=32 \
13 |             --n=${n} \
14 |             --selection=oracle \
15 |             --lcb_version release_v4 \
16 |             --start_date 2024-08-01 \
17 |             --end_date 2024-12-01 \
18 |             --no_refine \
19 |             --num_round 1 \
20 |             --api_name openai/Qwen/Qwen2.5-Coder-7B-Instruct \
21 |             --api_base http://localhost:8000/v1 \
22 |             --result_json_path="results/sec4_parallel_sample_temp09_qwen_7b_${difficulty}_n_${n}.json"
23 |     done
24 | done
25 | 


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/sec4_parallel_sample/temp09_qwen_32b.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Parallel-sampling sweep against a local endpoint: for each difficulty and each n in 1..128 (powers of two), run evaluate_multiprocess.py with oracle selection, --no_refine and one round. NOTE(review): file name and result path say "temp09" but --temperature is 0.95 — confirm which is intended.
 3 | # Server: vllm serve Qwen/Qwen2.5-Coder-32B-Instruct --tensor-parallel-size 8
 4 | 
 5 | for difficulty in easy medium hard
 6 | do
 7 |     for n in 1 2 4 8 16 32 64 128
 8 |     do
 9 |         python evaluate_multiprocess.py \
10 |             --difficulty=${difficulty} \
11 |             --temperature=0.95 \
12 |             --num_threads=32 \
13 |             --n=${n} \
14 |             --selection=oracle \
15 |             --lcb_version release_v4 \
16 |             --start_date 2024-08-01 \
17 |             --end_date 2024-12-01 \
18 |             --no_refine \
19 |             --num_round 1 \
20 |             --api_name openai/Qwen/Qwen2.5-Coder-32B-Instruct \
21 |             --api_base http://localhost:8000/v1 \
22 |             --result_json_path="results/sec4_parallel_sample_temp09_qwen_32b_${difficulty}_n_${n}.json"
23 |     done
24 | done
25 | 


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/sec4_parallel_sample/vanilla_4o_mini.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Parallel-sampling sweep at temperature 0.7: for each difficulty and each n in 1..128 (powers of two), run evaluate_multiprocess.py with oracle selection, --no_refine and one round. No --api_name is passed, so the script's default model is used (presumably 4o-mini per the file name — confirm).
 3 | for difficulty in easy medium hard
 4 | do
 5 |     for n in 1 2 4 8 16 32 64 128
 6 |     do
 7 |         python evaluate_multiprocess.py \
 8 |             --difficulty=${difficulty} \
 9 |             --temperature=0.7 \
10 |             --num_threads=32 \
11 |             --n=${n} \
12 |             --selection=oracle \
13 |             --lcb_version release_v4 \
14 |             --start_date 2024-08-01 \
15 |             --end_date 2024-12-01 \
16 |             --no_refine \
17 |             --num_round 1 \
18 |             --result_json_path="results/sec4_parallel_sample_vanilla_4o_mini_${difficulty}_n_${n}.json"
19 |     done
20 | done


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/sec4_parallel_sample/vanilla_qwen_32b.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Parallel-sampling sweep at temperature 0.7 against a local endpoint (http://localhost:8000/v1): for each difficulty and each n in 1..128 (powers of two), run evaluate_multiprocess.py with oracle selection, --no_refine and one round.
 3 | # Server: vllm serve Qwen/Qwen2.5-Coder-32B-Instruct --tensor-parallel-size 8
 4 | 
 5 | for difficulty in easy medium hard
 6 | do
 7 |     for n in 1 2 4 8 16 32 64 128
 8 |     do
 9 |         python evaluate_multiprocess.py \
10 |             --difficulty=${difficulty} \
11 |             --temperature=0.7 \
12 |             --num_threads=32 \
13 |             --n=${n} \
14 |             --selection=oracle \
15 |             --lcb_version release_v4 \
16 |             --start_date 2024-08-01 \
17 |             --end_date 2024-12-01 \
18 |             --no_refine \
19 |             --num_round 1 \
20 |             --api_name openai/Qwen/Qwen2.5-Coder-32B-Instruct \
21 |             --api_base http://localhost:8000/v1 \
22 |             --result_json_path="results/sec4_parallel_sample_vanilla_qwen_32b_${difficulty}_n_${n}.json"
23 |     done
24 | done


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/sec4_parallel_sample/vanilla_qwen_7b.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Parallel-sampling sweep at temperature 0.7 against a local endpoint (http://localhost:8000/v1): for each difficulty and each n in 1..128 (powers of two), run evaluate_multiprocess.py with oracle selection, --no_refine and one round.
 3 | # Server: vllm serve Qwen/Qwen2.5-Coder-7B-Instruct
 4 | 
 5 | for difficulty in easy medium hard
 6 | do
 7 |     for n in 1 2 4 8 16 32 64 128
 8 |     do
 9 |         python evaluate_multiprocess.py \
10 |             --difficulty=${difficulty} \
11 |             --temperature=0.7 \
12 |             --num_threads=32 \
13 |             --n=${n} \
14 |             --selection=oracle \
15 |             --lcb_version release_v4 \
16 |             --start_date 2024-08-01 \
17 |             --end_date 2024-12-01 \
18 |             --no_refine \
19 |             --num_round 1 \
20 |             --api_name openai/Qwen/Qwen2.5-Coder-7B-Instruct \
21 |             --api_base http://localhost:8000/v1 \
22 |             --result_json_path="results/sec4_parallel_sample_vanilla_qwen_7b_${difficulty}_n_${n}.json"
23 |     done
24 | done


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/sec4_parallel_sample/vanilla_qwq_32b.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Parallel-sampling sweep at temperature 0.7 against a local endpoint (http://localhost:8000/v1): for each difficulty and each n in 1..128 (powers of two), run evaluate_multiprocess.py with oracle selection, --no_refine, --no_dspy_gen and one round.
 3 | # Server: vllm serve Qwen/QwQ-32B-Preview --tensor-parallel-size 8
 4 | 
 5 | for difficulty in easy medium hard
 6 | do
 7 |     for n in 1 2 4 8 16 32 64 128
 8 |     do
 9 |         python evaluate_multiprocess.py \
10 |             --difficulty=${difficulty} \
11 |             --temperature=0.7 \
12 |             --num_threads=32 \
13 |             --n=${n} \
14 |             --selection=oracle \
15 |             --lcb_version release_v4 \
16 |             --start_date 2024-08-01 \
17 |             --end_date 2024-12-01 \
18 |             --no_refine \
19 |             --num_round 1 \
20 |             --api_name Qwen/QwQ-32B-Preview \
21 |             --api_base http://localhost:8000/v1 \
22 |             --no_dspy_gen \
23 |             --result_json_path="results/sec4_parallel_sample_vanilla_qwq_32b_${difficulty}_n_${n}.json"
24 |     done
25 | done


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/sec4_parallel_sample/vanilla_qwq_32b_hard.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Hard-only variant of the QwQ-32B-Preview parallel-sampling sweep: for each n in 1..128 (powers of two), run evaluate_multiprocess.py at temperature 0.7 with 8 threads, oracle selection, --no_refine, --no_dspy_gen and one round. Result paths use the same sec4_parallel_sample_vanilla_qwq_32b_* naming pattern.
 3 | # Server: vllm serve Qwen/QwQ-32B-Preview --tensor-parallel-size 8
 4 | 
 5 | for difficulty in hard
 6 | do
 7 |     for n in 1 2 4 8 16 32 64 128
 8 |     do
 9 |         python evaluate_multiprocess.py \
10 |             --difficulty=${difficulty} \
11 |             --temperature=0.7 \
12 |             --num_threads=8 \
13 |             --n=${n} \
14 |             --selection=oracle \
15 |             --lcb_version release_v4 \
16 |             --start_date 2024-08-01 \
17 |             --end_date 2024-12-01 \
18 |             --no_refine \
19 |             --num_round 1 \
20 |             --api_name Qwen/QwQ-32B-Preview \
21 |             --api_base http://localhost:8000/v1 \
22 |             --no_dspy_gen \
23 |             --result_json_path="results/sec4_parallel_sample_vanilla_qwq_32b_${difficulty}_n_${n}.json"
24 |     done
25 | done
26 | 


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/sec4_parallel_sample/vanilla_qwq_32b_medium.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Medium-only variant of the QwQ-32B-Preview parallel-sampling sweep: for each n in 1..128 (powers of two), run evaluate_multiprocess.py at temperature 0.7 with 8 threads, oracle selection, --no_refine, --no_dspy_gen and one round. Result paths use the same sec4_parallel_sample_vanilla_qwq_32b_* naming pattern.
 3 | # Server: vllm serve Qwen/QwQ-32B-Preview --tensor-parallel-size 8
 4 | 
 5 | for difficulty in medium
 6 | do
 7 |     for n in 1 2 4 8 16 32 64 128
 8 |     do
 9 |         python evaluate_multiprocess.py \
10 |             --difficulty=${difficulty} \
11 |             --temperature=0.7 \
12 |             --num_threads=8 \
13 |             --n=${n} \
14 |             --selection=oracle \
15 |             --lcb_version release_v4 \
16 |             --start_date 2024-08-01 \
17 |             --end_date 2024-12-01 \
18 |             --no_refine \
19 |             --num_round 1 \
20 |             --api_name Qwen/QwQ-32B-Preview \
21 |             --api_base http://localhost:8000/v1 \
22 |             --no_dspy_gen \
23 |             --result_json_path="results/sec4_parallel_sample_vanilla_qwq_32b_${difficulty}_n_${n}.json"
24 |     done
25 | done
26 | 


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/sec5_revision/last_4o_mini.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | MAX_ROUND=5
 4 | for difficulty in easy medium hard
 5 | do
 6 |     python evaluate_multiprocess.py \
 7 |         --difficulty=${difficulty} \
 8 |         --temperature=0.7 \
 9 |         --num_threads=16 \
10 |         --n=8 \
11 |         --selection=oracle \
12 |         --lcb_version release_v4 \
13 |         --start_date 2024-08-01 \
14 |         --end_date 2024-12-01 \
15 | 	--context last \
16 |         --num_round ${MAX_ROUND} \
17 |         --selection oracle_all_rounds \
18 |         --result_json_path="results/sec5_revision_last_4o_mini_${difficulty}_max_round_${MAX_ROUND}.json"
19 | done
20 | 


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/sec5_revision/last_qwen_32b.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Server: vllm serve Qwen/Qwen2.5-Coder-32B-Instruct --tensor-parallel-size 8
 4 | MAX_ROUND=5
 5 | for difficulty in easy medium hard
 6 | do
 7 |     python evaluate_multiprocess.py \
 8 |         --difficulty=${difficulty} \
 9 |         --temperature=0.7 \
10 |         --num_threads=16 \
11 |         --n=8 \
12 |         --selection=oracle \
13 |         --lcb_version release_v4 \
14 |         --start_date 2024-08-01 \
15 |         --end_date 2024-12-01 \
16 |         --num_round ${MAX_ROUND} \
17 | 	--context last \
18 |         --api_name openai/Qwen/Qwen2.5-Coder-32B-Instruct \
19 |         --api_base http://localhost:8000/v1 \
20 |         --selection oracle_all_rounds \
21 |         --result_json_path="results/sec5_revision_last_qwen_32b_${difficulty}_max_round_${MAX_ROUND}.json"
22 | done
23 | 


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/sec5_revision/last_qwen_7b.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Server: vllm serve Qwen/Qwen2.5-Coder-7B-Instruct
 4 | MAX_ROUND=5
 5 | for difficulty in easy medium hard
 6 | do
 7 |     python evaluate_multiprocess.py \
 8 |         --difficulty=${difficulty} \
 9 |         --temperature=0.7 \
10 |         --num_threads=16 \
11 |         --n=8 \
12 |         --selection=oracle \
13 |         --lcb_version release_v4 \
14 |         --start_date 2024-08-01 \
15 |         --end_date 2024-12-01 \
16 |         --num_round ${MAX_ROUND} \
17 | 	--context last \
18 |         --api_name openai/Qwen/Qwen2.5-Coder-7B-Instruct \
19 |         --api_base http://localhost:8000/v1 \
20 |         --selection oracle_all_rounds \
21 |         --result_json_path="results/sec5_revision_last_qwen_7b_${difficulty}_max_round_${MAX_ROUND}.json"
22 | done
23 | 


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/sec5_revision/refine_4o_mini.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | MAX_ROUND=5
 4 | for difficulty in easy medium hard
 5 | do
 6 |     python evaluate_multiprocess.py \
 7 |         --difficulty=${difficulty} \
 8 |         --temperature=0.7 \
 9 |         --num_threads=16 \
10 |         --n=8 \
11 |         --selection=oracle \
12 |         --lcb_version release_v4 \
13 |         --start_date 2024-08-01 \
14 |         --end_date 2024-12-01 \
15 |         --num_round ${MAX_ROUND} \
16 | 	--selfdebug_decision refine \
17 |         --selection oracle_all_rounds \
18 |         --result_json_path="results/sec5_revision_refine_4o_mini_${difficulty}_max_round_${MAX_ROUND}.json"
19 | done
20 | 


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/sec5_revision/refine_qwen_32b.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Server: vllm serve Qwen/Qwen2.5-Coder-32B-Instruct --tensor-parallel-size 8
 4 | MAX_ROUND=5
 5 | for difficulty in easy medium hard
 6 | do
 7 |     python evaluate_multiprocess.py \
 8 |         --difficulty=${difficulty} \
 9 |         --temperature=0.7 \
10 |         --num_threads=16 \
11 |         --n=8 \
12 |         --selection=oracle \
13 |         --lcb_version release_v4 \
14 |         --start_date 2024-08-01 \
15 |         --end_date 2024-12-01 \
16 |         --num_round ${MAX_ROUND} \
17 | 	--selfdebug_decision refine \
18 |         --api_name openai/Qwen/Qwen2.5-Coder-32B-Instruct \
19 |         --api_base http://localhost:8000/v1 \
20 |         --selection oracle_all_rounds \
21 |         --result_json_path="results/sec5_revision_refine_qwen_32b_${difficulty}_max_round_${MAX_ROUND}.json"
22 | done
23 | 


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/sec5_revision/refine_qwen_7b.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Server: vllm serve Qwen/Qwen2.5-Coder-7B-Instruct
 4 | MAX_ROUND=5
 5 | for difficulty in easy medium hard
 6 | do
 7 |     python evaluate_multiprocess.py \
 8 |         --difficulty=${difficulty} \
 9 |         --temperature=0.7 \
10 |         --num_threads=16 \
11 |         --n=8 \
12 |         --selection=oracle \
13 |         --lcb_version release_v4 \
14 |         --start_date 2024-08-01 \
15 |         --end_date 2024-12-01 \
16 |         --num_round ${MAX_ROUND} \
17 | 	--selfdebug_decision refine \
18 |         --api_name openai/Qwen/Qwen2.5-Coder-7B-Instruct \
19 |         --api_base http://localhost:8000/v1 \
20 |         --selection oracle_all_rounds \
21 |         --result_json_path="results/sec5_revision_refine_qwen_7b_${difficulty}_max_round_${MAX_ROUND}.json"
22 | done
23 | 


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/sec5_revision/vanilla_4o_mini.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | MAX_ROUND=5
 4 | for difficulty in easy medium hard
 5 | do
 6 |     python evaluate_multiprocess.py \
 7 |         --difficulty=${difficulty} \
 8 |         --temperature=0.7 \
 9 |         --num_threads=16 \
10 |         --n=8 \
11 |         --selection=oracle \
12 |         --lcb_version release_v4 \
13 |         --start_date 2024-08-01 \
14 |         --end_date 2024-12-01 \
15 |         --num_round ${MAX_ROUND} \
16 | 	--selection oracle_all_rounds \
17 |         --result_json_path="results/sec5_revision_vanilla_4o_mini_${difficulty}_max_round_${MAX_ROUND}.json"
18 | done
19 | 


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/sec5_revision/vanilla_qwen_32b.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Server: vllm serve Qwen/Qwen2.5-Coder-32B-Instruct --tensor-parallel-size 8
 4 | MAX_ROUND=5
 5 | for difficulty in easy medium hard
 6 | do
 7 |     python evaluate_multiprocess.py \
 8 |         --difficulty=${difficulty} \
 9 |         --temperature=0.7 \
10 |         --num_threads=16 \
11 |         --n=8 \
12 |         --selection=oracle \
13 |         --lcb_version release_v4 \
14 |         --start_date 2024-08-01 \
15 |         --end_date 2024-12-01 \
16 |         --num_round ${MAX_ROUND} \
17 |         --api_name openai/Qwen/Qwen2.5-Coder-32B-Instruct \
18 |         --api_base http://localhost:8000/v1 \
19 |         --selection oracle_all_rounds \
20 |         --result_json_path="results/sec5_revision_vanilla_qwen_32b_${difficulty}_max_round_${MAX_ROUND}.json"
21 | done
22 | 


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/sec5_revision/vanilla_qwen_7b.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Server: vllm serve Qwen/Qwen2.5-Coder-7B-Instruct
 4 | MAX_ROUND=5
 5 | for difficulty in easy medium hard
 6 | do
 7 |     python evaluate_multiprocess.py \
 8 |         --difficulty=${difficulty} \
 9 |         --temperature=0.7 \
10 |         --num_threads=16 \
11 |         --n=8 \
12 |         --selection=oracle \
13 |         --lcb_version release_v4 \
14 |         --start_date 2024-08-01 \
15 |         --end_date 2024-12-01 \
16 |         --num_round ${MAX_ROUND} \
17 |         --api_name openai/Qwen/Qwen2.5-Coder-7B-Instruct \
18 |         --api_base http://localhost:8000/v1 \
19 |         --selection oracle_all_rounds \
20 |         --result_json_path="results/sec5_revision_vanilla_qwen_7b_${difficulty}_max_round_${MAX_ROUND}.json"
21 | done
22 | 


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/sec6/4o_mini_tool_assisted.sh:
--------------------------------------------------------------------------------
 1 | MAX_ROUND=5
 2 | for difficulty in easy medium hard
 3 | do
 4 |     python evaluate_multiprocess.py \
 5 |         --difficulty=${difficulty} \
 6 |         --temperature=0.7 \
 7 |         --num_threads=16 \
 8 |         --n=8 \
 9 |         --selection=generated_tests_tool_assisted\
10 |         --test_generator 4o-mini \
11 |         --lcb_version release_v4 \
12 |         --start_date 2024-08-01 \
13 |         --end_date 2024-12-01 \
14 |         --num_round ${MAX_ROUND} \
15 |         --load_cached_preds \
16 |         --result_json_path="results/sec6_4o_mini_tool_assisted_${difficulty}_max_round_${MAX_ROUND}.json" \
17 |         --cached_preds_path="results/sec5_revision_vanilla_4o_mini_${difficulty}_max_round_${MAX_ROUND}.json"
18 | done


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/sec6/4o_mini_vanilla_baseline.sh:
--------------------------------------------------------------------------------
 1 | MAX_ROUND=5
 2 | for difficulty in easy medium hard
 3 | do
 4 |     python evaluate_multiprocess.py \
 5 |         --difficulty=${difficulty} \
 6 |         --temperature=0.7 \
 7 |         --num_threads=16 \
 8 |         --n=8 \
 9 |         --selection=first\
10 |         --test_generator 4o-mini \
11 |         --lcb_version release_v4 \
12 |         --start_date 2024-08-01 \
13 |         --end_date 2024-12-01 \
14 |         --num_round ${MAX_ROUND} \
15 |         --load_cached_preds \
16 |         --result_json_path="results/sec6_4o_mini_vanilla_baseline_${difficulty}_max_round_${MAX_ROUND}.json" \
17 |         --cached_preds_path="results/sec5_revision_vanilla_4o_mini_${difficulty}_max_round_${MAX_ROUND}.json"
18 | done


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/sec6/4o_mini_vanilla_with_4omini_generated_and_timeout_test.sh:
--------------------------------------------------------------------------------
 1 | MAX_ROUND=5
 2 | for difficulty in easy medium hard
 3 | do
 4 |     python evaluate_multiprocess.py \
 5 |         --difficulty=${difficulty} \
 6 |         --temperature=0.7 \
 7 |         --num_threads=16 \
 8 |         --n=8 \
 9 |         --selection=generated_tests\
10 |         --test_generator 4o-mini \
11 |         --lcb_version release_v4 \
12 |         --start_date 2024-08-01 \
13 |         --end_date 2024-12-01 \
14 |         --num_round ${MAX_ROUND} \
15 |         --load_cached_preds \
16 |         --result_json_path="results/sec6_4o_mini_with_4omini_generated_and_timeout_test_${difficulty}_max_round_${MAX_ROUND}.json" \
17 |         --cached_preds_path="results/sec5_revision_vanilla_4o_mini_${difficulty}_max_round_${MAX_ROUND}.json"
18 | done


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/sec6/qwen_32b_tool_assisted.sh:
--------------------------------------------------------------------------------
 1 | MAX_ROUND=5
 2 | for difficulty in easy medium hard
 3 | do
 4 |     python evaluate_multiprocess.py \
 5 |         --difficulty=${difficulty} \
 6 |         --temperature=0.7 \
 7 |         --num_threads=16 \
 8 |         --n=8 \
 9 |         --selection=generated_tests_tool_assisted\
10 |         --test_generator 4o-mini \
11 |         --lcb_version release_v4 \
12 |         --start_date 2024-08-01 \
13 |         --end_date 2024-12-01 \
14 |         --num_round ${MAX_ROUND} \
15 |         --api_name openai/Qwen/Qwen2.5-Coder-32B-Instruct \
16 |         --api_base http://localhost:8000/v1 \
17 |         --result_json_path="results/sec6_qwen32b_tool_assisted_${difficulty}_max_round_${MAX_ROUND}.json" \
18 |         --load_cached_preds \
19 |         --cached_preds_path="results/sec5_revision_vanilla_qwen_32b_${difficulty}_max_round_${MAX_ROUND}.json"
20 | done


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/sec6/qwen_32b_vanilla_baseline.sh:
--------------------------------------------------------------------------------
 1 | MAX_ROUND=5
 2 | for difficulty in easy medium hard
 3 | do
 4 |     python evaluate_multiprocess.py \
 5 |         --difficulty=${difficulty} \
 6 |         --temperature=0.7 \
 7 |         --num_threads=16 \
 8 |         --n=8 \
 9 |         --selection=first\
10 |         --test_generator 4o-mini \
11 |         --lcb_version release_v4 \
12 |         --start_date 2024-08-01 \
13 |         --end_date 2024-12-01 \
14 |         --num_round ${MAX_ROUND} \
15 |         --api_name openai/Qwen/Qwen2.5-Coder-32B-Instruct \
16 |         --api_base http://localhost:8000/v1 \
17 |         --result_json_path="results/sec6_qwen32b_vanilla_baseline_${difficulty}_max_round_${MAX_ROUND}.json" \
18 |         --load_cached_preds \
19 |         --cached_preds_path="results/sec5_revision_vanilla_qwen_32b_${difficulty}_max_round_${MAX_ROUND}.json"
20 | done


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/sec6/qwen_32b_with4omini_test_and_timeout_vanilla.sh:
--------------------------------------------------------------------------------
 1 | MAX_ROUND=5
 2 | for difficulty in easy medium hard
 3 | do
 4 |     python evaluate_multiprocess.py \
 5 |         --difficulty=${difficulty} \
 6 |         --temperature=0.7 \
 7 |         --num_threads=16 \
 8 |         --n=8 \
 9 |         --selection=generated_tests\
10 |         --test_generator 4o-mini \
11 |         --lcb_version release_v4 \
12 |         --start_date 2024-08-01 \
13 |         --end_date 2024-12-01 \
14 |         --num_round ${MAX_ROUND} \
15 |         --api_name openai/Qwen/Qwen2.5-Coder-32B-Instruct \
16 |         --api_base http://localhost:8000/v1 \
17 |         --result_json_path="results/sec6_qwen32b_with_4omini_and_timeout_vanilla_${difficulty}_max_round_${MAX_ROUND}.json" \
18 |         --load_cached_preds \
19 |         --cached_preds_path="results/sec5_revision_vanilla_qwen_32b_${difficulty}_max_round_${MAX_ROUND}.json"
20 | done


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/sec6/qwen_7b_tool_assisted.sh:
--------------------------------------------------------------------------------
 1 | MAX_ROUND=5
 2 | for difficulty in easy medium hard
 3 | do
 4 |     python evaluate_multiprocess.py \
 5 |         --difficulty=${difficulty} \
 6 |         --temperature=0.7 \
 7 |         --num_threads=16 \
 8 |         --n=8 \
 9 |         --selection=generated_tests_tool_assisted\
10 |         --test_generator 4o-mini \
11 |         --lcb_version release_v4 \
12 |         --start_date 2024-08-01 \
13 |         --end_date 2024-12-01 \
14 |         --num_round ${MAX_ROUND} \
15 |         --api_name openai/Qwen/Qwen2.5-Coder-7B-Instruct \
16 |         --api_base http://localhost:8000/v1 \
17 |         --result_json_path="results/sec6_qwen7b_tool_assisted_${difficulty}_max_round_${MAX_ROUND}.json" \
18 |         --load_cached_preds \
19 |         --cached_preds_path="results/sec5_revision_vanilla_qwen_7b_${difficulty}_max_round_${MAX_ROUND}.json"
20 | done


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/sec6/qwen_7b_vanilla_baseline.sh:
--------------------------------------------------------------------------------
 1 | MAX_ROUND=5
 2 | for difficulty in easy medium hard
 3 | do
 4 |     python evaluate_multiprocess.py \
 5 |         --difficulty=${difficulty} \
 6 |         --temperature=0.7 \
 7 |         --num_threads=16 \
 8 |         --n=8 \
 9 |         --selection=first\
10 |         --test_generator 4o-mini \
11 |         --lcb_version release_v4 \
12 |         --start_date 2024-08-01 \
13 |         --end_date 2024-12-01 \
14 |         --num_round ${MAX_ROUND} \
15 |         --api_name openai/Qwen/Qwen2.5-Coder-7B-Instruct \
16 |         --api_base http://localhost:8000/v1 \
17 |         --result_json_path="results/sec6_qwen7b_vanilla_baseline_${difficulty}_max_round_${MAX_ROUND}.json" \
18 |         --load_cached_preds \
19 |         --cached_preds_path="results/sec5_revision_vanilla_qwen_7b_${difficulty}_max_round_${MAX_ROUND}.json"
20 | done


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/sec6/qwen_7b_with4omini_test_and_timeout_vanilla.sh:
--------------------------------------------------------------------------------
 1 | MAX_ROUND=5
 2 | for difficulty in easy medium hard
 3 | do
 4 |     python evaluate_multiprocess.py \
 5 |         --difficulty=${difficulty} \
 6 |         --temperature=0.7 \
 7 |         --num_threads=16 \
 8 |         --n=8 \
 9 |         --selection=generated_tests\
10 |         --test_generator 4o-mini \
11 |         --lcb_version release_v4 \
12 |         --start_date 2024-08-01 \
13 |         --end_date 2024-12-01 \
14 |         --num_round ${MAX_ROUND} \
15 |         --api_name openai/Qwen/Qwen2.5-Coder-7B-Instruct \
16 |         --api_base http://localhost:8000/v1 \
17 |         --result_json_path="results/sec6_qwen7b_with_4omini_and_timeout_vanilla_${difficulty}_max_round_${MAX_ROUND}.json" \
18 |         --load_cached_preds \
19 |         --cached_preds_path="results/sec5_revision_vanilla_qwen_7b_${difficulty}_max_round_${MAX_ROUND}.json"
20 | done


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/sec6_llm_judge_baseline/4o_mini_llm_judge_baseline.sh:
--------------------------------------------------------------------------------
 1 | MAX_ROUND=5
 2 | for difficulty in easy medium hard
 3 | do
 4 |     python evaluate_multiprocess.py \
 5 |         --difficulty=${difficulty} \
 6 |         --temperature=0.7 \
 7 |         --num_threads=16 \
 8 |         --n=8 \
 9 |         --selection=generated_tests_aware_llm_judge\
10 |         --test_generator 4o-mini \
11 |         --lcb_version release_v4 \
12 |         --start_date 2024-08-01 \
13 |         --end_date 2024-12-01 \
14 |         --num_round ${MAX_ROUND} \
15 |         --load_cached_preds \
16 |         --result_json_path="results/sec6_4o_mini_llm_judge_baseline_${difficulty}_max_round_${MAX_ROUND}.json" \
17 |         --cached_preds_path="results/sec5_revision_vanilla_4o_mini_${difficulty}_max_round_${MAX_ROUND}.json"
18 | done


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/sec6_llm_judge_baseline/qwen_32b_llm_judge_baseline.sh:
--------------------------------------------------------------------------------
 1 | MAX_ROUND=5
 2 | for difficulty in easy medium hard
 3 | do
 4 |     python evaluate_multiprocess.py \
 5 |         --difficulty=${difficulty} \
 6 |         --temperature=0.7 \
 7 |         --num_threads=16 \
 8 |         --n=8 \
 9 |         --selection=generated_tests_aware_llm_judge\
10 |         --test_generator 4o-mini \
11 |         --lcb_version release_v4 \
12 |         --start_date 2024-08-01 \
13 |         --end_date 2024-12-01 \
14 |         --num_round ${MAX_ROUND} \
15 |         --api_name openai/Qwen/Qwen2.5-Coder-32B-Instruct \
16 |         --api_base http://localhost:8000/v1 \
17 |         --result_json_path="results/sec6_qwen32b_llm_judge_baseline_${difficulty}_max_round_${MAX_ROUND}.json" \
18 |         --load_cached_preds \
19 |         --cached_preds_path="results/sec5_revision_vanilla_qwen_32b_${difficulty}_max_round_${MAX_ROUND}.json"
20 | done


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/sec6_llm_judge_baseline/qwen_7b_llm_judge_baseline.sh:
--------------------------------------------------------------------------------
 1 | MAX_ROUND=5
 2 | for difficulty in easy medium hard
 3 | do
 4 |     python evaluate_multiprocess.py \
 5 |         --difficulty=${difficulty} \
 6 |         --temperature=0.7 \
 7 |         --num_threads=16 \
 8 |         --n=8 \
 9 |         --selection=generated_tests_aware_llm_judge\
10 |         --test_generator 4o-mini \
11 |         --lcb_version release_v4 \
12 |         --start_date 2024-08-01 \
13 |         --end_date 2024-12-01 \
14 |         --num_round ${MAX_ROUND} \
15 |         --api_name openai/Qwen/Qwen2.5-Coder-7B-Instruct \
16 |         --api_base http://localhost:8000/v1 \
17 |         --result_json_path="results/sec6_qwen7b_llm_judge_baseline_${difficulty}_max_round_${MAX_ROUND}.json" \
18 |         --load_cached_preds \
19 |         --cached_preds_path="results/sec5_revision_vanilla_qwen_7b_${difficulty}_max_round_${MAX_ROUND}.json"
20 | done


--------------------------------------------------------------------------------
/skythought/test-time-scaling/scripts/sec6_o1_generated/4o_mini_vanilla_with_o1_generated_and_timeout_test.sh:
--------------------------------------------------------------------------------
 1 | MAX_ROUND=5
 2 | for difficulty in easy medium hard
 3 | do
 4 |     python evaluate_multiprocess.py \
 5 |         --difficulty=${difficulty} \
 6 |         --temperature=0.7 \
 7 |         --num_threads=16 \
 8 |         --n=8 \
 9 |         --selection=generated_tests\
10 |         --test_generator o1-mini \
11 |         --lcb_version release_v4 \
12 |         --start_date 2024-08-01 \
13 |         --end_date 2024-12-01 \
14 |         --num_round ${MAX_ROUND} \
15 |         --load_cached_preds \
16 |         --result_json_path="results/sec6_4o_mini_with_o1mini_generated_and_timeout_test_${difficulty}_max_round_${MAX_ROUND}.json" \
17 |         --cached_preds_path="results/sec5_revision_vanilla_4o_mini_${difficulty}_max_round_${MAX_ROUND}.json"
18 | done


--------------------------------------------------------------------------------
/skythought/test-time-scaling/util.py:
--------------------------------------------------------------------------------
 1 | import os, json
 2 | 
 3 | def post_process_code(code):
 4 |     code = code.split("</code>")[0]
 5 |     code = code.replace("```python", "")
 6 |     code = code.split("```")[0]
 7 |     code = code.replace("<code>", "")
 8 |     # print(f"postprocessed code: {code}")
 9 |     return code
10 | 
# Short model aliases -> full model identifier strings.
name_map = {
    # OpenAI models (identifiers carry the "openai/" prefix).
    "4o-mini": "openai/gpt-4o-mini",
    "4o": "openai/gpt-4o",
    "o1-mini": "openai/o1-mini",
    # NOTE(review): "o1" resolves to the same identifier as "o1-preview" --
    # confirm this is intentional.
    "o1": "openai/o1-preview",
    "o3-mini": "openai/o3-mini",
    "o1-preview": "openai/o1-preview",
    # Qwen coder models.
    "qwen7b": "Qwen/Qwen2.5-Coder-7B-Instruct",
    "qwen32b": "Qwen/Qwen2.5-Coder-32B-Instruct",
}
21 | 
def load_icl_examples(path="v4_only_medium_correct_codes.json"):
    """Load the in-context-learning examples stored as JSON at ``path``.

    Args:
        path: Location of the JSON file; relative paths are resolved against
            the current working directory.

    Returns:
        The decoded JSON content, or an empty dict (after printing a notice)
        when ``path`` does not exist.
    """
    if os.path.exists(path):
        # Use a context manager so the file handle is closed deterministically
        # instead of being left to the garbage collector.
        with open(path, "r") as f:
            return json.load(f)
    print("No ICL examples available")
    return {}


# Loaded once at import time, from the current working directory.
ICL_EXAMPLES = load_icl_examples()


--------------------------------------------------------------------------------
/skythought/train/LLaMA-Factory/.dockerignore:
--------------------------------------------------------------------------------
 1 | .vscode
 2 | .git
 3 | .github
 4 | .venv
 5 | cache
 6 | data
 7 | docker
 8 | saves
 9 | hf_cache
10 | ms_cache
11 | om_cache
12 | output
13 | .dockerignore
14 | .gitattributes
15 | .gitignore
16 | 


--------------------------------------------------------------------------------
/skythought/train/LLaMA-Factory/.env.local:
--------------------------------------------------------------------------------
 1 | # Note: actually we do not support .env, just for reference
 2 | # api
 3 | API_HOST=
 4 | API_PORT=
 5 | API_KEY=
 6 | API_MODEL_NAME=
 7 | FASTAPI_ROOT_PATH=
 8 | MAX_CONCURRENT=
 9 | # general
10 | DISABLE_VERSION_CHECK=
11 | FORCE_CHECK_IMPORTS=
12 | LLAMAFACTORY_VERBOSITY=
13 | USE_MODELSCOPE_HUB=
14 | USE_OPENMIND_HUB=
15 | RECORD_VRAM=
16 | # torchrun
17 | FORCE_TORCHRUN=
18 | MASTER_ADDR=
19 | MASTER_PORT=
20 | NNODES=
21 | NODE_RANK=
22 | NPROC_PER_NODE=
23 | # wandb
24 | WANDB_DISABLED=
25 | WANDB_PROJECT=
26 | WANDB_API_KEY=
27 | # gradio ui
28 | GRADIO_SHARE=
29 | GRADIO_SERVER_NAME=
30 | GRADIO_SERVER_PORT=
31 | GRADIO_ROOT_PATH=
32 | GRADIO_IPV6=
33 | # setup
34 | ENABLE_SHORT_CONSOLE=1
35 | # reserved (do not use)
36 | LLAMABOARD_ENABLED=
37 | LLAMABOARD_WORKDIR=
38 | 


--------------------------------------------------------------------------------
/skythought/train/LLaMA-Factory/.gitattributes:
--------------------------------------------------------------------------------
1 | # Auto detect text files and perform LF normalization
2 | * text=auto
3 | 


--------------------------------------------------------------------------------
/skythought/train/LLaMA-Factory/.github/PULL_REQUEST_TEMPLATE.md:
--------------------------------------------------------------------------------
1 | # What does this PR do?
2 | 
3 | Fixes # (issue)
4 | 
5 | ## Before submitting
6 | 
7 | - [ ] Did you read the [contributor guideline](https://github.com/hiyouga/LLaMA-Factory/blob/main/.github/CONTRIBUTING.md)?
8 | - [ ] Did you write any new necessary tests?
9 | 


--------------------------------------------------------------------------------
/skythought/train/LLaMA-Factory/.github/SECURITY.md:
--------------------------------------------------------------------------------
1 | # Reporting Security Issues
2 | 
3 | To report a security issue, please use the GitHub Security Advisory ["Report a Vulnerability"](https://github.com/hiyouga/LLaMA-Factory/security/advisories/new) tab.
4 | 
5 | We will send a response indicating the next steps in handling your report. After the initial reply to your report, the security team will keep you informed of the progress towards a fix and full announcement, and may ask for additional information or guidance.
6 | 
7 | Report security bugs in third-party modules to the person or team maintaining the module.
8 | 


--------------------------------------------------------------------------------
/skythought/train/LLaMA-Factory/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
# pre-commit configuration: generic file checks, pyupgrade, and ruff.
repos:
# Generic checks from the pre-commit-hooks collection.
-   repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v5.0.0
    hooks:
    -   id: check-ast
    -   id: check-added-large-files
        # Size limit is passed in kB.
        args: ['--maxkb=25000']
    -   id: check-merge-conflict
    -   id: check-yaml
    -   id: debug-statements
    -   id: end-of-file-fixer
    -   id: trailing-whitespace
        args: [--markdown-linebreak-ext=md]
    -   id: no-commit-to-branch
        # Applies to the `main` branch.
        args: ['--branch', 'main']

# pyupgrade, run with the --py38-plus option.
-   repo: https://github.com/asottile/pyupgrade
    rev: v3.17.0
    hooks:
    -   id: pyupgrade
        args: [--py38-plus]

# ruff lint (with --fix) followed by ruff-format.
-   repo: https://github.com/astral-sh/ruff-pre-commit
    rev: v0.6.9
    hooks:
    -   id: ruff
        args: [--fix]
    -   id: ruff-format


--------------------------------------------------------------------------------
/skythought/train/LLaMA-Factory/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include LICENSE requirements.txt
2 | 


--------------------------------------------------------------------------------
/skythought/train/LLaMA-Factory/Makefile:
--------------------------------------------------------------------------------
# Developer convenience targets: build, commit hooks, lint, format, test.
.PHONY: build commit quality style test

# Paths handed to ruff by the `quality` and `style` targets.
check_dirs := scripts src tests setup.py

# Install the `build` package, then build the project with `python -m build`.
build:
	pip install build && python -m build

# Install the pre-commit hooks and run them against all files.
commit:
	pre-commit install
	pre-commit run --all-files

# Check-only: ruff lint plus `ruff format --check` (no files are rewritten).
quality:
	ruff check $(check_dirs)
	ruff format --check $(check_dirs)

# Apply ruff's automatic fixes and reformat the checked paths in place.
style:
	ruff check $(check_dirs) --fix
	ruff format $(check_dirs)

# Run the test suite with CUDA_VISIBLE_DEVICES set to empty and
# WANDB_DISABLED=true.
test:
	CUDA_VISIBLE_DEVICES= WANDB_DISABLED=true pytest -vv tests/


--------------------------------------------------------------------------------
/skythought/train/LLaMA-Factory/assets/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NovaSky-AI/SkyThought/cf3b941afcd439cde56f47789579822398a7789d/skythought/train/LLaMA-Factory/assets/logo.png


--------------------------------------------------------------------------------
/skythought/train/LLaMA-Factory/assets/wechat.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NovaSky-AI/SkyThought/cf3b941afcd439cde56f47789579822398a7789d/skythought/train/LLaMA-Factory/assets/wechat.jpg


--------------------------------------------------------------------------------
/skythought/train/LLaMA-Factory/assets/wechat_npu.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NovaSky-AI/SkyThought/cf3b941afcd439cde56f47789579822398a7789d/skythought/train/LLaMA-Factory/assets/wechat_npu.jpg


--------------------------------------------------------------------------------
/skythought/train/LLaMA-Factory/data/mllm_demo_data/1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NovaSky-AI/SkyThought/cf3b941afcd439cde56f47789579822398a7789d/skythought/train/LLaMA-Factory/data/mllm_demo_data/1.jpg


--------------------------------------------------------------------------------
/skythought/train/LLaMA-Factory/data/mllm_demo_data/1.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NovaSky-AI/SkyThought/cf3b941afcd439cde56f47789579822398a7789d/skythought/train/LLaMA-Factory/data/mllm_demo_data/1.mp4


--------------------------------------------------------------------------------
/skythought/train/LLaMA-Factory/data/mllm_demo_data/2.avi:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NovaSky-AI/SkyThought/cf3b941afcd439cde56f47789579822398a7789d/skythought/train/LLaMA-Factory/data/mllm_demo_data/2.avi


--------------------------------------------------------------------------------
/skythought/train/LLaMA-Factory/data/mllm_demo_data/2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NovaSky-AI/SkyThought/cf3b941afcd439cde56f47789579822398a7789d/skythought/train/LLaMA-Factory/data/mllm_demo_data/2.jpg


--------------------------------------------------------------------------------
/skythought/train/LLaMA-Factory/data/mllm_demo_data/3.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NovaSky-AI/SkyThought/cf3b941afcd439cde56f47789579822398a7789d/skythought/train/LLaMA-Factory/data/mllm_demo_data/3.jpg


--------------------------------------------------------------------------------
/skythought/train/LLaMA-Factory/data/mllm_demo_data/3.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NovaSky-AI/SkyThought/cf3b941afcd439cde56f47789579822398a7789d/skythought/train/LLaMA-Factory/data/mllm_demo_data/3.mp4


--------------------------------------------------------------------------------
/skythought/train/LLaMA-Factory/evaluation/ceval/ceval.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NovaSky-AI/SkyThought/cf3b941afcd439cde56f47789579822398a7789d/skythought/train/LLaMA-Factory/evaluation/ceval/ceval.zip


--------------------------------------------------------------------------------
/skythought/train/LLaMA-Factory/evaluation/cmmlu/cmmlu.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NovaSky-AI/SkyThought/cf3b941afcd439cde56f47789579822398a7789d/skythought/train/LLaMA-Factory/evaluation/cmmlu/cmmlu.zip


--------------------------------------------------------------------------------
/skythought/train/LLaMA-Factory/evaluation/mmlu/mmlu.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NovaSky-AI/SkyThought/cf3b941afcd439cde56f47789579822398a7789d/skythought/train/LLaMA-Factory/evaluation/mmlu/mmlu.zip


--------------------------------------------------------------------------------
/skythought/train/LLaMA-Factory/examples/accelerate/fsdp_config.yaml:
--------------------------------------------------------------------------------
 1 | compute_environment: LOCAL_MACHINE
 2 | debug: false
 3 | distributed_type: FSDP
 4 | downcast_bf16: 'no'
 5 | fsdp_config:
 6 |   fsdp_auto_wrap_policy: TRANSFORMER_BASED_WRAP
 7 |   fsdp_backward_prefetch: BACKWARD_PRE
 8 |   fsdp_forward_prefetch: false
 9 |   fsdp_cpu_ram_efficient_loading: true
10 |   fsdp_offload_params: true # offload may affect training speed
11 |   fsdp_sharding_strategy: FULL_SHARD
12 |   fsdp_state_dict_type: FULL_STATE_DICT
13 |   fsdp_sync_module_states: true
14 |   fsdp_use_orig_params: true
15 | machine_rank: 0
16 | main_training_function: main
17 | mixed_precision: fp16 # or bf16
18 | num_machines: 1 # the number of nodes
19 | num_processes: 2 # the number of GPUs in all nodes
20 | rdzv_backend: static
21 | same_network: true
22 | tpu_env: []
23 | tpu_use_cluster: false
24 | tpu_use_sudo: false
25 | use_cpu: false
26 | 


--------------------------------------------------------------------------------
/skythought/train/LLaMA-Factory/examples/deepspeed/ds_z0_config.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "train_batch_size": "auto",
 3 |   "train_micro_batch_size_per_gpu": "auto",
 4 |   "gradient_accumulation_steps": "auto",
 5 |   "gradient_clipping": "auto",
 6 |   "zero_allow_untested_optimizer": true,
 7 |   "fp16": {
 8 |     "enabled": "auto",
 9 |     "loss_scale": 0,
10 |     "loss_scale_window": 1000,
11 |     "initial_scale_power": 16,
12 |     "hysteresis": 2,
13 |     "min_loss_scale": 1
14 |   },
15 |   "bf16": {
16 |     "enabled": "auto"
17 |   },
18 |   "zero_optimization": {
19 |     "stage": 0,
20 |     "allgather_partitions": true,
21 |     "allgather_bucket_size": 5e8,
22 |     "overlap_comm": true,
23 |     "reduce_scatter": true,
24 |     "reduce_bucket_size": 5e8,
25 |     "contiguous_gradients": true,
26 |     "round_robin_gradients": true
27 |   }
28 | }
29 | 


--------------------------------------------------------------------------------
/skythought/train/LLaMA-Factory/examples/deepspeed/ds_z2_config.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "train_batch_size": "auto",
 3 |   "train_micro_batch_size_per_gpu": "auto",
 4 |   "gradient_accumulation_steps": "auto",
 5 |   "gradient_clipping": "auto",
 6 |   "zero_allow_untested_optimizer": true,
 7 |   "fp16": {
 8 |     "enabled": "auto",
 9 |     "loss_scale": 0,
10 |     "loss_scale_window": 1000,
11 |     "initial_scale_power": 16,
12 |     "hysteresis": 2,
13 |     "min_loss_scale": 1
14 |   },
15 |   "bf16": {
16 |     "enabled": "auto"
17 |   },
18 |   "zero_optimization": {
19 |     "stage": 2,
20 |     "allgather_partitions": true,
21 |     "allgather_bucket_size": 5e8,
22 |     "overlap_comm": true,
23 |     "reduce_scatter": true,
24 |     "reduce_bucket_size": 5e8,
25 |     "contiguous_gradients": true,
26 |     "round_robin_gradients": true
27 |   }
28 | }
29 | 


--------------------------------------------------------------------------------
/skythought/train/LLaMA-Factory/examples/extras/adam_mini/qwen2_full_sft.yaml:
--------------------------------------------------------------------------------
 1 | ### model
 2 | model_name_or_path: Qwen/Qwen2-1.5B-Instruct
 3 | 
 4 | ### method
 5 | stage: sft
 6 | do_train: true
 7 | finetuning_type: full
 8 | use_adam_mini: true
 9 | 
10 | ### dataset
11 | dataset: identity,alpaca_en_demo
12 | template: qwen
13 | cutoff_len: 2048
14 | max_samples: 1000
15 | overwrite_cache: true
16 | preprocessing_num_workers: 16
17 | 
18 | ### output
19 | output_dir: saves/qwen2-1_5b/full/sft
20 | logging_steps: 10
21 | save_steps: 500
22 | plot_loss: true
23 | overwrite_output_dir: true
24 | 
25 | ### train
26 | per_device_train_batch_size: 1
27 | gradient_accumulation_steps: 8
28 | learning_rate: 1.0e-5
29 | num_train_epochs: 3.0
30 | lr_scheduler_type: cosine
31 | warmup_ratio: 0.1
32 | bf16: true
33 | ddp_timeout: 180000000
34 | 
35 | ### eval
36 | val_size: 0.1
37 | per_device_eval_batch_size: 1
38 | eval_strategy: steps
39 | eval_steps: 500
40 | 


--------------------------------------------------------------------------------
/skythought/train/LLaMA-Factory/examples/extras/fsdp_qlora/train.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # DO NOT use GPTQ/AWQ models with FSDP+QLoRA
3 | 
4 | CUDA_VISIBLE_DEVICES=0,1 accelerate launch \
5 |     --config_file examples/accelerate/fsdp_config.yaml \
6 |     src/train.py examples/extras/fsdp_qlora/llama3_lora_sft.yaml
7 | 


--------------------------------------------------------------------------------
/skythought/train/LLaMA-Factory/examples/extras/llama_pro/expand.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | python scripts/llama_pro.py \
4 |     --model_name_or_path meta-llama/Meta-Llama-3-8B-Instruct \
5 |     --output_dir models/llama3-8b-pro \
6 |     --num_expand 8
7 | 


--------------------------------------------------------------------------------
/skythought/train/LLaMA-Factory/examples/extras/nlg_eval/llama3_lora_predict.yaml:
--------------------------------------------------------------------------------
 1 | # Batch generation can be SLOW with this config.
 2 | # For faster inference, we recommend using `scripts/vllm_infer.py`.
 3 | 
 4 | ### model
 5 | model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
 6 | adapter_name_or_path: saves/llama3-8b/lora/sft
 7 | 
 8 | ### method
 9 | stage: sft
10 | do_predict: true
11 | finetuning_type: lora
12 | 
13 | ### dataset
14 | eval_dataset: identity,alpaca_en_demo
15 | template: llama3
16 | cutoff_len: 2048
17 | max_samples: 50
18 | overwrite_cache: true
19 | preprocessing_num_workers: 16
20 | 
21 | ### output
22 | output_dir: saves/llama3-8b/lora/predict
23 | overwrite_output_dir: true
24 | 
25 | ### eval
26 | per_device_eval_batch_size: 1
27 | predict_with_generate: true
28 | ddp_timeout: 180000000
29 | 


--------------------------------------------------------------------------------
/skythought/train/LLaMA-Factory/examples/extras/pissa/init.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | python scripts/pissa_init.py \
4 |     --model_name_or_path meta-llama/Meta-Llama-3-8B-Instruct \
5 |     --output_dir models/llama3-8b-pissa
6 | 


--------------------------------------------------------------------------------
/skythought/train/LLaMA-Factory/examples/inference/llama3.yaml:
--------------------------------------------------------------------------------
1 | model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
2 | template: llama3
3 | infer_backend: huggingface  # choices: [huggingface, vllm]
4 | 


--------------------------------------------------------------------------------
/skythought/train/LLaMA-Factory/examples/inference/llama3_lora_sft.yaml:
--------------------------------------------------------------------------------
1 | model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
2 | adapter_name_or_path: saves/llama3-8b/lora/sft
3 | template: llama3
4 | finetuning_type: lora
5 | infer_backend: huggingface  # choices: [huggingface, vllm]
6 | 


--------------------------------------------------------------------------------
/skythought/train/LLaMA-Factory/examples/inference/llama3_vllm.yaml:
--------------------------------------------------------------------------------
1 | model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
2 | template: llama3
3 | infer_backend: vllm
4 | vllm_enforce_eager: true
5 | 


--------------------------------------------------------------------------------
/skythought/train/LLaMA-Factory/examples/inference/llava1_5.yaml:
--------------------------------------------------------------------------------
1 | model_name_or_path: llava-hf/llava-1.5-7b-hf
2 | template: llava
3 | infer_backend: huggingface  # choices: [huggingface, vllm]
4 | 


--------------------------------------------------------------------------------
/skythought/train/LLaMA-Factory/examples/inference/qwen2_vl.yaml:
--------------------------------------------------------------------------------
1 | model_name_or_path: Qwen/Qwen2-VL-7B-Instruct
2 | template: qwen2_vl
3 | infer_backend: huggingface  # choices: [huggingface, vllm]
4 | 


--------------------------------------------------------------------------------
/skythought/train/LLaMA-Factory/examples/merge_lora/llama3_gptq.yaml:
--------------------------------------------------------------------------------
 1 | ### model
 2 | model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
 3 | template: llama3
 4 | 
 5 | ### export
 6 | export_dir: models/llama3_gptq
 7 | export_quantization_bit: 4
 8 | export_quantization_dataset: data/c4_demo.json
 9 | export_size: 2
10 | export_device: cpu
11 | export_legacy_format: false
12 | 


--------------------------------------------------------------------------------
/skythought/train/LLaMA-Factory/examples/merge_lora/llama3_lora_sft.yaml:
--------------------------------------------------------------------------------
 1 | ### Note: DO NOT use a quantized model or set quantization_bit when merging LoRA adapters
 2 | 
 3 | ### model
 4 | model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
 5 | adapter_name_or_path: saves/llama3-8b/lora/sft
 6 | template: llama3
 7 | finetuning_type: lora
 8 | 
 9 | ### export
10 | export_dir: models/llama3_lora_sft
11 | export_size: 2
12 | export_device: cpu
13 | export_legacy_format: false
14 | 


--------------------------------------------------------------------------------
/skythought/train/LLaMA-Factory/examples/merge_lora/qwen2vl_lora_sft.yaml:
--------------------------------------------------------------------------------
 1 | ### Note: DO NOT use a quantized model or set quantization_bit when merging LoRA adapters
 2 | 
 3 | ### model
 4 | model_name_or_path: Qwen/Qwen2-VL-7B-Instruct
 5 | adapter_name_or_path: saves/qwen2_vl-7b/lora/sft
 6 | template: qwen2_vl
 7 | finetuning_type: lora
 8 | 
 9 | ### export
10 | export_dir: models/qwen2_vl_lora_sft
11 | export_size: 2
12 | export_device: cpu
13 | export_legacy_format: false
14 | 


--------------------------------------------------------------------------------
/skythought/train/LLaMA-Factory/examples/train_lora/llama3_lora_eval.yaml:
--------------------------------------------------------------------------------
 1 | ### model
 2 | model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
 3 | adapter_name_or_path: saves/llama3-8b/lora/sft
 4 | 
 5 | ### method
 6 | finetuning_type: lora
 7 | 
 8 | ### dataset
 9 | task: mmlu_test  # choices: [mmlu_test, ceval_validation, cmmlu_test]
10 | template: fewshot
11 | lang: en
12 | n_shot: 5
13 | 
14 | ### output
15 | save_dir: saves/llama3-8b/lora/eval
16 | 
17 | ### eval
18 | batch_size: 4
19 | 


--------------------------------------------------------------------------------
/skythought/train/LLaMA-Factory/examples/train_lora/llama3_lora_pretrain.yaml:
--------------------------------------------------------------------------------
 1 | ### model
 2 | model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
 3 | 
 4 | ### method
 5 | stage: pt
 6 | do_train: true
 7 | finetuning_type: lora
 8 | lora_target: all
 9 | 
10 | ### dataset
11 | dataset: c4_demo
12 | cutoff_len: 2048
13 | max_samples: 1000
14 | overwrite_cache: true
15 | preprocessing_num_workers: 16
16 | 
17 | ### output
18 | output_dir: saves/llama3-8b/lora/pretrain
19 | logging_steps: 10
20 | save_steps: 500
21 | plot_loss: true
22 | overwrite_output_dir: true
23 | 
24 | ### train
25 | per_device_train_batch_size: 1
26 | gradient_accumulation_steps: 8
27 | learning_rate: 1.0e-4
28 | num_train_epochs: 3.0
29 | lr_scheduler_type: cosine
30 | warmup_ratio: 0.1
31 | bf16: true
32 | ddp_timeout: 180000000
33 | 
34 | ### eval
35 | val_size: 0.1
36 | per_device_eval_batch_size: 1
37 | eval_strategy: steps
38 | eval_steps: 500
39 | 


--------------------------------------------------------------------------------
/skythought/train/LLaMA-Factory/examples/train_lora/llama3_lora_reward.yaml:
--------------------------------------------------------------------------------
 1 | ### model
 2 | model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
 3 | 
 4 | ### method
 5 | stage: rm
 6 | do_train: true
 7 | finetuning_type: lora
 8 | lora_target: all
 9 | 
10 | ### dataset
11 | dataset: dpo_en_demo
12 | template: llama3
13 | cutoff_len: 2048
14 | max_samples: 1000
15 | overwrite_cache: true
16 | preprocessing_num_workers: 16
17 | 
18 | ### output
19 | output_dir: saves/llama3-8b/lora/reward
20 | logging_steps: 10
21 | save_steps: 500
22 | plot_loss: true
23 | overwrite_output_dir: true
24 | 
25 | ### train
26 | per_device_train_batch_size: 1
27 | gradient_accumulation_steps: 8
28 | learning_rate: 1.0e-4
29 | num_train_epochs: 3.0
30 | lr_scheduler_type: cosine
31 | warmup_ratio: 0.1
32 | bf16: true
33 | ddp_timeout: 180000000
34 | 
35 | ### eval
36 | val_size: 0.1
37 | per_device_eval_batch_size: 1
38 | eval_strategy: steps
39 | eval_steps: 500
40 | 


--------------------------------------------------------------------------------
/skythought/train/LLaMA-Factory/examples/train_lora/llama3_preprocess.yaml:
--------------------------------------------------------------------------------
 1 | ### model
 2 | model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
 3 | 
 4 | ### method
 5 | stage: sft
 6 | do_train: true
 7 | finetuning_type: lora
 8 | lora_target: all
 9 | 
10 | ### dataset
11 | dataset: identity,alpaca_en_demo
12 | template: llama3
13 | cutoff_len: 2048
14 | max_samples: 1000
15 | overwrite_cache: true
16 | preprocessing_num_workers: 16
17 | tokenized_path: saves/llama3-8b/dataset/sft
18 | 
19 | ### output
20 | output_dir: saves/llama3-8b/lora/sft
21 | overwrite_output_dir: true
22 | 


--------------------------------------------------------------------------------
/skythought/train/LLaMA-Factory/examples/train_lora/llava1_5_lora_sft.yaml:
--------------------------------------------------------------------------------
 1 | ### model
 2 | model_name_or_path: llava-hf/llava-1.5-7b-hf
 3 | 
 4 | ### method
 5 | stage: sft
 6 | do_train: true
 7 | finetuning_type: lora
 8 | lora_target: all
 9 | 
10 | ### dataset
11 | dataset: mllm_demo
12 | template: llava
13 | cutoff_len: 2048
14 | max_samples: 1000
15 | overwrite_cache: true
16 | preprocessing_num_workers: 16
17 | 
18 | ### output
19 | output_dir: saves/llava1_5-7b/lora/sft
20 | logging_steps: 10
21 | save_steps: 500
22 | plot_loss: true
23 | overwrite_output_dir: true
24 | 
25 | ### train
26 | per_device_train_batch_size: 1
27 | gradient_accumulation_steps: 8
28 | learning_rate: 1.0e-4
29 | num_train_epochs: 3.0
30 | lr_scheduler_type: cosine
31 | warmup_ratio: 0.1
32 | bf16: true
33 | ddp_timeout: 180000000
34 | 
35 | ### eval
36 | val_size: 0.1
37 | per_device_eval_batch_size: 1
38 | eval_strategy: steps
39 | eval_steps: 500
40 | 


--------------------------------------------------------------------------------
/skythought/train/LLaMA-Factory/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [build-system]
 2 | requires = ["setuptools>=61.0"]
 3 | build-backend = "setuptools.build_meta"
 4 | 
 5 | [tool.ruff]
 6 | target-version = "py38"
 7 | line-length = 119
 8 | indent-width = 4
 9 | 
10 | [tool.ruff.lint]
11 | ignore = ["C408", "C901", "E501", "E731", "E741", "W605"]
12 | select = ["C", "E", "F", "I", "W"]
13 | 
14 | [tool.ruff.lint.isort]
15 | lines-after-imports = 2
16 | known-first-party = ["llamafactory"]
17 | known-third-party = [
18 |     "accelerate",
19 |     "datasets",
20 |     "gradio",
21 |     "numpy",
22 |     "peft",
23 |     "torch",
24 |     "transformers",
25 |     "trl"
26 | ]
27 | 
28 | [tool.ruff.format]
29 | quote-style = "double"
30 | indent-style = "space"
31 | docstring-code-format = true
32 | skip-magic-trailing-comma = false
33 | line-ending = "auto"
34 | 


--------------------------------------------------------------------------------
/skythought/train/LLaMA-Factory/requirements.txt:
--------------------------------------------------------------------------------
 1 | transformers>=4.41.2,<=4.46.1
 2 | datasets>=2.16.0,<=3.1.0
 3 | accelerate>=0.34.0,<=1.0.1
 4 | peft>=0.11.1,<=0.12.0
 5 | trl>=0.8.6,<=0.9.6
 6 | tokenizers>=0.19.0,<0.20.4
 7 | gradio>=4.0.0,<5.0.0
 8 | pandas>=2.0.0
 9 | scipy
10 | einops
11 | sentencepiece
12 | tiktoken
13 | protobuf
14 | uvicorn
15 | pydantic
16 | fastapi
17 | sse-starlette
18 | matplotlib>=3.7.0
19 | fire
20 | packaging
21 | pyyaml
22 | numpy<2.0.0
23 | av
24 | tyro<0.9.0
25 | 


--------------------------------------------------------------------------------
/skythought/train/LLaMA-Factory/src/llamafactory/api/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NovaSky-AI/SkyThought/cf3b941afcd439cde56f47789579822398a7789d/skythought/train/LLaMA-Factory/src/llamafactory/api/__init__.py


--------------------------------------------------------------------------------
/skythought/train/LLaMA-Factory/src/llamafactory/chat/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 the LlamaFactory team.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .base_engine import BaseEngine
16 | from .chat_model import ChatModel
17 | 
18 | 
19 | __all__ = ["BaseEngine", "ChatModel"]
20 | 


--------------------------------------------------------------------------------
/skythought/train/LLaMA-Factory/src/llamafactory/data/processors/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NovaSky-AI/SkyThought/cf3b941afcd439cde56f47789579822398a7789d/skythought/train/LLaMA-Factory/src/llamafactory/data/processors/__init__.py


--------------------------------------------------------------------------------
/skythought/train/LLaMA-Factory/src/llamafactory/eval/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NovaSky-AI/SkyThought/cf3b941afcd439cde56f47789579822398a7789d/skythought/train/LLaMA-Factory/src/llamafactory/eval/__init__.py


--------------------------------------------------------------------------------
/skythought/train/LLaMA-Factory/src/llamafactory/extras/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NovaSky-AI/SkyThought/cf3b941afcd439cde56f47789579822398a7789d/skythought/train/LLaMA-Factory/src/llamafactory/extras/__init__.py


--------------------------------------------------------------------------------
/skythought/train/LLaMA-Factory/src/llamafactory/launcher.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 the LlamaFactory team.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from llamafactory.train.tuner import run_exp  # use absolute import
16 | 
17 | 
18 | def launch():
19 |     run_exp()
20 | 
21 | 
22 | if __name__ == "__main__":
23 |     launch()
24 | 


--------------------------------------------------------------------------------
/skythought/train/LLaMA-Factory/src/llamafactory/model/model_utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NovaSky-AI/SkyThought/cf3b941afcd439cde56f47789579822398a7789d/skythought/train/LLaMA-Factory/src/llamafactory/model/model_utils/__init__.py


--------------------------------------------------------------------------------
/skythought/train/LLaMA-Factory/src/llamafactory/train/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NovaSky-AI/SkyThought/cf3b941afcd439cde56f47789579822398a7789d/skythought/train/LLaMA-Factory/src/llamafactory/train/__init__.py


--------------------------------------------------------------------------------
/skythought/train/LLaMA-Factory/src/llamafactory/train/dpo/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 the LlamaFactory team.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .workflow import run_dpo
16 | 
17 | 
18 | __all__ = ["run_dpo"]
19 | 


--------------------------------------------------------------------------------
/skythought/train/LLaMA-Factory/src/llamafactory/train/kto/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 the LlamaFactory team.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .workflow import run_kto
16 | 
17 | 
18 | __all__ = ["run_kto"]
19 | 


--------------------------------------------------------------------------------
/skythought/train/LLaMA-Factory/src/llamafactory/train/ppo/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 the LlamaFactory team.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .workflow import run_ppo
16 | 
17 | 
18 | __all__ = ["run_ppo"]
19 | 


--------------------------------------------------------------------------------
/skythought/train/LLaMA-Factory/src/llamafactory/train/pt/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 the LlamaFactory team.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .workflow import run_pt
16 | 
17 | 
18 | __all__ = ["run_pt"]
19 | 


--------------------------------------------------------------------------------
/skythought/train/LLaMA-Factory/src/llamafactory/train/rm/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 the LlamaFactory team.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .workflow import run_rm
16 | 
17 | 
18 | __all__ = ["run_rm"]
19 | 


--------------------------------------------------------------------------------
/skythought/train/LLaMA-Factory/src/llamafactory/train/sft/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 the LlamaFactory team.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .workflow import run_sft
16 | 
17 | 
18 | __all__ = ["run_sft"]
19 | 


--------------------------------------------------------------------------------
/skythought/train/LLaMA-Factory/src/llamafactory/webui/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NovaSky-AI/SkyThought/cf3b941afcd439cde56f47789579822398a7789d/skythought/train/LLaMA-Factory/src/llamafactory/webui/__init__.py


--------------------------------------------------------------------------------
/skythought/train/LLaMA-Factory/src/train.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 the LlamaFactory team.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from llamafactory.train.tuner import run_exp
16 | 
17 | 
18 | def main():
19 |     run_exp()
20 | 
21 | 
22 | def _mp_fn(index):
23 |     # For xla_spawn (TPUs)
24 |     run_exp()
25 | 
26 | 
27 | if __name__ == "__main__":
28 |     main()
29 | 


--------------------------------------------------------------------------------
/skythought/train/README.md:
--------------------------------------------------------------------------------
 1 | ## Training
 2 | We use a fork of [LLaMA-Factory](https://github.com/hiyouga/LLaMA-Factory) to perform training.
 3 | 
 4 | Step 1: Add the data path (either the one produced by the tools directory or the one we provide) to the `file_name` field of the `Sky-T1` entry in [LLaMA-Factory/data/dataset_info.json](./LLaMA-Factory/data/dataset_info.json).
 5 | 
 6 | Step 2: Run the following command to train from a 32B model on 8 H100 GPUs:
 7 | 
 8 | `FORCE_TORCHRUN=1 NNODES=1 NODE_RANK=0 MASTER_PORT=29501 llamafactory-cli train examples/train_full/qwen2_full_sft.yaml`
 9 | 
10 | Interested readers can refer to the detailed settings in [examples/train_full/qwen2_full_sft.yaml](./LLaMA-Factory/examples/train_full/qwen2_full_sft.yaml).
11 | 


--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NovaSky-AI/SkyThought/cf3b941afcd439cde56f47789579822398a7789d/tests/__init__.py


--------------------------------------------------------------------------------
/tests/evals/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NovaSky-AI/SkyThought/cf3b941afcd439cde56f47789579822398a7789d/tests/evals/__init__.py


--------------------------------------------------------------------------------
/tests/evals/scoring/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NovaSky-AI/SkyThought/cf3b941afcd439cde56f47789579822398a7789d/tests/evals/scoring/__init__.py


--------------------------------------------------------------------------------