├── .gitignore ├── README.md ├── assets ├── 2048.mp4 ├── curves.png ├── image.png ├── shisensho.mp4 ├── test_2048.png └── vlmgym.png ├── setup.py ├── setup.sh ├── training ├── EasyR1 │ ├── .gitignore │ ├── .pre-commit-config.yaml │ ├── Dockerfile │ ├── LICENSE │ ├── Makefile │ ├── README.md │ ├── assets │ │ ├── easyr1_grpo.png │ │ ├── qwen2_5_vl_7b_geo.png │ │ └── wechat.jpg │ ├── examples │ │ ├── baselines │ │ │ ├── qwen2_5_vl_3b_clevr.sh │ │ │ └── qwen2_5_vl_3b_geoqa8k.sh │ │ ├── config.yaml │ │ └── runtime_env.yaml │ ├── pyproject.toml │ ├── requirements.txt │ ├── scripts │ │ └── model_merger.py │ ├── setup.py │ └── verl │ │ ├── __init__.py │ │ ├── models │ │ ├── __init__.py │ │ ├── monkey_patch.py │ │ └── transformers │ │ │ ├── __init__.py │ │ │ ├── flash_attention_utils.py │ │ │ └── qwen2_vl.py │ │ ├── protocol.py │ │ ├── single_controller │ │ ├── __init__.py │ │ ├── base │ │ │ ├── __init__.py │ │ │ ├── decorator.py │ │ │ ├── register_center │ │ │ │ ├── __init__.py │ │ │ │ └── ray.py │ │ │ ├── worker.py │ │ │ └── worker_group.py │ │ └── ray │ │ │ ├── __init__.py │ │ │ └── base.py │ │ ├── trainer │ │ ├── __init__.py │ │ ├── config.py │ │ ├── core_algos.py │ │ ├── main.py │ │ ├── main_sandbox.py │ │ ├── main_sandbox_match.py │ │ ├── main_sandbox_match3.py │ │ ├── main_sandbox_match_cifar10.py │ │ ├── metrics.py │ │ ├── ray_sandbox_trainer_2048.py │ │ ├── ray_sandbox_trainer_match.py │ │ ├── ray_sandbox_trainer_match3.py │ │ ├── ray_sandbox_trainer_match_cifar10.py │ │ ├── ray_trainer.py │ │ └── runtime_env.yaml │ │ ├── utils │ │ ├── __init__.py │ │ ├── checkpoint │ │ │ ├── __init__.py │ │ │ ├── checkpoint_manager.py │ │ │ └── fsdp_checkpoint_manager.py │ │ ├── dataset.py │ │ ├── flops_counter.py │ │ ├── fsdp_utils.py │ │ ├── logger │ │ │ ├── __init__.py │ │ │ └── aggregate_logger.py │ │ ├── model_utils.py │ │ ├── py_functional.py │ │ ├── reward_score │ │ │ ├── __init__.py │ │ │ ├── math.py │ │ │ └── r1v.py │ │ ├── seqlen_balancing.py │ │ ├── tokenizer.py │ │ ├── torch_dtypes.py │ │ ├── torch_functional.py │ │ ├── tracking.py │ │ └── ulysses.py │ │ └── workers │ │ ├── __init__.py │ │ ├── actor │ │ ├── __init__.py │ │ ├── base.py │ │ ├── config.py │ │ └── dp_actor.py │ │ ├── config.py │ │ ├── critic │ │ ├── __init__.py │ │ ├── base.py │ │ ├── config.py │ │ └── dp_critic.py │ │ ├── fsdp_workers.py │ │ ├── reward │ │ ├── __init__.py │ │ ├── config.py │ │ └── custom.py │ │ ├── rollout │ │ ├── __init__.py │ │ ├── base.py │ │ ├── config.py │ │ └── vllm_rollout │ │ │ ├── __init__.py │ │ │ └── vllm_rollout_spmd.py │ │ └── sharding_manager │ │ ├── __init__.py │ │ ├── base.py │ │ ├── fsdp_ulysses.py │ │ └── fsdp_vllm.py └── scripts │ ├── rl_2048.sh │ ├── rl_shisensho.sh │ ├── rl_shisensho_cifar10.sh │ └── rl_swap.sh └── vlmgym ├── gymnasium-2048 ├── .github │ └── workflows │ │ ├── build.yml │ │ └── publish.yml ├── .gitignore ├── .idea │ ├── .gitignore │ ├── gymnasium-2048.iml │ └── modules.xml ├── .pre-commit-config.yaml ├── LICENSE ├── README.md ├── figures │ ├── observation.png │ ├── stats_random_policy.png │ ├── stats_tdl.png │ ├── stats_tdl_small.png │ ├── training_tdl.png │ └── training_tdl_small.png ├── models │ ├── tdl │ │ └── best_n_tuple_network_policy.zip │ └── tdl_small │ │ └── best_n_tuple_network_policy.zip ├── pyproject.toml ├── scripts │ ├── enjoy.py │ ├── evaluate.py │ ├── play.py │ ├── plot.py │ ├── random_policy.py │ └── train.py ├── setup.py ├── src │ └── gymnasium_2048 │ │ ├── __init__.py │ │ ├── agents │ │ ├── __init__.py │ │ └── ntuple │ │ │ ├── __init__.py │ │ │ ├── factory.py │ │ │ ├── network.py │ │ │ └── policy.py │ │ ├── envs │ │ ├── __init__.py │ │ └── twenty_forty_eight.py │ │ └── wrappers │ │ ├── __init__.py │ │ ├── illegal_reward.py │ │ ├── terminate_goal.py │ │ └── terminate_illegal.py └── tests │ ├── test_agents.py │ └── test_envs.py ├── sandbox ├── MetaSandbox.py └── games │ ├── __init__.py │ ├── game.py │ ├── game2048.py │ ├── gamebreakout.py │ ├── gamematch.py │ ├── gamematch3.py │ └── gamematch_cifar.py └── test ├── eval_2048.py ├── eval_shisensho.py ├── eval_shisensho_cifar10.py └── eval_swap.py /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/.gitignore -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/README.md -------------------------------------------------------------------------------- /assets/2048.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/assets/2048.mp4 -------------------------------------------------------------------------------- /assets/curves.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/assets/curves.png -------------------------------------------------------------------------------- /assets/image.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/assets/image.png -------------------------------------------------------------------------------- /assets/shisensho.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/assets/shisensho.mp4 -------------------------------------------------------------------------------- /assets/test_2048.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/assets/test_2048.png -------------------------------------------------------------------------------- /assets/vlmgym.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/assets/vlmgym.png -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/setup.py -------------------------------------------------------------------------------- /setup.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/setup.sh -------------------------------------------------------------------------------- /training/EasyR1/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/training/EasyR1/.gitignore -------------------------------------------------------------------------------- /training/EasyR1/.pre-commit-config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/training/EasyR1/.pre-commit-config.yaml -------------------------------------------------------------------------------- /training/EasyR1/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/training/EasyR1/Dockerfile -------------------------------------------------------------------------------- /training/EasyR1/LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/training/EasyR1/LICENSE -------------------------------------------------------------------------------- /training/EasyR1/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/training/EasyR1/Makefile -------------------------------------------------------------------------------- /training/EasyR1/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/training/EasyR1/README.md -------------------------------------------------------------------------------- /training/EasyR1/assets/easyr1_grpo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/training/EasyR1/assets/easyr1_grpo.png -------------------------------------------------------------------------------- /training/EasyR1/assets/qwen2_5_vl_7b_geo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/training/EasyR1/assets/qwen2_5_vl_7b_geo.png -------------------------------------------------------------------------------- /training/EasyR1/assets/wechat.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/training/EasyR1/assets/wechat.jpg -------------------------------------------------------------------------------- /training/EasyR1/examples/baselines/qwen2_5_vl_3b_clevr.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/training/EasyR1/examples/baselines/qwen2_5_vl_3b_clevr.sh -------------------------------------------------------------------------------- /training/EasyR1/examples/baselines/qwen2_5_vl_3b_geoqa8k.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/training/EasyR1/examples/baselines/qwen2_5_vl_3b_geoqa8k.sh -------------------------------------------------------------------------------- /training/EasyR1/examples/config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/training/EasyR1/examples/config.yaml -------------------------------------------------------------------------------- /training/EasyR1/examples/runtime_env.yaml: -------------------------------------------------------------------------------- 1 | working_dir: ./ 2 | excludes: ["/.git/"] 3 | env_vars: 4 | TORCH_NCCL_AVOID_RECORD_STREAMS: "1" 5 | -------------------------------------------------------------------------------- /training/EasyR1/pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/training/EasyR1/pyproject.toml -------------------------------------------------------------------------------- /training/EasyR1/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/training/EasyR1/requirements.txt -------------------------------------------------------------------------------- /training/EasyR1/scripts/model_merger.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/training/EasyR1/scripts/model_merger.py -------------------------------------------------------------------------------- /training/EasyR1/setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/training/EasyR1/setup.py -------------------------------------------------------------------------------- /training/EasyR1/verl/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/training/EasyR1/verl/__init__.py -------------------------------------------------------------------------------- /training/EasyR1/verl/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/training/EasyR1/verl/models/__init__.py -------------------------------------------------------------------------------- /training/EasyR1/verl/models/monkey_patch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/training/EasyR1/verl/models/monkey_patch.py -------------------------------------------------------------------------------- /training/EasyR1/verl/models/transformers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/training/EasyR1/verl/models/transformers/__init__.py -------------------------------------------------------------------------------- /training/EasyR1/verl/models/transformers/flash_attention_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/training/EasyR1/verl/models/transformers/flash_attention_utils.py -------------------------------------------------------------------------------- /training/EasyR1/verl/models/transformers/qwen2_vl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/training/EasyR1/verl/models/transformers/qwen2_vl.py -------------------------------------------------------------------------------- /training/EasyR1/verl/protocol.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/training/EasyR1/verl/protocol.py -------------------------------------------------------------------------------- /training/EasyR1/verl/single_controller/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/training/EasyR1/verl/single_controller/__init__.py -------------------------------------------------------------------------------- /training/EasyR1/verl/single_controller/base/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/training/EasyR1/verl/single_controller/base/__init__.py -------------------------------------------------------------------------------- /training/EasyR1/verl/single_controller/base/decorator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/training/EasyR1/verl/single_controller/base/decorator.py -------------------------------------------------------------------------------- /training/EasyR1/verl/single_controller/base/register_center/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/training/EasyR1/verl/single_controller/base/register_center/__init__.py -------------------------------------------------------------------------------- /training/EasyR1/verl/single_controller/base/register_center/ray.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/training/EasyR1/verl/single_controller/base/register_center/ray.py -------------------------------------------------------------------------------- /training/EasyR1/verl/single_controller/base/worker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/training/EasyR1/verl/single_controller/base/worker.py -------------------------------------------------------------------------------- /training/EasyR1/verl/single_controller/base/worker_group.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/training/EasyR1/verl/single_controller/base/worker_group.py -------------------------------------------------------------------------------- /training/EasyR1/verl/single_controller/ray/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/training/EasyR1/verl/single_controller/ray/__init__.py -------------------------------------------------------------------------------- /training/EasyR1/verl/single_controller/ray/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/training/EasyR1/verl/single_controller/ray/base.py -------------------------------------------------------------------------------- /training/EasyR1/verl/trainer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/training/EasyR1/verl/trainer/__init__.py -------------------------------------------------------------------------------- /training/EasyR1/verl/trainer/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/training/EasyR1/verl/trainer/config.py -------------------------------------------------------------------------------- /training/EasyR1/verl/trainer/core_algos.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/training/EasyR1/verl/trainer/core_algos.py -------------------------------------------------------------------------------- /training/EasyR1/verl/trainer/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/training/EasyR1/verl/trainer/main.py -------------------------------------------------------------------------------- /training/EasyR1/verl/trainer/main_sandbox.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/training/EasyR1/verl/trainer/main_sandbox.py -------------------------------------------------------------------------------- /training/EasyR1/verl/trainer/main_sandbox_match.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/training/EasyR1/verl/trainer/main_sandbox_match.py -------------------------------------------------------------------------------- /training/EasyR1/verl/trainer/main_sandbox_match3.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/training/EasyR1/verl/trainer/main_sandbox_match3.py -------------------------------------------------------------------------------- /training/EasyR1/verl/trainer/main_sandbox_match_cifar10.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/training/EasyR1/verl/trainer/main_sandbox_match_cifar10.py -------------------------------------------------------------------------------- /training/EasyR1/verl/trainer/metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/training/EasyR1/verl/trainer/metrics.py -------------------------------------------------------------------------------- /training/EasyR1/verl/trainer/ray_sandbox_trainer_2048.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/training/EasyR1/verl/trainer/ray_sandbox_trainer_2048.py -------------------------------------------------------------------------------- /training/EasyR1/verl/trainer/ray_sandbox_trainer_match.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/training/EasyR1/verl/trainer/ray_sandbox_trainer_match.py -------------------------------------------------------------------------------- /training/EasyR1/verl/trainer/ray_sandbox_trainer_match3.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/training/EasyR1/verl/trainer/ray_sandbox_trainer_match3.py -------------------------------------------------------------------------------- /training/EasyR1/verl/trainer/ray_sandbox_trainer_match_cifar10.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/training/EasyR1/verl/trainer/ray_sandbox_trainer_match_cifar10.py -------------------------------------------------------------------------------- /training/EasyR1/verl/trainer/ray_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/training/EasyR1/verl/trainer/ray_trainer.py -------------------------------------------------------------------------------- /training/EasyR1/verl/trainer/runtime_env.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/training/EasyR1/verl/trainer/runtime_env.yaml -------------------------------------------------------------------------------- /training/EasyR1/verl/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/training/EasyR1/verl/utils/__init__.py -------------------------------------------------------------------------------- /training/EasyR1/verl/utils/checkpoint/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/training/EasyR1/verl/utils/checkpoint/__init__.py -------------------------------------------------------------------------------- /training/EasyR1/verl/utils/checkpoint/checkpoint_manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/training/EasyR1/verl/utils/checkpoint/checkpoint_manager.py -------------------------------------------------------------------------------- /training/EasyR1/verl/utils/checkpoint/fsdp_checkpoint_manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/training/EasyR1/verl/utils/checkpoint/fsdp_checkpoint_manager.py -------------------------------------------------------------------------------- /training/EasyR1/verl/utils/dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/training/EasyR1/verl/utils/dataset.py -------------------------------------------------------------------------------- /training/EasyR1/verl/utils/flops_counter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/training/EasyR1/verl/utils/flops_counter.py -------------------------------------------------------------------------------- /training/EasyR1/verl/utils/fsdp_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/training/EasyR1/verl/utils/fsdp_utils.py -------------------------------------------------------------------------------- /training/EasyR1/verl/utils/logger/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/training/EasyR1/verl/utils/logger/__init__.py -------------------------------------------------------------------------------- /training/EasyR1/verl/utils/logger/aggregate_logger.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/training/EasyR1/verl/utils/logger/aggregate_logger.py -------------------------------------------------------------------------------- /training/EasyR1/verl/utils/model_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/training/EasyR1/verl/utils/model_utils.py -------------------------------------------------------------------------------- /training/EasyR1/verl/utils/py_functional.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/training/EasyR1/verl/utils/py_functional.py -------------------------------------------------------------------------------- /training/EasyR1/verl/utils/reward_score/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/training/EasyR1/verl/utils/reward_score/__init__.py -------------------------------------------------------------------------------- /training/EasyR1/verl/utils/reward_score/math.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/training/EasyR1/verl/utils/reward_score/math.py -------------------------------------------------------------------------------- /training/EasyR1/verl/utils/reward_score/r1v.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/training/EasyR1/verl/utils/reward_score/r1v.py -------------------------------------------------------------------------------- /training/EasyR1/verl/utils/seqlen_balancing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/training/EasyR1/verl/utils/seqlen_balancing.py -------------------------------------------------------------------------------- /training/EasyR1/verl/utils/tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/training/EasyR1/verl/utils/tokenizer.py -------------------------------------------------------------------------------- /training/EasyR1/verl/utils/torch_dtypes.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/training/EasyR1/verl/utils/torch_dtypes.py -------------------------------------------------------------------------------- /training/EasyR1/verl/utils/torch_functional.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/training/EasyR1/verl/utils/torch_functional.py -------------------------------------------------------------------------------- /training/EasyR1/verl/utils/tracking.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/training/EasyR1/verl/utils/tracking.py -------------------------------------------------------------------------------- /training/EasyR1/verl/utils/ulysses.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/training/EasyR1/verl/utils/ulysses.py -------------------------------------------------------------------------------- /training/EasyR1/verl/workers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/training/EasyR1/verl/workers/__init__.py -------------------------------------------------------------------------------- /training/EasyR1/verl/workers/actor/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/training/EasyR1/verl/workers/actor/__init__.py -------------------------------------------------------------------------------- /training/EasyR1/verl/workers/actor/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/training/EasyR1/verl/workers/actor/base.py -------------------------------------------------------------------------------- /training/EasyR1/verl/workers/actor/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/training/EasyR1/verl/workers/actor/config.py -------------------------------------------------------------------------------- /training/EasyR1/verl/workers/actor/dp_actor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/training/EasyR1/verl/workers/actor/dp_actor.py -------------------------------------------------------------------------------- /training/EasyR1/verl/workers/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/training/EasyR1/verl/workers/config.py -------------------------------------------------------------------------------- /training/EasyR1/verl/workers/critic/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/training/EasyR1/verl/workers/critic/__init__.py -------------------------------------------------------------------------------- /training/EasyR1/verl/workers/critic/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/training/EasyR1/verl/workers/critic/base.py -------------------------------------------------------------------------------- /training/EasyR1/verl/workers/critic/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/training/EasyR1/verl/workers/critic/config.py -------------------------------------------------------------------------------- /training/EasyR1/verl/workers/critic/dp_critic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/training/EasyR1/verl/workers/critic/dp_critic.py -------------------------------------------------------------------------------- /training/EasyR1/verl/workers/fsdp_workers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/training/EasyR1/verl/workers/fsdp_workers.py -------------------------------------------------------------------------------- /training/EasyR1/verl/workers/reward/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/training/EasyR1/verl/workers/reward/__init__.py -------------------------------------------------------------------------------- /training/EasyR1/verl/workers/reward/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/training/EasyR1/verl/workers/reward/config.py -------------------------------------------------------------------------------- /training/EasyR1/verl/workers/reward/custom.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/training/EasyR1/verl/workers/reward/custom.py -------------------------------------------------------------------------------- /training/EasyR1/verl/workers/rollout/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/training/EasyR1/verl/workers/rollout/__init__.py -------------------------------------------------------------------------------- /training/EasyR1/verl/workers/rollout/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/training/EasyR1/verl/workers/rollout/base.py -------------------------------------------------------------------------------- /training/EasyR1/verl/workers/rollout/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/training/EasyR1/verl/workers/rollout/config.py -------------------------------------------------------------------------------- /training/EasyR1/verl/workers/rollout/vllm_rollout/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/training/EasyR1/verl/workers/rollout/vllm_rollout/__init__.py -------------------------------------------------------------------------------- /training/EasyR1/verl/workers/rollout/vllm_rollout/vllm_rollout_spmd.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/training/EasyR1/verl/workers/rollout/vllm_rollout/vllm_rollout_spmd.py -------------------------------------------------------------------------------- /training/EasyR1/verl/workers/sharding_manager/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/training/EasyR1/verl/workers/sharding_manager/__init__.py -------------------------------------------------------------------------------- /training/EasyR1/verl/workers/sharding_manager/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/training/EasyR1/verl/workers/sharding_manager/base.py -------------------------------------------------------------------------------- /training/EasyR1/verl/workers/sharding_manager/fsdp_ulysses.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/training/EasyR1/verl/workers/sharding_manager/fsdp_ulysses.py -------------------------------------------------------------------------------- /training/EasyR1/verl/workers/sharding_manager/fsdp_vllm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/training/EasyR1/verl/workers/sharding_manager/fsdp_vllm.py -------------------------------------------------------------------------------- /training/scripts/rl_2048.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/training/scripts/rl_2048.sh -------------------------------------------------------------------------------- /training/scripts/rl_shisensho.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/training/scripts/rl_shisensho.sh -------------------------------------------------------------------------------- /training/scripts/rl_shisensho_cifar10.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/training/scripts/rl_shisensho_cifar10.sh -------------------------------------------------------------------------------- /training/scripts/rl_swap.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/training/scripts/rl_swap.sh -------------------------------------------------------------------------------- /vlmgym/gymnasium-2048/.github/workflows/build.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/vlmgym/gymnasium-2048/.github/workflows/build.yml -------------------------------------------------------------------------------- /vlmgym/gymnasium-2048/.github/workflows/publish.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/vlmgym/gymnasium-2048/.github/workflows/publish.yml -------------------------------------------------------------------------------- /vlmgym/gymnasium-2048/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/vlmgym/gymnasium-2048/.gitignore -------------------------------------------------------------------------------- /vlmgym/gymnasium-2048/.idea/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/vlmgym/gymnasium-2048/.idea/.gitignore -------------------------------------------------------------------------------- /vlmgym/gymnasium-2048/.idea/gymnasium-2048.iml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/vlmgym/gymnasium-2048/.idea/gymnasium-2048.iml -------------------------------------------------------------------------------- /vlmgym/gymnasium-2048/.idea/modules.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/vlmgym/gymnasium-2048/.idea/modules.xml -------------------------------------------------------------------------------- /vlmgym/gymnasium-2048/.pre-commit-config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/vlmgym/gymnasium-2048/.pre-commit-config.yaml -------------------------------------------------------------------------------- /vlmgym/gymnasium-2048/LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/vlmgym/gymnasium-2048/LICENSE -------------------------------------------------------------------------------- /vlmgym/gymnasium-2048/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/vlmgym/gymnasium-2048/README.md -------------------------------------------------------------------------------- /vlmgym/gymnasium-2048/figures/observation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/vlmgym/gymnasium-2048/figures/observation.png -------------------------------------------------------------------------------- /vlmgym/gymnasium-2048/figures/stats_random_policy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/vlmgym/gymnasium-2048/figures/stats_random_policy.png -------------------------------------------------------------------------------- /vlmgym/gymnasium-2048/figures/stats_tdl.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/vlmgym/gymnasium-2048/figures/stats_tdl.png -------------------------------------------------------------------------------- /vlmgym/gymnasium-2048/figures/stats_tdl_small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/vlmgym/gymnasium-2048/figures/stats_tdl_small.png -------------------------------------------------------------------------------- /vlmgym/gymnasium-2048/figures/training_tdl.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/vlmgym/gymnasium-2048/figures/training_tdl.png -------------------------------------------------------------------------------- /vlmgym/gymnasium-2048/figures/training_tdl_small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/vlmgym/gymnasium-2048/figures/training_tdl_small.png -------------------------------------------------------------------------------- /vlmgym/gymnasium-2048/models/tdl/best_n_tuple_network_policy.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/vlmgym/gymnasium-2048/models/tdl/best_n_tuple_network_policy.zip -------------------------------------------------------------------------------- /vlmgym/gymnasium-2048/models/tdl_small/best_n_tuple_network_policy.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/vlmgym/gymnasium-2048/models/tdl_small/best_n_tuple_network_policy.zip -------------------------------------------------------------------------------- /vlmgym/gymnasium-2048/pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/vlmgym/gymnasium-2048/pyproject.toml -------------------------------------------------------------------------------- /vlmgym/gymnasium-2048/scripts/enjoy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/vlmgym/gymnasium-2048/scripts/enjoy.py -------------------------------------------------------------------------------- /vlmgym/gymnasium-2048/scripts/evaluate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/vlmgym/gymnasium-2048/scripts/evaluate.py -------------------------------------------------------------------------------- /vlmgym/gymnasium-2048/scripts/play.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/vlmgym/gymnasium-2048/scripts/play.py -------------------------------------------------------------------------------- /vlmgym/gymnasium-2048/scripts/plot.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/vlmgym/gymnasium-2048/scripts/plot.py -------------------------------------------------------------------------------- /vlmgym/gymnasium-2048/scripts/random_policy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/vlmgym/gymnasium-2048/scripts/random_policy.py -------------------------------------------------------------------------------- /vlmgym/gymnasium-2048/scripts/train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/vlmgym/gymnasium-2048/scripts/train.py -------------------------------------------------------------------------------- /vlmgym/gymnasium-2048/setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/vlmgym/gymnasium-2048/setup.py -------------------------------------------------------------------------------- /vlmgym/gymnasium-2048/src/gymnasium_2048/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/vlmgym/gymnasium-2048/src/gymnasium_2048/__init__.py -------------------------------------------------------------------------------- /vlmgym/gymnasium-2048/src/gymnasium_2048/agents/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vlmgym/gymnasium-2048/src/gymnasium_2048/agents/ntuple/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/vlmgym/gymnasium-2048/src/gymnasium_2048/agents/ntuple/__init__.py -------------------------------------------------------------------------------- /vlmgym/gymnasium-2048/src/gymnasium_2048/agents/ntuple/factory.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/vlmgym/gymnasium-2048/src/gymnasium_2048/agents/ntuple/factory.py -------------------------------------------------------------------------------- /vlmgym/gymnasium-2048/src/gymnasium_2048/agents/ntuple/network.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/vlmgym/gymnasium-2048/src/gymnasium_2048/agents/ntuple/network.py -------------------------------------------------------------------------------- /vlmgym/gymnasium-2048/src/gymnasium_2048/agents/ntuple/policy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/vlmgym/gymnasium-2048/src/gymnasium_2048/agents/ntuple/policy.py -------------------------------------------------------------------------------- /vlmgym/gymnasium-2048/src/gymnasium_2048/envs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/vlmgym/gymnasium-2048/src/gymnasium_2048/envs/__init__.py -------------------------------------------------------------------------------- /vlmgym/gymnasium-2048/src/gymnasium_2048/envs/twenty_forty_eight.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/vlmgym/gymnasium-2048/src/gymnasium_2048/envs/twenty_forty_eight.py -------------------------------------------------------------------------------- /vlmgym/gymnasium-2048/src/gymnasium_2048/wrappers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/vlmgym/gymnasium-2048/src/gymnasium_2048/wrappers/__init__.py -------------------------------------------------------------------------------- /vlmgym/gymnasium-2048/src/gymnasium_2048/wrappers/illegal_reward.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/vlmgym/gymnasium-2048/src/gymnasium_2048/wrappers/illegal_reward.py -------------------------------------------------------------------------------- /vlmgym/gymnasium-2048/src/gymnasium_2048/wrappers/terminate_goal.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/vlmgym/gymnasium-2048/src/gymnasium_2048/wrappers/terminate_goal.py -------------------------------------------------------------------------------- /vlmgym/gymnasium-2048/src/gymnasium_2048/wrappers/terminate_illegal.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/vlmgym/gymnasium-2048/src/gymnasium_2048/wrappers/terminate_illegal.py -------------------------------------------------------------------------------- /vlmgym/gymnasium-2048/tests/test_agents.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/vlmgym/gymnasium-2048/tests/test_agents.py -------------------------------------------------------------------------------- /vlmgym/gymnasium-2048/tests/test_envs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/vlmgym/gymnasium-2048/tests/test_envs.py -------------------------------------------------------------------------------- /vlmgym/sandbox/MetaSandbox.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/vlmgym/sandbox/MetaSandbox.py -------------------------------------------------------------------------------- /vlmgym/sandbox/games/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/vlmgym/sandbox/games/__init__.py -------------------------------------------------------------------------------- /vlmgym/sandbox/games/game.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/vlmgym/sandbox/games/game.py -------------------------------------------------------------------------------- /vlmgym/sandbox/games/game2048.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/vlmgym/sandbox/games/game2048.py -------------------------------------------------------------------------------- /vlmgym/sandbox/games/gamebreakout.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/vlmgym/sandbox/games/gamebreakout.py -------------------------------------------------------------------------------- /vlmgym/sandbox/games/gamematch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/vlmgym/sandbox/games/gamematch.py -------------------------------------------------------------------------------- /vlmgym/sandbox/games/gamematch3.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/vlmgym/sandbox/games/gamematch3.py -------------------------------------------------------------------------------- /vlmgym/sandbox/games/gamematch_cifar.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/vlmgym/sandbox/games/gamematch_cifar.py -------------------------------------------------------------------------------- /vlmgym/test/eval_2048.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/vlmgym/test/eval_2048.py -------------------------------------------------------------------------------- /vlmgym/test/eval_shisensho.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/vlmgym/test/eval_shisensho.py -------------------------------------------------------------------------------- /vlmgym/test/eval_shisensho_cifar10.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/vlmgym/test/eval_shisensho_cifar10.py -------------------------------------------------------------------------------- /vlmgym/test/eval_swap.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenllliang/G1/HEAD/vlmgym/test/eval_swap.py --------------------------------------------------------------------------------