├── .github └── ISSUE_TEMPLATE │ ├── bug-report.yml │ ├── config.yml │ ├── feature-request.yml │ └── questions.yml ├── LICENSE ├── README.md ├── breakthrough ├── README.md ├── build_train_and_test.ipynb ├── collect_initial_state_data.sh ├── collect_look_ahead_data.sh ├── collect_rollout_data.py ├── collect_win_rate_data.sh ├── configs │ ├── evaluate_config.py │ ├── prompt_llm_config.py │ ├── rollout_config.py │ └── win_rate.py ├── count_win_rate.py ├── data_for_train.py ├── eval_gpt4o_batch.py ├── evaluate.py ├── pipeline.sh └── prompt_llm.py ├── maze ├── README.md ├── __init__.py ├── env │ ├── __init__.py │ ├── env.py │ ├── environment.py │ ├── maze_utils.py │ ├── mazes.py │ ├── policies.py │ ├── randomness.py │ └── utils.py ├── environment.yml ├── gpt4 │ └── nlrl_maze.py ├── misc │ ├── __init__.py │ ├── compute_score_normalizations.py │ ├── convert_checkpoint_to_params.sh │ ├── convert_hf_checkpoint.py │ ├── convert_ilql_checkpoint.py │ └── view_data.py ├── raw_text.txt └── setup.py ├── nlrl ├── .gitignore ├── __init__.py ├── config.py ├── envs │ ├── __init__.py │ ├── breakthrough │ │ └── prompt.py │ ├── maze │ │ └── prompt.py │ └── tictactoe │ │ ├── __init__.py │ │ ├── func_utils.py │ │ ├── gen_value_prompt.py │ │ ├── policy_eval.py │ │ ├── prompt.py │ │ └── tictactoe.py ├── evaluate.py ├── llm_call.py ├── offline_infer.py ├── policy │ ├── __init__.py │ ├── llm_policy.py │ ├── llm_ppo_policy.py │ ├── mcts.py │ ├── minmax.py │ └── tictactoe_policy.py ├── train │ ├── dataset.py │ ├── model_config.py │ ├── train.sh │ └── train_sft.py └── utils.py ├── requirements.txt └── tictactoe ├── README.md ├── collect_rollout_data.py ├── data_for_train.py ├── environment.yml ├── experiments ├── run_gpt4o.sh ├── run_llama3.1_70b_prompting.sh ├── run_llama3.1_8b_ppo.sh ├── run_llama3.1_8b_prompting.sh ├── run_nlrl.sh └── run_nlrl_wo_action_selection_mask.sh ├── gym-tictactoe ├── .gitignore ├── LICENSE ├── README.md ├── examples │ ├── __init__.py │ ├── base_agent.py │ ├── gsmodels │ │ └── .gitignore │ ├── human_agent.py │ └── td_agent.py ├── gym_tictactoe │ ├── __init__.py │ └── env.py └── setup.py ├── merge_train_data.py ├── prompt_llm.py ├── scripts ├── pipeline_gpt4o.sh ├── pipeline_llama3.1_70b_prompting.sh ├── pipeline_llama3.1_8b_ppo.sh ├── pipeline_llama3.1_8b_prompting.sh ├── pipeline_nlac.sh └── pipeline_random.sh ├── train ├── dataset.py ├── model_config.py └── train_sft.py └── train_ppo.py /.github/ISSUE_TEMPLATE/bug-report.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/Natural-language-RL/HEAD/.github/ISSUE_TEMPLATE/bug-report.yml -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/Natural-language-RL/HEAD/.github/ISSUE_TEMPLATE/config.yml -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature-request.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/Natural-language-RL/HEAD/.github/ISSUE_TEMPLATE/feature-request.yml -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/questions.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/Natural-language-RL/HEAD/.github/ISSUE_TEMPLATE/questions.yml -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/Natural-language-RL/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/Natural-language-RL/HEAD/README.md -------------------------------------------------------------------------------- /breakthrough/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/Natural-language-RL/HEAD/breakthrough/README.md -------------------------------------------------------------------------------- /breakthrough/build_train_and_test.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/Natural-language-RL/HEAD/breakthrough/build_train_and_test.ipynb -------------------------------------------------------------------------------- /breakthrough/collect_initial_state_data.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/Natural-language-RL/HEAD/breakthrough/collect_initial_state_data.sh -------------------------------------------------------------------------------- /breakthrough/collect_look_ahead_data.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/Natural-language-RL/HEAD/breakthrough/collect_look_ahead_data.sh -------------------------------------------------------------------------------- /breakthrough/collect_rollout_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/Natural-language-RL/HEAD/breakthrough/collect_rollout_data.py -------------------------------------------------------------------------------- /breakthrough/collect_win_rate_data.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/Natural-language-RL/HEAD/breakthrough/collect_win_rate_data.sh -------------------------------------------------------------------------------- /breakthrough/configs/evaluate_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/Natural-language-RL/HEAD/breakthrough/configs/evaluate_config.py -------------------------------------------------------------------------------- /breakthrough/configs/prompt_llm_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/Natural-language-RL/HEAD/breakthrough/configs/prompt_llm_config.py -------------------------------------------------------------------------------- /breakthrough/configs/rollout_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/Natural-language-RL/HEAD/breakthrough/configs/rollout_config.py -------------------------------------------------------------------------------- /breakthrough/configs/win_rate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/Natural-language-RL/HEAD/breakthrough/configs/win_rate.py -------------------------------------------------------------------------------- /breakthrough/count_win_rate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/Natural-language-RL/HEAD/breakthrough/count_win_rate.py -------------------------------------------------------------------------------- /breakthrough/data_for_train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/Natural-language-RL/HEAD/breakthrough/data_for_train.py -------------------------------------------------------------------------------- /breakthrough/eval_gpt4o_batch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/Natural-language-RL/HEAD/breakthrough/eval_gpt4o_batch.py -------------------------------------------------------------------------------- /breakthrough/evaluate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/Natural-language-RL/HEAD/breakthrough/evaluate.py -------------------------------------------------------------------------------- /breakthrough/pipeline.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/Natural-language-RL/HEAD/breakthrough/pipeline.sh -------------------------------------------------------------------------------- /breakthrough/prompt_llm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/Natural-language-RL/HEAD/breakthrough/prompt_llm.py -------------------------------------------------------------------------------- /maze/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/Natural-language-RL/HEAD/maze/README.md -------------------------------------------------------------------------------- /maze/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /maze/env/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /maze/env/env.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/Natural-language-RL/HEAD/maze/env/env.py -------------------------------------------------------------------------------- /maze/env/environment.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/Natural-language-RL/HEAD/maze/env/environment.py -------------------------------------------------------------------------------- /maze/env/maze_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/Natural-language-RL/HEAD/maze/env/maze_utils.py -------------------------------------------------------------------------------- /maze/env/mazes.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/Natural-language-RL/HEAD/maze/env/mazes.py -------------------------------------------------------------------------------- /maze/env/policies.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/Natural-language-RL/HEAD/maze/env/policies.py -------------------------------------------------------------------------------- /maze/env/randomness.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/Natural-language-RL/HEAD/maze/env/randomness.py -------------------------------------------------------------------------------- /maze/env/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/Natural-language-RL/HEAD/maze/env/utils.py -------------------------------------------------------------------------------- /maze/environment.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/Natural-language-RL/HEAD/maze/environment.yml -------------------------------------------------------------------------------- /maze/gpt4/nlrl_maze.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/Natural-language-RL/HEAD/maze/gpt4/nlrl_maze.py -------------------------------------------------------------------------------- /maze/misc/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /maze/misc/compute_score_normalizations.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/Natural-language-RL/HEAD/maze/misc/compute_score_normalizations.py -------------------------------------------------------------------------------- /maze/misc/convert_checkpoint_to_params.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/Natural-language-RL/HEAD/maze/misc/convert_checkpoint_to_params.sh -------------------------------------------------------------------------------- /maze/misc/convert_hf_checkpoint.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/Natural-language-RL/HEAD/maze/misc/convert_hf_checkpoint.py -------------------------------------------------------------------------------- /maze/misc/convert_ilql_checkpoint.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/Natural-language-RL/HEAD/maze/misc/convert_ilql_checkpoint.py -------------------------------------------------------------------------------- /maze/misc/view_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/Natural-language-RL/HEAD/maze/misc/view_data.py -------------------------------------------------------------------------------- /maze/raw_text.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/Natural-language-RL/HEAD/maze/raw_text.txt -------------------------------------------------------------------------------- /maze/setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/Natural-language-RL/HEAD/maze/setup.py -------------------------------------------------------------------------------- /nlrl/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/Natural-language-RL/HEAD/nlrl/.gitignore -------------------------------------------------------------------------------- /nlrl/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /nlrl/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/Natural-language-RL/HEAD/nlrl/config.py -------------------------------------------------------------------------------- /nlrl/envs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/Natural-language-RL/HEAD/nlrl/envs/__init__.py -------------------------------------------------------------------------------- /nlrl/envs/breakthrough/prompt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/Natural-language-RL/HEAD/nlrl/envs/breakthrough/prompt.py -------------------------------------------------------------------------------- /nlrl/envs/maze/prompt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/Natural-language-RL/HEAD/nlrl/envs/maze/prompt.py -------------------------------------------------------------------------------- /nlrl/envs/tictactoe/__init__.py: -------------------------------------------------------------------------------- 1 | from .func_utils import * 2 | -------------------------------------------------------------------------------- /nlrl/envs/tictactoe/func_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/Natural-language-RL/HEAD/nlrl/envs/tictactoe/func_utils.py -------------------------------------------------------------------------------- /nlrl/envs/tictactoe/gen_value_prompt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/Natural-language-RL/HEAD/nlrl/envs/tictactoe/gen_value_prompt.py -------------------------------------------------------------------------------- /nlrl/envs/tictactoe/policy_eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/Natural-language-RL/HEAD/nlrl/envs/tictactoe/policy_eval.py -------------------------------------------------------------------------------- /nlrl/envs/tictactoe/prompt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/Natural-language-RL/HEAD/nlrl/envs/tictactoe/prompt.py -------------------------------------------------------------------------------- /nlrl/envs/tictactoe/tictactoe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/Natural-language-RL/HEAD/nlrl/envs/tictactoe/tictactoe.py -------------------------------------------------------------------------------- /nlrl/evaluate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/Natural-language-RL/HEAD/nlrl/evaluate.py -------------------------------------------------------------------------------- /nlrl/llm_call.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/Natural-language-RL/HEAD/nlrl/llm_call.py -------------------------------------------------------------------------------- /nlrl/offline_infer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/Natural-language-RL/HEAD/nlrl/offline_infer.py -------------------------------------------------------------------------------- /nlrl/policy/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/Natural-language-RL/HEAD/nlrl/policy/__init__.py -------------------------------------------------------------------------------- /nlrl/policy/llm_policy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/Natural-language-RL/HEAD/nlrl/policy/llm_policy.py -------------------------------------------------------------------------------- /nlrl/policy/llm_ppo_policy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/Natural-language-RL/HEAD/nlrl/policy/llm_ppo_policy.py -------------------------------------------------------------------------------- /nlrl/policy/mcts.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/Natural-language-RL/HEAD/nlrl/policy/mcts.py -------------------------------------------------------------------------------- /nlrl/policy/minmax.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/Natural-language-RL/HEAD/nlrl/policy/minmax.py -------------------------------------------------------------------------------- /nlrl/policy/tictactoe_policy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/Natural-language-RL/HEAD/nlrl/policy/tictactoe_policy.py -------------------------------------------------------------------------------- /nlrl/train/dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/Natural-language-RL/HEAD/nlrl/train/dataset.py -------------------------------------------------------------------------------- /nlrl/train/model_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/Natural-language-RL/HEAD/nlrl/train/model_config.py -------------------------------------------------------------------------------- /nlrl/train/train.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/Natural-language-RL/HEAD/nlrl/train/train.sh -------------------------------------------------------------------------------- /nlrl/train/train_sft.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/Natural-language-RL/HEAD/nlrl/train/train_sft.py -------------------------------------------------------------------------------- /nlrl/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/Natural-language-RL/HEAD/nlrl/utils.py -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/Natural-language-RL/HEAD/requirements.txt -------------------------------------------------------------------------------- /tictactoe/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/Natural-language-RL/HEAD/tictactoe/README.md -------------------------------------------------------------------------------- /tictactoe/collect_rollout_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/Natural-language-RL/HEAD/tictactoe/collect_rollout_data.py -------------------------------------------------------------------------------- /tictactoe/data_for_train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/Natural-language-RL/HEAD/tictactoe/data_for_train.py -------------------------------------------------------------------------------- /tictactoe/environment.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/Natural-language-RL/HEAD/tictactoe/environment.yml -------------------------------------------------------------------------------- /tictactoe/experiments/run_gpt4o.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/Natural-language-RL/HEAD/tictactoe/experiments/run_gpt4o.sh -------------------------------------------------------------------------------- /tictactoe/experiments/run_llama3.1_70b_prompting.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/Natural-language-RL/HEAD/tictactoe/experiments/run_llama3.1_70b_prompting.sh -------------------------------------------------------------------------------- /tictactoe/experiments/run_llama3.1_8b_ppo.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/Natural-language-RL/HEAD/tictactoe/experiments/run_llama3.1_8b_ppo.sh -------------------------------------------------------------------------------- /tictactoe/experiments/run_llama3.1_8b_prompting.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/Natural-language-RL/HEAD/tictactoe/experiments/run_llama3.1_8b_prompting.sh -------------------------------------------------------------------------------- /tictactoe/experiments/run_nlrl.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/Natural-language-RL/HEAD/tictactoe/experiments/run_nlrl.sh -------------------------------------------------------------------------------- /tictactoe/experiments/run_nlrl_wo_action_selection_mask.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/Natural-language-RL/HEAD/tictactoe/experiments/run_nlrl_wo_action_selection_mask.sh -------------------------------------------------------------------------------- /tictactoe/gym-tictactoe/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/Natural-language-RL/HEAD/tictactoe/gym-tictactoe/.gitignore -------------------------------------------------------------------------------- /tictactoe/gym-tictactoe/LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/Natural-language-RL/HEAD/tictactoe/gym-tictactoe/LICENSE -------------------------------------------------------------------------------- /tictactoe/gym-tictactoe/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/Natural-language-RL/HEAD/tictactoe/gym-tictactoe/README.md -------------------------------------------------------------------------------- /tictactoe/gym-tictactoe/examples/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tictactoe/gym-tictactoe/examples/base_agent.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/Natural-language-RL/HEAD/tictactoe/gym-tictactoe/examples/base_agent.py -------------------------------------------------------------------------------- /tictactoe/gym-tictactoe/examples/gsmodels/.gitignore: -------------------------------------------------------------------------------- 1 | *.* 2 | !.gitignore 3 | -------------------------------------------------------------------------------- /tictactoe/gym-tictactoe/examples/human_agent.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/Natural-language-RL/HEAD/tictactoe/gym-tictactoe/examples/human_agent.py -------------------------------------------------------------------------------- /tictactoe/gym-tictactoe/examples/td_agent.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/Natural-language-RL/HEAD/tictactoe/gym-tictactoe/examples/td_agent.py -------------------------------------------------------------------------------- /tictactoe/gym-tictactoe/gym_tictactoe/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tictactoe/gym-tictactoe/gym_tictactoe/env.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/Natural-language-RL/HEAD/tictactoe/gym-tictactoe/gym_tictactoe/env.py -------------------------------------------------------------------------------- /tictactoe/gym-tictactoe/setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/Natural-language-RL/HEAD/tictactoe/gym-tictactoe/setup.py -------------------------------------------------------------------------------- /tictactoe/merge_train_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/Natural-language-RL/HEAD/tictactoe/merge_train_data.py -------------------------------------------------------------------------------- /tictactoe/prompt_llm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/Natural-language-RL/HEAD/tictactoe/prompt_llm.py -------------------------------------------------------------------------------- /tictactoe/scripts/pipeline_gpt4o.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/Natural-language-RL/HEAD/tictactoe/scripts/pipeline_gpt4o.sh -------------------------------------------------------------------------------- /tictactoe/scripts/pipeline_llama3.1_70b_prompting.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/Natural-language-RL/HEAD/tictactoe/scripts/pipeline_llama3.1_70b_prompting.sh -------------------------------------------------------------------------------- /tictactoe/scripts/pipeline_llama3.1_8b_ppo.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/Natural-language-RL/HEAD/tictactoe/scripts/pipeline_llama3.1_8b_ppo.sh -------------------------------------------------------------------------------- /tictactoe/scripts/pipeline_llama3.1_8b_prompting.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/Natural-language-RL/HEAD/tictactoe/scripts/pipeline_llama3.1_8b_prompting.sh -------------------------------------------------------------------------------- /tictactoe/scripts/pipeline_nlac.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/Natural-language-RL/HEAD/tictactoe/scripts/pipeline_nlac.sh -------------------------------------------------------------------------------- /tictactoe/scripts/pipeline_random.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/Natural-language-RL/HEAD/tictactoe/scripts/pipeline_random.sh -------------------------------------------------------------------------------- /tictactoe/train/dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/Natural-language-RL/HEAD/tictactoe/train/dataset.py -------------------------------------------------------------------------------- /tictactoe/train/model_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/Natural-language-RL/HEAD/tictactoe/train/model_config.py -------------------------------------------------------------------------------- /tictactoe/train/train_sft.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/Natural-language-RL/HEAD/tictactoe/train/train_sft.py -------------------------------------------------------------------------------- /tictactoe/train_ppo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/Natural-language-RL/HEAD/tictactoe/train_ppo.py --------------------------------------------------------------------------------