├── .gitignore ├── LICENSE ├── README.md ├── config ├── ds_config_zero2.json └── ds_config_zero3.json ├── data ├── calculator │ ├── agent_calculator_test.json │ └── agent_calculator_train_cases.json ├── calendar │ ├── agent_calendar_test.json │ └── agent_calendar_train.json ├── code │ ├── regenerate_code_train_data.json │ └── reserve_code_test_data.json ├── hh_rlhf │ ├── hh_rlhf_test.json │ └── hh_rlhf_train.json ├── mix │ ├── agent_mix_test.json │ └── agent_mix_train.json ├── mix_hh │ ├── agent_mix_hh_test.json │ └── agent_mix_hh_train.json ├── multi_tools │ ├── agent_multi_tools_test_action.json │ └── agent_multi_tools_train_action.json ├── retarded_bar │ └── agent_retarde_bar_test.json ├── translator │ ├── agent_translator_test.json │ ├── agent_translator_train.json │ └── translate_history.json ├── truthful_qa │ ├── agent_truthful_qa_test_0-500.json │ ├── agent_truthful_qa_test_1500-3000.json │ ├── agent_truthful_qa_test_3000-4114.json │ └── agent_truthful_qa_test_500-1500.json ├── weather │ ├── agent_weather_test.json │ ├── agent_weather_train.json │ └── weather_history.json ├── webgpt │ ├── agent_webgpt_test_case.json │ └── agent_webgpt_train.json └── wikisearch │ ├── agent_wikisearch_test_obs.json │ └── agent_wikisearch_train_obs.json ├── generate_rm.py ├── generate_themis.py ├── main.py ├── requirements.txt ├── resource └── model.png ├── run_bert.py ├── scripts ├── generate_rm.sh ├── generate_themis.sh ├── train_bert.sh ├── train_rm.sh └── train_themis.sh └── src ├── __init__.py ├── data └── reward_dataset.py ├── models └── reward_model.py ├── template └── instruction_template.py ├── tools ├── __init__.py ├── calculator.py ├── calendar.py ├── code_interpreter.py ├── google_serper.py ├── translator.py ├── weather.py └── wikisearch.py └── utils ├── file_utils.py ├── metrics.py └── prompter.py /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ernie-research/Tool-Augmented-Reward-Model/HEAD/.gitignore -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ernie-research/Tool-Augmented-Reward-Model/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ernie-research/Tool-Augmented-Reward-Model/HEAD/README.md -------------------------------------------------------------------------------- /config/ds_config_zero2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ernie-research/Tool-Augmented-Reward-Model/HEAD/config/ds_config_zero2.json -------------------------------------------------------------------------------- /config/ds_config_zero3.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ernie-research/Tool-Augmented-Reward-Model/HEAD/config/ds_config_zero3.json -------------------------------------------------------------------------------- /data/calculator/agent_calculator_test.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ernie-research/Tool-Augmented-Reward-Model/HEAD/data/calculator/agent_calculator_test.json -------------------------------------------------------------------------------- /data/calculator/agent_calculator_train_cases.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ernie-research/Tool-Augmented-Reward-Model/HEAD/data/calculator/agent_calculator_train_cases.json -------------------------------------------------------------------------------- /data/calendar/agent_calendar_test.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ernie-research/Tool-Augmented-Reward-Model/HEAD/data/calendar/agent_calendar_test.json -------------------------------------------------------------------------------- /data/calendar/agent_calendar_train.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ernie-research/Tool-Augmented-Reward-Model/HEAD/data/calendar/agent_calendar_train.json -------------------------------------------------------------------------------- /data/code/regenerate_code_train_data.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ernie-research/Tool-Augmented-Reward-Model/HEAD/data/code/regenerate_code_train_data.json -------------------------------------------------------------------------------- /data/code/reserve_code_test_data.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ernie-research/Tool-Augmented-Reward-Model/HEAD/data/code/reserve_code_test_data.json -------------------------------------------------------------------------------- /data/hh_rlhf/hh_rlhf_test.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ernie-research/Tool-Augmented-Reward-Model/HEAD/data/hh_rlhf/hh_rlhf_test.json -------------------------------------------------------------------------------- /data/hh_rlhf/hh_rlhf_train.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ernie-research/Tool-Augmented-Reward-Model/HEAD/data/hh_rlhf/hh_rlhf_train.json -------------------------------------------------------------------------------- /data/mix/agent_mix_test.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ernie-research/Tool-Augmented-Reward-Model/HEAD/data/mix/agent_mix_test.json -------------------------------------------------------------------------------- /data/mix/agent_mix_train.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ernie-research/Tool-Augmented-Reward-Model/HEAD/data/mix/agent_mix_train.json -------------------------------------------------------------------------------- /data/mix_hh/agent_mix_hh_test.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ernie-research/Tool-Augmented-Reward-Model/HEAD/data/mix_hh/agent_mix_hh_test.json -------------------------------------------------------------------------------- /data/mix_hh/agent_mix_hh_train.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ernie-research/Tool-Augmented-Reward-Model/HEAD/data/mix_hh/agent_mix_hh_train.json -------------------------------------------------------------------------------- /data/multi_tools/agent_multi_tools_test_action.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ernie-research/Tool-Augmented-Reward-Model/HEAD/data/multi_tools/agent_multi_tools_test_action.json -------------------------------------------------------------------------------- /data/multi_tools/agent_multi_tools_train_action.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ernie-research/Tool-Augmented-Reward-Model/HEAD/data/multi_tools/agent_multi_tools_train_action.json -------------------------------------------------------------------------------- /data/retarded_bar/agent_retarde_bar_test.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ernie-research/Tool-Augmented-Reward-Model/HEAD/data/retarded_bar/agent_retarde_bar_test.json -------------------------------------------------------------------------------- /data/translator/agent_translator_test.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ernie-research/Tool-Augmented-Reward-Model/HEAD/data/translator/agent_translator_test.json -------------------------------------------------------------------------------- /data/translator/agent_translator_train.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ernie-research/Tool-Augmented-Reward-Model/HEAD/data/translator/agent_translator_train.json -------------------------------------------------------------------------------- /data/translator/translate_history.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ernie-research/Tool-Augmented-Reward-Model/HEAD/data/translator/translate_history.json -------------------------------------------------------------------------------- /data/truthful_qa/agent_truthful_qa_test_0-500.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ernie-research/Tool-Augmented-Reward-Model/HEAD/data/truthful_qa/agent_truthful_qa_test_0-500.json -------------------------------------------------------------------------------- /data/truthful_qa/agent_truthful_qa_test_1500-3000.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ernie-research/Tool-Augmented-Reward-Model/HEAD/data/truthful_qa/agent_truthful_qa_test_1500-3000.json -------------------------------------------------------------------------------- /data/truthful_qa/agent_truthful_qa_test_3000-4114.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ernie-research/Tool-Augmented-Reward-Model/HEAD/data/truthful_qa/agent_truthful_qa_test_3000-4114.json -------------------------------------------------------------------------------- /data/truthful_qa/agent_truthful_qa_test_500-1500.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ernie-research/Tool-Augmented-Reward-Model/HEAD/data/truthful_qa/agent_truthful_qa_test_500-1500.json -------------------------------------------------------------------------------- /data/weather/agent_weather_test.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ernie-research/Tool-Augmented-Reward-Model/HEAD/data/weather/agent_weather_test.json -------------------------------------------------------------------------------- /data/weather/agent_weather_train.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ernie-research/Tool-Augmented-Reward-Model/HEAD/data/weather/agent_weather_train.json -------------------------------------------------------------------------------- /data/weather/weather_history.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ernie-research/Tool-Augmented-Reward-Model/HEAD/data/weather/weather_history.json -------------------------------------------------------------------------------- /data/webgpt/agent_webgpt_test_case.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ernie-research/Tool-Augmented-Reward-Model/HEAD/data/webgpt/agent_webgpt_test_case.json -------------------------------------------------------------------------------- /data/webgpt/agent_webgpt_train.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ernie-research/Tool-Augmented-Reward-Model/HEAD/data/webgpt/agent_webgpt_train.json -------------------------------------------------------------------------------- /data/wikisearch/agent_wikisearch_test_obs.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ernie-research/Tool-Augmented-Reward-Model/HEAD/data/wikisearch/agent_wikisearch_test_obs.json -------------------------------------------------------------------------------- /data/wikisearch/agent_wikisearch_train_obs.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ernie-research/Tool-Augmented-Reward-Model/HEAD/data/wikisearch/agent_wikisearch_train_obs.json -------------------------------------------------------------------------------- /generate_rm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ernie-research/Tool-Augmented-Reward-Model/HEAD/generate_rm.py -------------------------------------------------------------------------------- /generate_themis.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ernie-research/Tool-Augmented-Reward-Model/HEAD/generate_themis.py -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ernie-research/Tool-Augmented-Reward-Model/HEAD/main.py -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ernie-research/Tool-Augmented-Reward-Model/HEAD/requirements.txt -------------------------------------------------------------------------------- /resource/model.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ernie-research/Tool-Augmented-Reward-Model/HEAD/resource/model.png -------------------------------------------------------------------------------- /run_bert.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ernie-research/Tool-Augmented-Reward-Model/HEAD/run_bert.py -------------------------------------------------------------------------------- /scripts/generate_rm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ernie-research/Tool-Augmented-Reward-Model/HEAD/scripts/generate_rm.sh -------------------------------------------------------------------------------- /scripts/generate_themis.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ernie-research/Tool-Augmented-Reward-Model/HEAD/scripts/generate_themis.sh -------------------------------------------------------------------------------- /scripts/train_bert.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ernie-research/Tool-Augmented-Reward-Model/HEAD/scripts/train_bert.sh -------------------------------------------------------------------------------- /scripts/train_rm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ernie-research/Tool-Augmented-Reward-Model/HEAD/scripts/train_rm.sh -------------------------------------------------------------------------------- /scripts/train_themis.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ernie-research/Tool-Augmented-Reward-Model/HEAD/scripts/train_themis.sh -------------------------------------------------------------------------------- /src/__init__.py: -------------------------------------------------------------------------------- 1 | from .tools import * -------------------------------------------------------------------------------- /src/data/reward_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ernie-research/Tool-Augmented-Reward-Model/HEAD/src/data/reward_dataset.py -------------------------------------------------------------------------------- /src/models/reward_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ernie-research/Tool-Augmented-Reward-Model/HEAD/src/models/reward_model.py -------------------------------------------------------------------------------- /src/template/instruction_template.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ernie-research/Tool-Augmented-Reward-Model/HEAD/src/template/instruction_template.py -------------------------------------------------------------------------------- /src/tools/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ernie-research/Tool-Augmented-Reward-Model/HEAD/src/tools/__init__.py -------------------------------------------------------------------------------- /src/tools/calculator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ernie-research/Tool-Augmented-Reward-Model/HEAD/src/tools/calculator.py -------------------------------------------------------------------------------- /src/tools/calendar.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ernie-research/Tool-Augmented-Reward-Model/HEAD/src/tools/calendar.py -------------------------------------------------------------------------------- /src/tools/code_interpreter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ernie-research/Tool-Augmented-Reward-Model/HEAD/src/tools/code_interpreter.py -------------------------------------------------------------------------------- /src/tools/google_serper.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ernie-research/Tool-Augmented-Reward-Model/HEAD/src/tools/google_serper.py -------------------------------------------------------------------------------- /src/tools/translator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ernie-research/Tool-Augmented-Reward-Model/HEAD/src/tools/translator.py -------------------------------------------------------------------------------- /src/tools/weather.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ernie-research/Tool-Augmented-Reward-Model/HEAD/src/tools/weather.py -------------------------------------------------------------------------------- /src/tools/wikisearch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ernie-research/Tool-Augmented-Reward-Model/HEAD/src/tools/wikisearch.py -------------------------------------------------------------------------------- /src/utils/file_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ernie-research/Tool-Augmented-Reward-Model/HEAD/src/utils/file_utils.py -------------------------------------------------------------------------------- /src/utils/metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ernie-research/Tool-Augmented-Reward-Model/HEAD/src/utils/metrics.py -------------------------------------------------------------------------------- /src/utils/prompter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ernie-research/Tool-Augmented-Reward-Model/HEAD/src/utils/prompter.py --------------------------------------------------------------------------------