├── .gitignore
├── LICENSE
├── README.md
├── config
    ├── ds_config_zero2.json
    └── ds_config_zero3.json
├── data
    ├── calculator
    │   ├── agent_calculator_test.json
    │   └── agent_calculator_train_cases.json
    ├── calendar
    │   ├── agent_calendar_test.json
    │   └── agent_calendar_train.json
    ├── code
    │   ├── regenerate_code_train_data.json
    │   └── reserve_code_test_data.json
    ├── hh_rlhf
    │   ├── hh_rlhf_test.json
    │   └── hh_rlhf_train.json
    ├── mix
    │   ├── agent_mix_test.json
    │   └── agent_mix_train.json
    ├── mix_hh
    │   ├── agent_mix_hh_test.json
    │   └── agent_mix_hh_train.json
    ├── multi_tools
    │   ├── agent_multi_tools_test_action.json
    │   └── agent_multi_tools_train_action.json
    ├── retarded_bar
    │   └── agent_retarde_bar_test.json
    ├── translator
    │   ├── agent_translator_test.json
    │   ├── agent_translator_train.json
    │   └── translate_history.json
    ├── truthful_qa
    │   ├── agent_truthful_qa_test_0-500.json
    │   ├── agent_truthful_qa_test_1500-3000.json
    │   ├── agent_truthful_qa_test_3000-4114.json
    │   └── agent_truthful_qa_test_500-1500.json
    ├── weather
    │   ├── agent_weather_test.json
    │   ├── agent_weather_train.json
    │   └── weather_history.json
    ├── webgpt
    │   ├── agent_webgpt_test_case.json
    │   └── agent_webgpt_train.json
    └── wikisearch
    │   ├── agent_wikisearch_test_obs.json
    │   └── agent_wikisearch_train_obs.json
├── generate_rm.py
├── generate_themis.py
├── main.py
├── requirements.txt
├── resource
    └── model.png
├── run_bert.py
├── scripts
    ├── generate_rm.sh
    ├── generate_themis.sh
    ├── train_bert.sh
    ├── train_rm.sh
    └── train_themis.sh
└── src
    ├── __init__.py
    ├── data
        └── reward_dataset.py
    ├── models
        └── reward_model.py
    ├── template
        └── instruction_template.py
    ├── tools
        ├── __init__.py
        ├── calculator.py
        ├── calendar.py
        ├── code_interpreter.py
        ├── google_serper.py
        ├── translator.py
        ├── weather.py
        └── wikisearch.py
    └── utils
        ├── file_utils.py
        ├── metrics.py
        └── prompter.py


/.gitignore:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ernie-research/Tool-Augmented-Reward-Model/HEAD/.gitignore


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ernie-research/Tool-Augmented-Reward-Model/HEAD/LICENSE


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ernie-research/Tool-Augmented-Reward-Model/HEAD/README.md


--------------------------------------------------------------------------------
/config/ds_config_zero2.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ernie-research/Tool-Augmented-Reward-Model/HEAD/config/ds_config_zero2.json


--------------------------------------------------------------------------------
/config/ds_config_zero3.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ernie-research/Tool-Augmented-Reward-Model/HEAD/config/ds_config_zero3.json


--------------------------------------------------------------------------------
/data/calculator/agent_calculator_test.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ernie-research/Tool-Augmented-Reward-Model/HEAD/data/calculator/agent_calculator_test.json


--------------------------------------------------------------------------------
/data/calculator/agent_calculator_train_cases.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ernie-research/Tool-Augmented-Reward-Model/HEAD/data/calculator/agent_calculator_train_cases.json


--------------------------------------------------------------------------------
/data/calendar/agent_calendar_test.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ernie-research/Tool-Augmented-Reward-Model/HEAD/data/calendar/agent_calendar_test.json


--------------------------------------------------------------------------------
/data/calendar/agent_calendar_train.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ernie-research/Tool-Augmented-Reward-Model/HEAD/data/calendar/agent_calendar_train.json


--------------------------------------------------------------------------------
/data/code/regenerate_code_train_data.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ernie-research/Tool-Augmented-Reward-Model/HEAD/data/code/regenerate_code_train_data.json


--------------------------------------------------------------------------------
/data/code/reserve_code_test_data.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ernie-research/Tool-Augmented-Reward-Model/HEAD/data/code/reserve_code_test_data.json


--------------------------------------------------------------------------------
/data/hh_rlhf/hh_rlhf_test.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ernie-research/Tool-Augmented-Reward-Model/HEAD/data/hh_rlhf/hh_rlhf_test.json


--------------------------------------------------------------------------------
/data/hh_rlhf/hh_rlhf_train.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ernie-research/Tool-Augmented-Reward-Model/HEAD/data/hh_rlhf/hh_rlhf_train.json


--------------------------------------------------------------------------------
/data/mix/agent_mix_test.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ernie-research/Tool-Augmented-Reward-Model/HEAD/data/mix/agent_mix_test.json


--------------------------------------------------------------------------------
/data/mix/agent_mix_train.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ernie-research/Tool-Augmented-Reward-Model/HEAD/data/mix/agent_mix_train.json


--------------------------------------------------------------------------------
/data/mix_hh/agent_mix_hh_test.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ernie-research/Tool-Augmented-Reward-Model/HEAD/data/mix_hh/agent_mix_hh_test.json


--------------------------------------------------------------------------------
/data/mix_hh/agent_mix_hh_train.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ernie-research/Tool-Augmented-Reward-Model/HEAD/data/mix_hh/agent_mix_hh_train.json


--------------------------------------------------------------------------------
/data/multi_tools/agent_multi_tools_test_action.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ernie-research/Tool-Augmented-Reward-Model/HEAD/data/multi_tools/agent_multi_tools_test_action.json


--------------------------------------------------------------------------------
/data/multi_tools/agent_multi_tools_train_action.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ernie-research/Tool-Augmented-Reward-Model/HEAD/data/multi_tools/agent_multi_tools_train_action.json


--------------------------------------------------------------------------------
/data/retarded_bar/agent_retarde_bar_test.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ernie-research/Tool-Augmented-Reward-Model/HEAD/data/retarded_bar/agent_retarde_bar_test.json


--------------------------------------------------------------------------------
/data/translator/agent_translator_test.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ernie-research/Tool-Augmented-Reward-Model/HEAD/data/translator/agent_translator_test.json


--------------------------------------------------------------------------------
/data/translator/agent_translator_train.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ernie-research/Tool-Augmented-Reward-Model/HEAD/data/translator/agent_translator_train.json


--------------------------------------------------------------------------------
/data/translator/translate_history.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ernie-research/Tool-Augmented-Reward-Model/HEAD/data/translator/translate_history.json


--------------------------------------------------------------------------------
/data/truthful_qa/agent_truthful_qa_test_0-500.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ernie-research/Tool-Augmented-Reward-Model/HEAD/data/truthful_qa/agent_truthful_qa_test_0-500.json


--------------------------------------------------------------------------------
/data/truthful_qa/agent_truthful_qa_test_1500-3000.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ernie-research/Tool-Augmented-Reward-Model/HEAD/data/truthful_qa/agent_truthful_qa_test_1500-3000.json


--------------------------------------------------------------------------------
/data/truthful_qa/agent_truthful_qa_test_3000-4114.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ernie-research/Tool-Augmented-Reward-Model/HEAD/data/truthful_qa/agent_truthful_qa_test_3000-4114.json


--------------------------------------------------------------------------------
/data/truthful_qa/agent_truthful_qa_test_500-1500.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ernie-research/Tool-Augmented-Reward-Model/HEAD/data/truthful_qa/agent_truthful_qa_test_500-1500.json


--------------------------------------------------------------------------------
/data/weather/agent_weather_test.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ernie-research/Tool-Augmented-Reward-Model/HEAD/data/weather/agent_weather_test.json


--------------------------------------------------------------------------------
/data/weather/agent_weather_train.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ernie-research/Tool-Augmented-Reward-Model/HEAD/data/weather/agent_weather_train.json


--------------------------------------------------------------------------------
/data/weather/weather_history.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ernie-research/Tool-Augmented-Reward-Model/HEAD/data/weather/weather_history.json


--------------------------------------------------------------------------------
/data/webgpt/agent_webgpt_test_case.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ernie-research/Tool-Augmented-Reward-Model/HEAD/data/webgpt/agent_webgpt_test_case.json


--------------------------------------------------------------------------------
/data/webgpt/agent_webgpt_train.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ernie-research/Tool-Augmented-Reward-Model/HEAD/data/webgpt/agent_webgpt_train.json


--------------------------------------------------------------------------------
/data/wikisearch/agent_wikisearch_test_obs.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ernie-research/Tool-Augmented-Reward-Model/HEAD/data/wikisearch/agent_wikisearch_test_obs.json


--------------------------------------------------------------------------------
/data/wikisearch/agent_wikisearch_train_obs.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ernie-research/Tool-Augmented-Reward-Model/HEAD/data/wikisearch/agent_wikisearch_train_obs.json


--------------------------------------------------------------------------------
/generate_rm.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ernie-research/Tool-Augmented-Reward-Model/HEAD/generate_rm.py


--------------------------------------------------------------------------------
/generate_themis.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ernie-research/Tool-Augmented-Reward-Model/HEAD/generate_themis.py


--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ernie-research/Tool-Augmented-Reward-Model/HEAD/main.py


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ernie-research/Tool-Augmented-Reward-Model/HEAD/requirements.txt


--------------------------------------------------------------------------------
/resource/model.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ernie-research/Tool-Augmented-Reward-Model/HEAD/resource/model.png


--------------------------------------------------------------------------------
/run_bert.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ernie-research/Tool-Augmented-Reward-Model/HEAD/run_bert.py


--------------------------------------------------------------------------------
/scripts/generate_rm.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ernie-research/Tool-Augmented-Reward-Model/HEAD/scripts/generate_rm.sh


--------------------------------------------------------------------------------
/scripts/generate_themis.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ernie-research/Tool-Augmented-Reward-Model/HEAD/scripts/generate_themis.sh


--------------------------------------------------------------------------------
/scripts/train_bert.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ernie-research/Tool-Augmented-Reward-Model/HEAD/scripts/train_bert.sh


--------------------------------------------------------------------------------
/scripts/train_rm.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ernie-research/Tool-Augmented-Reward-Model/HEAD/scripts/train_rm.sh


--------------------------------------------------------------------------------
/scripts/train_themis.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ernie-research/Tool-Augmented-Reward-Model/HEAD/scripts/train_themis.sh


--------------------------------------------------------------------------------
/src/__init__.py:
--------------------------------------------------------------------------------
1 | from .tools import *


--------------------------------------------------------------------------------
/src/data/reward_dataset.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ernie-research/Tool-Augmented-Reward-Model/HEAD/src/data/reward_dataset.py


--------------------------------------------------------------------------------
/src/models/reward_model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ernie-research/Tool-Augmented-Reward-Model/HEAD/src/models/reward_model.py


--------------------------------------------------------------------------------
/src/template/instruction_template.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ernie-research/Tool-Augmented-Reward-Model/HEAD/src/template/instruction_template.py


--------------------------------------------------------------------------------
/src/tools/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ernie-research/Tool-Augmented-Reward-Model/HEAD/src/tools/__init__.py


--------------------------------------------------------------------------------
/src/tools/calculator.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ernie-research/Tool-Augmented-Reward-Model/HEAD/src/tools/calculator.py


--------------------------------------------------------------------------------
/src/tools/calendar.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ernie-research/Tool-Augmented-Reward-Model/HEAD/src/tools/calendar.py


--------------------------------------------------------------------------------
/src/tools/code_interpreter.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ernie-research/Tool-Augmented-Reward-Model/HEAD/src/tools/code_interpreter.py


--------------------------------------------------------------------------------
/src/tools/google_serper.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ernie-research/Tool-Augmented-Reward-Model/HEAD/src/tools/google_serper.py


--------------------------------------------------------------------------------
/src/tools/translator.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ernie-research/Tool-Augmented-Reward-Model/HEAD/src/tools/translator.py


--------------------------------------------------------------------------------
/src/tools/weather.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ernie-research/Tool-Augmented-Reward-Model/HEAD/src/tools/weather.py


--------------------------------------------------------------------------------
/src/tools/wikisearch.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ernie-research/Tool-Augmented-Reward-Model/HEAD/src/tools/wikisearch.py


--------------------------------------------------------------------------------
/src/utils/file_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ernie-research/Tool-Augmented-Reward-Model/HEAD/src/utils/file_utils.py


--------------------------------------------------------------------------------
/src/utils/metrics.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ernie-research/Tool-Augmented-Reward-Model/HEAD/src/utils/metrics.py


--------------------------------------------------------------------------------
/src/utils/prompter.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ernie-research/Tool-Augmented-Reward-Model/HEAD/src/utils/prompter.py


--------------------------------------------------------------------------------