├── .env.example
├── .envrc
├── .flake8
├── .gitignore
├── LICENSE
├── README.md
├── instruction_format_json
    ├── alpaca_farm_prompt_format
    │   └── ln.json
    └── prompt_response_format
    │   ├── deberta_sep.json
    │   └── prompter_assistant.json
├── notebook
    └── fDPO-propostion.ipynb
├── poetry.lock
├── pyproject.toml
├── scripts
    ├── 1.4b
    │   ├── dpo_high.sh
    │   ├── dpo_mix.sh
    │   └── fdpo_mix.sh
    ├── 160m
    │   ├── dpo_high.sh
    │   ├── dpo_mix.sh
    │   └── fdpo_mix.sh
    └── test.sh
└── src
    └── filtered_dpo
        ├── __init__.py
        ├── __main__.py
        ├── callbacks.py
        ├── evaluation.py
        ├── reward_model.py
        └── utils.py


/.env.example:
--------------------------------------------------------------------------------
1 | HF_TOKEN=""
2 | 


--------------------------------------------------------------------------------
/.envrc:
--------------------------------------------------------------------------------
1 | dotenv
2 | # Point PYTHONPATH at this repo's src directory (absolute path built from $PWD)
3 | export PYTHONPATH=${PWD}/src
4 | 


--------------------------------------------------------------------------------
/.flake8:
--------------------------------------------------------------------------------
1 | [flake8]
2 | extend-ignore = E203
3 | max-line-length = 119


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CyberAgentAILab/filtered-dpo/HEAD/.gitignore


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CyberAgentAILab/filtered-dpo/HEAD/LICENSE


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CyberAgentAILab/filtered-dpo/HEAD/README.md


--------------------------------------------------------------------------------
/instruction_format_json/alpaca_farm_prompt_format/ln.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CyberAgentAILab/filtered-dpo/HEAD/instruction_format_json/alpaca_farm_prompt_format/ln.json


--------------------------------------------------------------------------------
/instruction_format_json/prompt_response_format/deberta_sep.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CyberAgentAILab/filtered-dpo/HEAD/instruction_format_json/prompt_response_format/deberta_sep.json


--------------------------------------------------------------------------------
/instruction_format_json/prompt_response_format/prompter_assistant.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CyberAgentAILab/filtered-dpo/HEAD/instruction_format_json/prompt_response_format/prompter_assistant.json


--------------------------------------------------------------------------------
/notebook/fDPO-propostion.ipynb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CyberAgentAILab/filtered-dpo/HEAD/notebook/fDPO-propostion.ipynb


--------------------------------------------------------------------------------
/poetry.lock:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CyberAgentAILab/filtered-dpo/HEAD/poetry.lock


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CyberAgentAILab/filtered-dpo/HEAD/pyproject.toml


--------------------------------------------------------------------------------
/scripts/1.4b/dpo_high.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CyberAgentAILab/filtered-dpo/HEAD/scripts/1.4b/dpo_high.sh


--------------------------------------------------------------------------------
/scripts/1.4b/dpo_mix.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CyberAgentAILab/filtered-dpo/HEAD/scripts/1.4b/dpo_mix.sh


--------------------------------------------------------------------------------
/scripts/1.4b/fdpo_mix.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CyberAgentAILab/filtered-dpo/HEAD/scripts/1.4b/fdpo_mix.sh


--------------------------------------------------------------------------------
/scripts/160m/dpo_high.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CyberAgentAILab/filtered-dpo/HEAD/scripts/160m/dpo_high.sh


--------------------------------------------------------------------------------
/scripts/160m/dpo_mix.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CyberAgentAILab/filtered-dpo/HEAD/scripts/160m/dpo_mix.sh


--------------------------------------------------------------------------------
/scripts/160m/fdpo_mix.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CyberAgentAILab/filtered-dpo/HEAD/scripts/160m/fdpo_mix.sh


--------------------------------------------------------------------------------
/scripts/test.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CyberAgentAILab/filtered-dpo/HEAD/scripts/test.sh


--------------------------------------------------------------------------------
/src/filtered_dpo/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/src/filtered_dpo/__main__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CyberAgentAILab/filtered-dpo/HEAD/src/filtered_dpo/__main__.py


--------------------------------------------------------------------------------
/src/filtered_dpo/callbacks.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CyberAgentAILab/filtered-dpo/HEAD/src/filtered_dpo/callbacks.py


--------------------------------------------------------------------------------
/src/filtered_dpo/evaluation.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CyberAgentAILab/filtered-dpo/HEAD/src/filtered_dpo/evaluation.py


--------------------------------------------------------------------------------
/src/filtered_dpo/reward_model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CyberAgentAILab/filtered-dpo/HEAD/src/filtered_dpo/reward_model.py


--------------------------------------------------------------------------------
/src/filtered_dpo/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CyberAgentAILab/filtered-dpo/HEAD/src/filtered_dpo/utils.py


--------------------------------------------------------------------------------