├── README.md
├── code
    ├── evalaute.py
    ├── inference.py
    └── models
    │   ├── __pycache__
    │       ├── abab.cpython-311.pyc
    │       ├── deepseek_v2.cpython-311.pyc
    │       ├── ernie35.cpython-311.pyc
    │       ├── ernie4.cpython-311.pyc
    │       ├── ernie4turbo.cpython-311.pyc
    │       ├── glm4.cpython-311.pyc
    │       ├── gpt35_turbo_1106.cpython-311.pyc
    │       ├── gpt4_0125_preview.cpython-311.pyc
    │       ├── gpt4_turbo_0409.cpython-311.pyc
    │       ├── gpt4o.cpython-311.pyc
    │       ├── moonshot.cpython-311.pyc
    │       ├── qwen2_72b_instruct.cpython-311.pyc
    │       ├── qwen_2_72b_instruct.cpython-311.pyc
    │       └── yi_large.cpython-311.pyc
    │   ├── abab.py
    │   ├── claude_3_5_sonnet.py
    │   ├── deepseek_v2.py
    │   ├── deepseek_v2_lite_chat.py
    │   ├── ernie35.py
    │   ├── ernie4.py
    │   ├── ernie4turbo.py
    │   ├── glm4.py
    │   ├── gpt35_turbo_1106.py
    │   ├── gpt4_0125_preview.py
    │   ├── gpt4_turbo_0409.py
    │   ├── gpt4o.py
    │   ├── moonshot.py
    │   ├── qwen15_110b_chat.py
    │   ├── qwen2_72b_instruct.py
    │   ├── yi_15_34b_chat.py
    │   └── yi_large.py
├── data
    └── cfbench_data.json
├── output
    ├── judge
    │   ├── abab_eval.json
    │   ├── deepseek_v2_eval.json
    │   ├── ernie35_eval.json
    │   ├── ernie4_eval.json
    │   ├── glm4_eval.json
    │   ├── gpt35_turbo_1106_eval.json
    │   ├── gpt4_0125_preview_eval.json
    │   ├── gpt4_turbo_0409_eval.json
    │   ├── gpt4o_eval.json
    │   ├── moonshot_eval.json
    │   └── yi_large_eval.json
    ├── response
    │   ├── abab_infer.json
    │   ├── deepseek_v2_infer.json
    │   ├── ernie35_infer.json
    │   ├── ernie4_infer.json
    │   ├── ernie4turbo_infer.json
    │   ├── glm4_infer.json
    │   ├── gpt35_turbo_1106_infer.json
    │   ├── gpt4_0125_preview_infer.json
    │   ├── gpt4_turbo_0409_infer.json
    │   ├── gpt4o_infer.json
    │   ├── moonshot_infer.json
    │   └── yi_large_infer.json
    └── scores.xlsx
├── requirements.txt
├── resources
    ├── img
    │   ├── 1_introduction_case.png
    │   ├── 2_pipline.png
    │   ├── 4_constraints_results.png
    │   ├── 5_domain_nlp_results.png
    │   └── leaderboard.png
    └── paper
    │   └── CFBench- A Comprehensive Constraints-Following Benchmark for LLMs.pdf
└── run.sh


/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PKU-Baichuan-MLSystemLab/CFBench/HEAD/README.md


--------------------------------------------------------------------------------
/code/evalaute.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PKU-Baichuan-MLSystemLab/CFBench/HEAD/code/evalaute.py


--------------------------------------------------------------------------------
/code/inference.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PKU-Baichuan-MLSystemLab/CFBench/HEAD/code/inference.py


--------------------------------------------------------------------------------
/code/models/__pycache__/abab.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PKU-Baichuan-MLSystemLab/CFBench/HEAD/code/models/__pycache__/abab.cpython-311.pyc


--------------------------------------------------------------------------------
/code/models/__pycache__/deepseek_v2.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PKU-Baichuan-MLSystemLab/CFBench/HEAD/code/models/__pycache__/deepseek_v2.cpython-311.pyc


--------------------------------------------------------------------------------
/code/models/__pycache__/ernie35.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PKU-Baichuan-MLSystemLab/CFBench/HEAD/code/models/__pycache__/ernie35.cpython-311.pyc


--------------------------------------------------------------------------------
/code/models/__pycache__/ernie4.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PKU-Baichuan-MLSystemLab/CFBench/HEAD/code/models/__pycache__/ernie4.cpython-311.pyc


--------------------------------------------------------------------------------
/code/models/__pycache__/ernie4turbo.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PKU-Baichuan-MLSystemLab/CFBench/HEAD/code/models/__pycache__/ernie4turbo.cpython-311.pyc


--------------------------------------------------------------------------------
/code/models/__pycache__/glm4.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PKU-Baichuan-MLSystemLab/CFBench/HEAD/code/models/__pycache__/glm4.cpython-311.pyc


--------------------------------------------------------------------------------
/code/models/__pycache__/gpt35_turbo_1106.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PKU-Baichuan-MLSystemLab/CFBench/HEAD/code/models/__pycache__/gpt35_turbo_1106.cpython-311.pyc


--------------------------------------------------------------------------------
/code/models/__pycache__/gpt4_0125_preview.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PKU-Baichuan-MLSystemLab/CFBench/HEAD/code/models/__pycache__/gpt4_0125_preview.cpython-311.pyc


--------------------------------------------------------------------------------
/code/models/__pycache__/gpt4_turbo_0409.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PKU-Baichuan-MLSystemLab/CFBench/HEAD/code/models/__pycache__/gpt4_turbo_0409.cpython-311.pyc


--------------------------------------------------------------------------------
/code/models/__pycache__/gpt4o.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PKU-Baichuan-MLSystemLab/CFBench/HEAD/code/models/__pycache__/gpt4o.cpython-311.pyc


--------------------------------------------------------------------------------
/code/models/__pycache__/moonshot.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PKU-Baichuan-MLSystemLab/CFBench/HEAD/code/models/__pycache__/moonshot.cpython-311.pyc


--------------------------------------------------------------------------------
/code/models/__pycache__/qwen2_72b_instruct.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PKU-Baichuan-MLSystemLab/CFBench/HEAD/code/models/__pycache__/qwen2_72b_instruct.cpython-311.pyc


--------------------------------------------------------------------------------
/code/models/__pycache__/qwen_2_72b_instruct.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PKU-Baichuan-MLSystemLab/CFBench/HEAD/code/models/__pycache__/qwen_2_72b_instruct.cpython-311.pyc


--------------------------------------------------------------------------------
/code/models/__pycache__/yi_large.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PKU-Baichuan-MLSystemLab/CFBench/HEAD/code/models/__pycache__/yi_large.cpython-311.pyc


--------------------------------------------------------------------------------
/code/models/abab.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PKU-Baichuan-MLSystemLab/CFBench/HEAD/code/models/abab.py


--------------------------------------------------------------------------------
/code/models/claude_3_5_sonnet.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PKU-Baichuan-MLSystemLab/CFBench/HEAD/code/models/claude_3_5_sonnet.py


--------------------------------------------------------------------------------
/code/models/deepseek_v2.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PKU-Baichuan-MLSystemLab/CFBench/HEAD/code/models/deepseek_v2.py


--------------------------------------------------------------------------------
/code/models/deepseek_v2_lite_chat.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PKU-Baichuan-MLSystemLab/CFBench/HEAD/code/models/deepseek_v2_lite_chat.py


--------------------------------------------------------------------------------
/code/models/ernie35.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PKU-Baichuan-MLSystemLab/CFBench/HEAD/code/models/ernie35.py


--------------------------------------------------------------------------------
/code/models/ernie4.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PKU-Baichuan-MLSystemLab/CFBench/HEAD/code/models/ernie4.py


--------------------------------------------------------------------------------
/code/models/ernie4turbo.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PKU-Baichuan-MLSystemLab/CFBench/HEAD/code/models/ernie4turbo.py


--------------------------------------------------------------------------------
/code/models/glm4.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PKU-Baichuan-MLSystemLab/CFBench/HEAD/code/models/glm4.py


--------------------------------------------------------------------------------
/code/models/gpt35_turbo_1106.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PKU-Baichuan-MLSystemLab/CFBench/HEAD/code/models/gpt35_turbo_1106.py


--------------------------------------------------------------------------------
/code/models/gpt4_0125_preview.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PKU-Baichuan-MLSystemLab/CFBench/HEAD/code/models/gpt4_0125_preview.py


--------------------------------------------------------------------------------
/code/models/gpt4_turbo_0409.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PKU-Baichuan-MLSystemLab/CFBench/HEAD/code/models/gpt4_turbo_0409.py


--------------------------------------------------------------------------------
/code/models/gpt4o.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PKU-Baichuan-MLSystemLab/CFBench/HEAD/code/models/gpt4o.py


--------------------------------------------------------------------------------
/code/models/moonshot.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PKU-Baichuan-MLSystemLab/CFBench/HEAD/code/models/moonshot.py


--------------------------------------------------------------------------------
/code/models/qwen15_110b_chat.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PKU-Baichuan-MLSystemLab/CFBench/HEAD/code/models/qwen15_110b_chat.py


--------------------------------------------------------------------------------
/code/models/qwen2_72b_instruct.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PKU-Baichuan-MLSystemLab/CFBench/HEAD/code/models/qwen2_72b_instruct.py


--------------------------------------------------------------------------------
/code/models/yi_15_34b_chat.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PKU-Baichuan-MLSystemLab/CFBench/HEAD/code/models/yi_15_34b_chat.py


--------------------------------------------------------------------------------
/code/models/yi_large.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PKU-Baichuan-MLSystemLab/CFBench/HEAD/code/models/yi_large.py


--------------------------------------------------------------------------------
/data/cfbench_data.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PKU-Baichuan-MLSystemLab/CFBench/HEAD/data/cfbench_data.json


--------------------------------------------------------------------------------
/output/judge/abab_eval.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PKU-Baichuan-MLSystemLab/CFBench/HEAD/output/judge/abab_eval.json


--------------------------------------------------------------------------------
/output/judge/deepseek_v2_eval.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PKU-Baichuan-MLSystemLab/CFBench/HEAD/output/judge/deepseek_v2_eval.json


--------------------------------------------------------------------------------
/output/judge/ernie35_eval.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PKU-Baichuan-MLSystemLab/CFBench/HEAD/output/judge/ernie35_eval.json


--------------------------------------------------------------------------------
/output/judge/ernie4_eval.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PKU-Baichuan-MLSystemLab/CFBench/HEAD/output/judge/ernie4_eval.json


--------------------------------------------------------------------------------
/output/judge/glm4_eval.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PKU-Baichuan-MLSystemLab/CFBench/HEAD/output/judge/glm4_eval.json


--------------------------------------------------------------------------------
/output/judge/gpt35_turbo_1106_eval.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PKU-Baichuan-MLSystemLab/CFBench/HEAD/output/judge/gpt35_turbo_1106_eval.json


--------------------------------------------------------------------------------
/output/judge/gpt4_0125_preview_eval.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PKU-Baichuan-MLSystemLab/CFBench/HEAD/output/judge/gpt4_0125_preview_eval.json


--------------------------------------------------------------------------------
/output/judge/gpt4_turbo_0409_eval.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PKU-Baichuan-MLSystemLab/CFBench/HEAD/output/judge/gpt4_turbo_0409_eval.json


--------------------------------------------------------------------------------
/output/judge/gpt4o_eval.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PKU-Baichuan-MLSystemLab/CFBench/HEAD/output/judge/gpt4o_eval.json


--------------------------------------------------------------------------------
/output/judge/moonshot_eval.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PKU-Baichuan-MLSystemLab/CFBench/HEAD/output/judge/moonshot_eval.json


--------------------------------------------------------------------------------
/output/judge/yi_large_eval.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PKU-Baichuan-MLSystemLab/CFBench/HEAD/output/judge/yi_large_eval.json


--------------------------------------------------------------------------------
/output/response/abab_infer.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PKU-Baichuan-MLSystemLab/CFBench/HEAD/output/response/abab_infer.json


--------------------------------------------------------------------------------
/output/response/deepseek_v2_infer.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PKU-Baichuan-MLSystemLab/CFBench/HEAD/output/response/deepseek_v2_infer.json


--------------------------------------------------------------------------------
/output/response/ernie35_infer.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PKU-Baichuan-MLSystemLab/CFBench/HEAD/output/response/ernie35_infer.json


--------------------------------------------------------------------------------
/output/response/ernie4_infer.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PKU-Baichuan-MLSystemLab/CFBench/HEAD/output/response/ernie4_infer.json


--------------------------------------------------------------------------------
/output/response/ernie4turbo_infer.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PKU-Baichuan-MLSystemLab/CFBench/HEAD/output/response/ernie4turbo_infer.json


--------------------------------------------------------------------------------
/output/response/glm4_infer.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PKU-Baichuan-MLSystemLab/CFBench/HEAD/output/response/glm4_infer.json


--------------------------------------------------------------------------------
/output/response/gpt35_turbo_1106_infer.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PKU-Baichuan-MLSystemLab/CFBench/HEAD/output/response/gpt35_turbo_1106_infer.json


--------------------------------------------------------------------------------
/output/response/gpt4_0125_preview_infer.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PKU-Baichuan-MLSystemLab/CFBench/HEAD/output/response/gpt4_0125_preview_infer.json


--------------------------------------------------------------------------------
/output/response/gpt4_turbo_0409_infer.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PKU-Baichuan-MLSystemLab/CFBench/HEAD/output/response/gpt4_turbo_0409_infer.json


--------------------------------------------------------------------------------
/output/response/gpt4o_infer.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PKU-Baichuan-MLSystemLab/CFBench/HEAD/output/response/gpt4o_infer.json


--------------------------------------------------------------------------------
/output/response/moonshot_infer.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PKU-Baichuan-MLSystemLab/CFBench/HEAD/output/response/moonshot_infer.json


--------------------------------------------------------------------------------
/output/response/yi_large_infer.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PKU-Baichuan-MLSystemLab/CFBench/HEAD/output/response/yi_large_infer.json


--------------------------------------------------------------------------------
/output/scores.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PKU-Baichuan-MLSystemLab/CFBench/HEAD/output/scores.xlsx


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PKU-Baichuan-MLSystemLab/CFBench/HEAD/requirements.txt


--------------------------------------------------------------------------------
/resources/img/1_introduction_case.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PKU-Baichuan-MLSystemLab/CFBench/HEAD/resources/img/1_introduction_case.png


--------------------------------------------------------------------------------
/resources/img/2_pipline.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PKU-Baichuan-MLSystemLab/CFBench/HEAD/resources/img/2_pipline.png


--------------------------------------------------------------------------------
/resources/img/4_constraints_results.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PKU-Baichuan-MLSystemLab/CFBench/HEAD/resources/img/4_constraints_results.png


--------------------------------------------------------------------------------
/resources/img/5_domain_nlp_results.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PKU-Baichuan-MLSystemLab/CFBench/HEAD/resources/img/5_domain_nlp_results.png


--------------------------------------------------------------------------------
/resources/img/leaderboard.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PKU-Baichuan-MLSystemLab/CFBench/HEAD/resources/img/leaderboard.png


--------------------------------------------------------------------------------
/resources/paper/CFBench- A Comprehensive Constraints-Following Benchmark for LLMs.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PKU-Baichuan-MLSystemLab/CFBench/HEAD/resources/paper/CFBench- A Comprehensive Constraints-Following Benchmark for LLMs.pdf


--------------------------------------------------------------------------------
/run.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PKU-Baichuan-MLSystemLab/CFBench/HEAD/run.sh


--------------------------------------------------------------------------------