├── .gitignore
├── LICENSE
├── README.md
├── data
    ├── .DS_Store
    ├── csharp
    │   ├── Afrikaans.jsonl
    │   ├── Arabic.jsonl
    │   ├── Bulgarian.jsonl
    │   ├── Chinese.jsonl
    │   ├── Dutch.jsonl
    │   ├── English.jsonl
    │   ├── Estonian.jsonl
    │   ├── Finnish.jsonl
    │   ├── French.jsonl
    │   ├── German.jsonl
    │   ├── Greek.jsonl
    │   ├── Hebrew.jsonl
    │   ├── Hungarian.jsonl
    │   ├── Indonesian.jsonl
    │   ├── Italian.jsonl
    │   ├── Malay.jsonl
    │   ├── Persian.jsonl
    │   ├── Portuguese.jsonl
    │   ├── Russian.jsonl
    │   ├── Spanish.jsonl
    │   ├── Tagalog.jsonl
    │   ├── Turkish.jsonl
    │   └── Vietnamese.jsonl
    ├── go
    │   ├── Afrikaans.jsonl
    │   ├── Arabic.jsonl
    │   ├── Bulgarian.jsonl
    │   ├── Chinese.jsonl
    │   ├── Dutch.jsonl
    │   ├── English.jsonl
    │   ├── Estonian.jsonl
    │   ├── Finnish.jsonl
    │   ├── French.jsonl
    │   ├── German.jsonl
    │   ├── Greek.jsonl
    │   ├── Hebrew.jsonl
    │   ├── Hungarian.jsonl
    │   ├── Indonesian.jsonl
    │   ├── Italian.jsonl
    │   ├── Malay.jsonl
    │   ├── Persian.jsonl
    │   ├── Portuguese.jsonl
    │   ├── Russian.jsonl
    │   ├── Spanish.jsonl
    │   ├── Tagalog.jsonl
    │   ├── Turkish.jsonl
    │   └── Vietnamese.jsonl
    ├── java
    │   ├── Afrikaans.jsonl
    │   ├── Arabic.jsonl
    │   ├── Bulgarian.jsonl
    │   ├── Chinese.jsonl
    │   ├── Dutch.jsonl
    │   ├── English.jsonl
    │   ├── Estonian.jsonl
    │   ├── Finnish.jsonl
    │   ├── French.jsonl
    │   ├── German.jsonl
    │   ├── Greek.jsonl
    │   ├── Hebrew.jsonl
    │   ├── Hungarian.jsonl
    │   ├── Indonesian.jsonl
    │   ├── Italian.jsonl
    │   ├── Malay.jsonl
    │   ├── Persian.jsonl
    │   ├── Portuguese.jsonl
    │   ├── Russian.jsonl
    │   ├── Spanish.jsonl
    │   ├── Tagalog.jsonl
    │   ├── Turkish.jsonl
    │   └── Vietnamese.jsonl
    ├── javascript
    │   ├── Afrikaans.jsonl
    │   ├── Arabic.jsonl
    │   ├── Bulgarian.jsonl
    │   ├── Chinese.jsonl
    │   ├── Dutch.jsonl
    │   ├── English.jsonl
    │   ├── Estonian.jsonl
    │   ├── Finnish.jsonl
    │   ├── French.jsonl
    │   ├── German.jsonl
    │   ├── Greek.jsonl
    │   ├── Hebrew.jsonl
    │   ├── Hungarian.jsonl
    │   ├── Indonesian.jsonl
    │   ├── Italian.jsonl
    │   ├── Malay.jsonl
    │   ├── Persian.jsonl
    │   ├── Portuguese.jsonl
    │   ├── Russian.jsonl
    │   ├── Spanish.jsonl
    │   ├── Tagalog.jsonl
    │   ├── Turkish.jsonl
    │   └── Vietnamese.jsonl
    ├── kotlin
    │   ├── Afrikaans.jsonl
    │   ├── Arabic.jsonl
    │   ├── Bulgarian.jsonl
    │   ├── Chinese.jsonl
    │   ├── Dutch.jsonl
    │   ├── English.jsonl
    │   ├── Estonian.jsonl
    │   ├── Finnish.jsonl
    │   ├── French.jsonl
    │   ├── German.jsonl
    │   ├── Greek.jsonl
    │   ├── Hebrew.jsonl
    │   ├── Hungarian.jsonl
    │   ├── Indonesian.jsonl
    │   ├── Italian.jsonl
    │   ├── Malay.jsonl
    │   ├── Persian.jsonl
    │   ├── Portuguese.jsonl
    │   ├── Russian.jsonl
    │   ├── Spanish.jsonl
    │   ├── Tagalog.jsonl
    │   ├── Turkish.jsonl
    │   └── Vietnamese.jsonl
    ├── perl
    │   ├── Afrikaans.jsonl
    │   ├── Arabic.jsonl
    │   ├── Bulgarian.jsonl
    │   ├── Chinese.jsonl
    │   ├── Dutch.jsonl
    │   ├── English.jsonl
    │   ├── Estonian.jsonl
    │   ├── Finnish.jsonl
    │   ├── French.jsonl
    │   ├── German.jsonl
    │   ├── Greek.jsonl
    │   ├── Hebrew.jsonl
    │   ├── Hungarian.jsonl
    │   ├── Indonesian.jsonl
    │   ├── Italian.jsonl
    │   ├── Malay.jsonl
    │   ├── Persian.jsonl
    │   ├── Portuguese.jsonl
    │   ├── Russian.jsonl
    │   ├── Spanish.jsonl
    │   ├── Tagalog.jsonl
    │   ├── Turkish.jsonl
    │   └── Vietnamese.jsonl
    ├── php
    │   ├── Afrikaans.jsonl
    │   ├── Arabic.jsonl
    │   ├── Bulgarian.jsonl
    │   ├── Chinese.jsonl
    │   ├── Dutch.jsonl
    │   ├── English.jsonl
    │   ├── Estonian.jsonl
    │   ├── Finnish.jsonl
    │   ├── French.jsonl
    │   ├── German.jsonl
    │   ├── Greek.jsonl
    │   ├── Hebrew.jsonl
    │   ├── Hungarian.jsonl
    │   ├── Indonesian.jsonl
    │   ├── Italian.jsonl
    │   ├── Malay.jsonl
    │   ├── Persian.jsonl
    │   ├── Portuguese.jsonl
    │   ├── Russian.jsonl
    │   ├── Spanish.jsonl
    │   ├── Tagalog.jsonl
    │   ├── Turkish.jsonl
    │   └── Vietnamese.jsonl
    ├── python
    │   ├── Afrikaans.jsonl
    │   ├── Arabic.jsonl
    │   ├── Bulgarian.jsonl
    │   ├── Chinese.jsonl
    │   ├── Dutch.jsonl
    │   ├── English.jsonl
    │   ├── Estonian.jsonl
    │   ├── Finnish.jsonl
    │   ├── French.jsonl
    │   ├── German.jsonl
    │   ├── Greek.jsonl
    │   ├── Hebrew.jsonl
    │   ├── Hungarian.jsonl
    │   ├── Indonesian.jsonl
    │   ├── Italian.jsonl
    │   ├── Malay.jsonl
    │   ├── Persian.jsonl
    │   ├── Portuguese.jsonl
    │   ├── Russian.jsonl
    │   ├── Spanish.jsonl
    │   ├── Tagalog.jsonl
    │   ├── Turkish.jsonl
    │   └── Vietnamese.jsonl
    ├── ruby
    │   ├── Afrikaans.jsonl
    │   ├── Arabic.jsonl
    │   ├── Bulgarian.jsonl
    │   ├── Chinese.jsonl
    │   ├── Dutch.jsonl
    │   ├── English.jsonl
    │   ├── Estonian.jsonl
    │   ├── Finnish.jsonl
    │   ├── French.jsonl
    │   ├── German.jsonl
    │   ├── Greek.jsonl
    │   ├── Hebrew.jsonl
    │   ├── Hungarian.jsonl
    │   ├── Indonesian.jsonl
    │   ├── Italian.jsonl
    │   ├── Malay.jsonl
    │   ├── Persian.jsonl
    │   ├── Portuguese.jsonl
    │   ├── Russian.jsonl
    │   ├── Spanish.jsonl
    │   ├── Tagalog.jsonl
    │   ├── Turkish.jsonl
    │   └── Vietnamese.jsonl
    ├── scala
    │   ├── Afrikaans.jsonl
    │   ├── Arabic.jsonl
    │   ├── Bulgarian.jsonl
    │   ├── Chinese.jsonl
    │   ├── Dutch.jsonl
    │   ├── English.jsonl
    │   ├── Estonian.jsonl
    │   ├── Finnish.jsonl
    │   ├── French.jsonl
    │   ├── German.jsonl
    │   ├── Greek.jsonl
    │   ├── Hebrew.jsonl
    │   ├── Hungarian.jsonl
    │   ├── Indonesian.jsonl
    │   ├── Italian.jsonl
    │   ├── Malay.jsonl
    │   ├── Persian.jsonl
    │   ├── Portuguese.jsonl
    │   ├── Russian.jsonl
    │   ├── Spanish.jsonl
    │   ├── Tagalog.jsonl
    │   ├── Turkish.jsonl
    │   └── Vietnamese.jsonl
    ├── swift
    │   ├── Afrikaans.jsonl
    │   ├── Arabic.jsonl
    │   ├── Bulgarian.jsonl
    │   ├── Chinese.jsonl
    │   ├── Dutch.jsonl
    │   ├── English.jsonl
    │   ├── Estonian.jsonl
    │   ├── Finnish.jsonl
    │   ├── French.jsonl
    │   ├── German.jsonl
    │   ├── Greek.jsonl
    │   ├── Hebrew.jsonl
    │   ├── Hungarian.jsonl
    │   ├── Indonesian.jsonl
    │   ├── Italian.jsonl
    │   ├── Malay.jsonl
    │   ├── Persian.jsonl
    │   ├── Portuguese.jsonl
    │   ├── Russian.jsonl
    │   ├── Spanish.jsonl
    │   ├── Tagalog.jsonl
    │   ├── Turkish.jsonl
    │   └── Vietnamese.jsonl
    └── typescript
    │   ├── Afrikaans.jsonl
    │   ├── Arabic.jsonl
    │   ├── Bulgarian.jsonl
    │   ├── Chinese.jsonl
    │   ├── Dutch.jsonl
    │   ├── English.jsonl
    │   ├── Estonian.jsonl
    │   ├── Finnish.jsonl
    │   ├── French.jsonl
    │   ├── German.jsonl
    │   ├── Greek.jsonl
    │   ├── Hebrew.jsonl
    │   ├── Hungarian.jsonl
    │   ├── Indonesian.jsonl
    │   ├── Italian.jsonl
    │   ├── Malay.jsonl
    │   ├── Persian.jsonl
    │   ├── Portuguese.jsonl
    │   ├── Russian.jsonl
    │   ├── Spanish.jsonl
    │   ├── Tagalog.jsonl
    │   ├── Turkish.jsonl
    │   └── Vietnamese.jsonl
├── mxeval
    ├── .gitignore
    ├── CHANGELOG.md
    ├── LICENSE
    ├── NOTICE
    ├── THIRD_PARTY_LICENSES
    ├── graphics
    │   ├── mbxp_java_conversion.png
    │   └── paper_summary.png
    ├── language_setup
    │   ├── amazon_linux_ami.sh
    │   └── ubuntu.sh
    ├── mxeval
    │   ├── __init__.py
    │   ├── data.py
    │   ├── evaluate_functional_correctness.py
    │   ├── evaluation.py
    │   └── execution.py
    ├── requirements.txt
    ├── resources
    │   └── eval_csproj.zip
    └── setup.py
├── python_chinese_generated_samples.jsonl
└── src
    └── data_process.png


/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | share/python-wheels/
 24 | *.egg-info/
 25 | .installed.cfg
 26 | *.egg
 27 | MANIFEST
 28 | 
 29 | # PyInstaller
 30 | #  Usually these files are written by a python script from a template
 31 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 32 | *.manifest
 33 | *.spec
 34 | 
 35 | # Installer logs
 36 | pip-log.txt
 37 | pip-delete-this-directory.txt
 38 | 
 39 | # Unit test / coverage reports
 40 | htmlcov/
 41 | .tox/
 42 | .nox/
 43 | .coverage
 44 | .coverage.*
 45 | .cache
 46 | nosetests.xml
 47 | coverage.xml
 48 | *.cover
 49 | *.py,cover
 50 | .hypothesis/
 51 | .pytest_cache/
 52 | cover/
 53 | 
 54 | # Translations
 55 | *.mo
 56 | *.pot
 57 | 
 58 | # Django stuff:
 59 | *.log
 60 | local_settings.py
 61 | db.sqlite3
 62 | db.sqlite3-journal
 63 | 
 64 | # Flask stuff:
 65 | instance/
 66 | .webassets-cache
 67 | 
 68 | # Scrapy stuff:
 69 | .scrapy
 70 | 
 71 | # Sphinx documentation
 72 | docs/_build/
 73 | 
 74 | # PyBuilder
 75 | .pybuilder/
 76 | target/
 77 | 
 78 | # Jupyter Notebook
 79 | .ipynb_checkpoints
 80 | 
 81 | # IPython
 82 | profile_default/
 83 | ipython_config.py
 84 | 
 85 | # pyenv
 86 | #   For a library or package, you might want to ignore these files since the code is
 87 | #   intended to run in multiple environments; otherwise, check them in:
 88 | # .python-version
 89 | 
 90 | # pipenv
 91 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 92 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 93 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 94 | #   install all needed dependencies.
 95 | #Pipfile.lock
 96 | 
 97 | # poetry
 98 | #   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
 99 | #   This is especially recommended for binary packages to ensure reproducibility, and is more
100 | #   commonly ignored for libraries.
101 | #   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102 | #poetry.lock
103 | 
104 | # pdm
105 | #   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106 | #pdm.lock
107 | #   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108 | #   in version control.
109 | #   https://pdm.fming.dev/#use-with-ide
110 | .pdm.toml
111 | 
112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
113 | __pypackages__/
114 | 
115 | # Celery stuff
116 | celerybeat-schedule
117 | celerybeat.pid
118 | 
119 | # SageMath parsed files
120 | *.sage.py
121 | 
122 | # Environments
123 | .env
124 | .venv
125 | env/
126 | venv/
127 | ENV/
128 | env.bak/
129 | venv.bak/
130 | 
131 | # Spyder project settings
132 | .spyderproject
133 | .spyproject
134 | 
135 | # Rope project settings
136 | .ropeproject
137 | 
138 | # mkdocs documentation
139 | /site
140 | 
141 | # mypy
142 | .mypy_cache/
143 | .dmypy.json
144 | dmypy.json
145 | 
146 | # Pyre type checker
147 | .pyre/
148 | 
149 | # pytype static type analyzer
150 | .pytype/
151 | 
152 | # Cython debug symbols
153 | cython_debug/
154 | 
155 | # PyCharm
156 | #  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
157 | #  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
158 | #  and can be added to the global gitignore or merged into this file.  For a more nuclear
159 | #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
160 | #.idea/
161 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2024 FloatAI
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # [LREC-COLING 2024 | HumanEval-XL: An Execution-based Multilingual Code Generation Benchmark Across 23 Natural Languages and 12 Programming Languages](https://aclanthology.org/2024.lrec-main.735/)
  2 | 
  3 |    <a href="https://huggingface.co/datasets/floatai/HumanEval-XL" target="_blank">
  4 |       <img alt="Datasets" src="https://img.shields.io/badge/📚-Datasets-green" />
  5 |    </a>
  6 |   <a href="https://aclanthology.org/2024.lrec-main.735/" target="_blank">
  7 |       <img alt="Paper" src="https://img.shields.io/badge/📜-Paper-purple" />
  8 |    </a>
  9 |   <a href="https://lrec-coling-2024.org/" target="_blank">
 10 |       <img alt="LREC-COLING 2024" src="https://img.shields.io/badge/Proceedings-COLING%202024-red" />
 11 |    </a>
 12 | 
 13 | 
 14 | This repository contains data and evaluation code for the paper "[HumanEval-XL: A Multilingual Code Generation Benchmark for Cross-lingual Natural Language Generalization](https://aclanthology.org/2024.lrec-main.735.pdf)".
 15 | 
 16 | 
 17 | ## 🔥 News
 18 | * **26 February, 2024:** 🎉 We release the official codebase and data! [[GitHub](https://github.com/floatai/HumanEval-XL/tree/main?tab=readme-ov-file#dataset),
 19 |   [🤗 dataset](https://huggingface.co/datasets/floatai/HumanEval-XL)] 🔥
 20 | * **19 February, 2024:** 🎉 Our work has been accepted to [LREC-COLING 2024](https://lrec-coling-2024.org/)! ✨
 21 | 
 22 | ## 🌟 Overview
 23 | 
 24 | <div align="center">
 25 |   <img src="src/data_process.png">
 26 | </div>
 27 | 
 28 | Large language models (LLMs) have made significant progress in generating codes from textual prompts. However, existing benchmarks have mainly concentrated on translating English prompts to multilingual codes or have been constrained to very limited natural languages (NLs). These benchmarks have overlooked the vast landscape of massively multilingual NL to multilingual code, leaving a critical gap in the evaluation of multilingual LLMs. In response, we introduce HumanEval-XL, a massively multilingual code generation benchmark specifically crafted to address this deficiency. HumanEval-XL establishes connections between 23 NLs and 12 programming languages (PLs), and comprises of a collection of 22,080 prompts with an average of 8.33 test cases. By ensuring *parallel* data across multiple NLs and PLs, HumanEval-XL offers a comprehensive evaluation platform for multilingual LLMs, allowing the assessment of the understanding of different NLs. Our work serves as a pioneering step towards filling the void in evaluating NL generalization in the area of multilingual code generation. We make our evaluation code and data publicly available at [https://github.com/floatai/HumanEval-XL](https://github.com/floatai/HumanEval-XL).
 29 | 
 30 | <img width="70%" alt="image" src="https://github.com/floatai/HumanEval-XL/assets/13767887/e5b7a96e-20a6-4f17-a380-13c8b5ffbc8a">
 31 | 
 32 | 
 33 | ## Dataset
 34 | The data is stored in `data/<programming_language>/<NaturalLanguage>.jsonl` (for example, `data/python/Chinese.jsonl`). We have 80 parallel problems in 23 different natural languages and 12 programming languages.
 35 | 
 36 | **23 NLs** are:
 37 | "English", "Russian", "Chinese", "German", "Spanish", "French", "Italian", "Portuguese", "Greek", "Hungarian", "Dutch", "Finnish", "Indonesian", "Turkish", "Arabic", "Vietnamese", "Bulgarian", "Persian", "Malay", "Hebrew", "Estonian", "Tagalog", "Afrikaans"
 38 | 
 39 | **12 PLs** are:
 40 | "python", "java", "javascript", "csharp", "go", "kotlin", "perl", "php", "ruby", "scala", "swift", "typescript"
 41 | 
 42 | 
 43 | <img width="60%" alt="image" src="https://github.com/floatai/HumanEval-XL/assets/13767887/37023fcd-4c7e-41bf-8323-c5fcb5ac36a4">
 44 | 
 45 | 
 46 | ### Usage with HuggingFace datasets🤗
 47 | You can also use [🤗**HuggingFace datasets**](https://huggingface.co/datasets/floatai/HumanEval-XL) to load the data for a specific programming language; the returned `DatasetDict` contains one split per natural language:
 48 | ```python
 49 | from datasets import load_dataset
 50 | dataset = load_dataset("floatai/HumanEval-XL", "python")
 51 | DatasetDict({
 52 |     English: Dataset({
 53 |         features: ['task_id', 'language', 'prompt', 'description', 'test', 'entry_point', 'canonical_solution', 'natural_language'],
 54 |         num_rows: 80
 55 |     })
 56 |     Russian: Dataset({
 57 |         features: ['task_id', 'language', 'prompt', 'description', 'test', 'entry_point', 'canonical_solution', 'natural_language'],
 58 |         num_rows: 80
 59 |     })
 60 |     Chinese: Dataset({
 61 |         features: ['task_id', 'language', 'prompt', 'description', 'test', 'entry_point', 'canonical_solution', 'natural_language'],
 62 |         num_rows: 80
 63 |     })
 64 | 
 65 |     ⋮
 66 | 
 67 |     Afrikaans: Dataset({
 68 |         features: ['task_id', 'language', 'prompt', 'description', 'test', 'entry_point', 'canonical_solution', 'natural_language'],
 69 |         num_rows: 80
 70 |     })
 71 | })
 72 | 
 73 | ```
 74 | 
 75 | If you encounter an error while loading the data, try forcing a re-download:
 76 | ```python
 77 | dataset = load_dataset("floatai/HumanEval-XL", "python", download_mode="force_redownload")
 78 | ```
 79 | 
 80 | ### Data Instances
 81 | 
 82 | An example of a dataset instance (from the `python` configuration with Chinese prompts, i.e. `dataset["Chinese"][0]`):
 83 | 
 84 | ```python
 85 | {
 86 | 'task_id': 'python/0',
 87 | 'language': 'python',
 88 | 'prompt': 'from typing import List\n\n\ndef below_zero(operations: List[int]) -> bool:\n    """ 你会得到一个银行账户的存款和取款操作列表，该账户从零余额开始。你的任务是检测账户余额是否在任何时候降至零以下，并在该点返回True。否则应返回False。\n    \n    >>> below_zero([1, 2, 3])\n    False\n    >>> below_zero([1, 2, -4, 5])\n    True\n    """\n',
 89 | 'description': '你会得到一个银行账户的存款和取款操作列表，该账户从零余额开始。你的任务是检测账户余额是否在任何时候降至零以下，并在该点返回True。否则应返回False。\n    ',
 90 | 'test': "\n\nMETADATA = {\n    'author': 'jt',\n    'dataset': 'test'\n}\n\n\ndef check(candidate):\n    assert candidate([]) == False\n    assert candidate([1, 2, -3, 1, 2, -3]) == False\n    assert candidate([1, 2, -4, 5, 6]) == True\n    assert candidate([1, -1, 2, -2, 5, -5, 4, -4]) == False\n    assert candidate([1, -1, 2, -2, 5, -5, 4, -5]) == True\n    assert candidate([1, -2, 2, -2, 5, -5, 4, -4]) == True\n",
 91 | 'entry_point': 'below_zero',
 92 | 'canonical_solution': '    balance = 0\n\n    for op in operations:\n        balance += op\n        if balance < 0:\n            return True\n\n    return False\n',
 93 | 'natural_language': 'Chinese'
 94 | }
 95 | ```
 96 | 
 97 | ### Data Fields
 98 | 
 99 | - `task_id`: identifier for the data sample
100 | - `prompt`: input for the model containing function header and docstrings
101 | - `canonical_solution`: solution for the problem in the `prompt`
102 | - `description`: task description
103 | - `test`: contains function to test generated code for correctness
104 | - `entry_point`: entry point for test
105 | - `language`: programming language identifier used to select the appropriate subprocess call for program execution
106 | - `natural_language`: natural language identifier to show the language the prompt is in
107 | 
108 | 
109 | ### Data Splits
110 | Programming languages are used to specify the splits:
111 |  - python 
112 |  - java 
113 |  - javascript
114 |  - csharp
115 |  - go
116 |  - kotlin
117 |  - php
118 |  - perl
119 |  - ruby
120 |  - swift
121 |  - scala
122 |  - typescript
123 | 
124 | ## Evaluation
125 | ### Installation
126 | 
127 | Check out and install this repository:
128 | ```
129 | git clone git@github.com:floatai/HumanEval-XL.git
130 | cd HumanEval-XL
131 | pip install -e mxeval
132 | ```
133 | 
134 | ### Dependencies
135 | We provide scripts (located in `mxeval/language_setup/`) to help set up the programming language dependencies needed to execute and evaluate code with this dataset.
136 | (We use the same scripts from https://github.com/amazon-science/mxeval for code generation evaluation)
137 | 
138 | #### Amazon Linux AMI
139 | ```
140 | bash language_setup/amazon_linux_ami.sh
141 | ```
142 | #### Ubuntu
143 | ```
144 | bash language_setup/ubuntu.sh
145 | ```
146 | 
147 | ## Evaluation Usage
148 | 
149 | **This program exists to run untrusted model-generated code. Users are strongly
150 | encouraged not to do so outside of a robust security sandbox. See the comment in
151 | `execution.py` for more information and instructions.**
152 | (We use the same scripts from https://github.com/amazon-science/mxeval for code generation evaluation)
153 | 
154 | Each sample is formatted into a single line:
155 | ```
156 | {"task_id": "Corresponding task ID", "completion": "Completion only without the prompt",
157 | "language": "programming language name"}
158 | ```
159 | We provide `python_chinese_generated_samples.jsonl` to illustrate the format. 
160 | 
161 | Here is nearly functional example code (you just have to provide
162 | `generate_one_completion` to make it work) that saves generated completions to
163 | `samples.jsonl`.
164 | ```
165 | from mxeval.data import write_jsonl, read_problems
166 | 
167 | problems = read_problems()
168 | 
169 | num_samples_per_task = 200
170 | samples = [
171 |     dict(task_id=task_id, language=problems[task_id]["language"], completion=generate_one_completion(problems[task_id]["prompt"]))
172 |     for task_id in problems
173 |     for _ in range(num_samples_per_task)
174 | ]
175 | write_jsonl("samples.jsonl", samples)
176 | ```
177 | 
178 | To evaluate the samples, e.g., Python completions generated from Chinese prompts, run
179 | ```
180 | evaluate_functional_correctness python_chinese_generated_samples.jsonl --problem_file data/python/Chinese.jsonl
181 | ```
182 | 
183 | Note: Because there is no unbiased way of estimating pass@k when there are fewer
184 | samples than k, the script does not evaluate pass@k for these cases. To
185 | evaluate with other k values, pass `--k <comma-separated-values-here>`. For
186 | other options, see
187 | ```
188 | $ evaluate_functional_correctness --help
189 | ```
190 | However, we recommend that you use the default values for the rest.
191 | 
192 | ## Credits
193 | We adapted Amazon-science's mxeval package (https://github.com/amazon-science/mxeval) for the evaluation. We thank Amazon for their pioneering effort in this field including the release of the dataset and evaluation code.
194 | 
195 | We also appreciate the open-source contributions on [`floatai/HumanEval-XL`](https://huggingface.co/datasets/floatai/HumanEval-XL) dataset:
196 | - [iNeil77/HumanEval-XL](https://huggingface.co/datasets/iNeil77/HumanEval-XL)
197 | - vllm code evaluation: [iNeil77/vllm-code-harness](https://github.com/iNeil77/vllm-code-harness)
198 | 
199 | ## Citation
200 | 
201 | ```
202 | @inproceedings{peng-etal-2024-humaneval,
203 |     title = "{H}uman{E}val-{XL}: A Multilingual Code Generation Benchmark for Cross-lingual Natural Language Generalization",
204 |     author = "Peng, Qiwei  and
205 |       Chai, Yekun  and
206 |       Li, Xuhong",
207 |     editor = "Calzolari, Nicoletta  and
208 |       Kan, Min-Yen  and
209 |       Hoste, Veronique  and
210 |       Lenci, Alessandro  and
211 |       Sakti, Sakriani  and
212 |       Xue, Nianwen",
213 |     booktitle = "Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)",
214 |     month = may,
215 |     year = "2024",
216 |     address = "Torino, Italia",
217 |     publisher = "ELRA and ICCL",
218 |     url = "https://aclanthology.org/2024.lrec-main.735/",
219 |     pages = "8383--8394",
220 |     abstract = "Large language models (LLMs) have made significant progress in generating codes from textual prompts. However, existing benchmarks have mainly concentrated on translating English prompts to multilingual codes or have been constrained to very limited natural languages (NLs). These benchmarks have overlooked the vast landscape of massively multilingual NL to multilingual code, leaving a critical gap in the evaluation of multilingual LLMs. In response, we introduce HumanEval-XL, a massively multilingual code generation benchmark specifically crafted to address this deficiency. HumanEval-XL establishes connections between 23 NLs and 12 programming languages (PLs), and comprises of a collection of 22,080 prompts with an average of 8.33 test cases. By ensuring parallel data across multiple NLs and PLs, HumanEval-XL offers a comprehensive evaluation platform for multilingual LLMs, allowing the assessment of the understanding of different NLs. Our work serves as a pioneering step towards filling the void in evaluating NL generalization in the area of multilingual code generation. We make our evaluation code and data publicly available at https://github.com/floatai/HumanEval-XL."
221 | }
222 | ```
223 | 


--------------------------------------------------------------------------------
/data/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/floatai/HumanEval-XL/1e9301f6cfc2d3481a7f7e44569982285238ac99/data/.DS_Store


--------------------------------------------------------------------------------
/data/python/Chinese.jsonl:
--------------------------------------------------------------------------------
 1 | {"task_id": "python/0", "prompt": "from typing import List\n\n\ndef below_zero(operations: List[int]) -> bool:\n    \"\"\" 你会得到一个银行账户的存款和取款操作列表，该账户从零余额开始。你的任务是检测账户余额是否在任何时候降至零以下，并在该点返回True。否则应返回False。\n    \n    >>> below_zero([1, 2, 3])\n    False\n    >>> below_zero([1, 2, -4, 5])\n    True\n    \"\"\"\n", "entry_point": "below_zero", "test": "\n\nMETADATA = {\n    'author': 'jt',\n    'dataset': 'test'\n}\n\n\ndef check(candidate):\n    assert candidate([]) == False\n    assert candidate([1, 2, -3, 1, 2, -3]) == False\n    assert candidate([1, 2, -4, 5, 6]) == True\n    assert candidate([1, -1, 2, -2, 5, -5, 4, -4]) == False\n    assert candidate([1, -1, 2, -2, 5, -5, 4, -5]) == True\n    assert candidate([1, -2, 2, -2, 5, -5, 4, -4]) == True\n", "language": "python", "canonical_solution": "    balance = 0\n\n    for op in operations:\n        balance += op\n        if balance < 0:\n            return True\n\n    return False\n", "description": "你会得到一个银行账户的存款和取款操作列表，该账户从零余额开始。你的任务是检测账户余额是否在任何时候降至零以下，并在该点返回True。否则应返回False。\n    ", "natural_language": "Chinese"}
 2 | {"task_id": "python/1", "prompt": "from typing import List, Tuple\n\n\ndef sum_product(numbers: List[int]) -> Tuple[int, int]:\n    \"\"\" 对于给定的整数列表，返回一个元组，其中包含列表中所有整数的和与积。\n空列表的和应该等于0，空列表的积应该等于1。\n    \n    >>> sum_product([])\n    (0, 1)\n    >>> sum_product([1, 2, 3, 4])\n    (10, 24)\n    \"\"\"\n", "entry_point": "sum_product", "test": "\n\nMETADATA = {\n    'author': 'jt',\n    'dataset': 'test'\n}\n\n\ndef check(candidate):\n    assert candidate([]) == (0, 1)\n    assert candidate([1, 1, 1]) == (3, 1)\n    assert candidate([100, 0]) == (100, 0)\n    assert candidate([3, 5, 7]) == (3 + 5 + 7, 3 * 5 * 7)\n    assert candidate([10]) == (10, 10)\n", "language": "python", "canonical_solution": "    sum_value = 0\n    prod_value = 1\n\n    for n in numbers:\n        sum_value += n\n        prod_value *= n\n    return sum_value, prod_value\n", "description": "对于给定的整数列表，返回一个元组，其中包含列表中所有整数的和与积。\n空列表的和应该等于0，空列表的积应该等于1。\n    ", "natural_language": "Chinese"}
 3 | {"task_id": "python/2", "prompt": "from typing import List\n\n\ndef string_xor(a: str, b: str) -> str:\n    \"\"\" 输入两个仅由1和0组成的字符串a和b。\n对这些输入执行二进制异或，并将结果作为字符串返回。\n    \n    >>> string_xor('010', '110')\n    '100'\n    \"\"\"\n", "entry_point": "string_xor", "test": "\n\nMETADATA = {\n    'author': 'jt',\n    'dataset': 'test'\n}\n\n\ndef check(candidate):\n    assert candidate('111000', '101010') == '010010'\n    assert candidate('1', '1') == '0'\n    assert candidate('0101', '0000') == '0101'\n", "language": "python", "canonical_solution": "    def xor(i, j):\n        if i == j:\n            return '0'\n        else:\n            return '1'\n\n    return ''.join(xor(x, y) for x, y in zip(a, b))\n", "description": "输入两个仅由1和0组成的字符串a和b。\n对这些输入执行二进制异或，并将结果作为字符串返回。\n    ", "natural_language": "Chinese"}
 4 | {"task_id": "python/3", "prompt": "from typing import List, Optional\n\n\ndef longest(strings: List[str]) -> Optional[str]:\n    \"\"\" 从字符串列表中返回最长的字符串。如果有多个长度相同的字符串，则返回第一个字符串。如果输入列表为空，则返回null。\n    \n    >>> longest([])\n\n    >>> longest(['a', 'b', 'c'])\n    'a'\n    >>> longest(['a', 'bb', 'ccc'])\n    'ccc'\n    \"\"\"\n", "entry_point": "longest", "test": "\n\nMETADATA = {\n    'author': 'jt',\n    'dataset': 'test'\n}\n\n\ndef check(candidate):\n    assert candidate([]) == None\n    assert candidate(['x', 'y', 'z']) == 'x'\n    assert candidate(['x', 'yyy', 'zzzz', 'www', 'kkkk', 'abc']) == 'zzzz'\n", "language": "python", "canonical_solution": "    if not strings:\n        return None\n\n    maxlen = max(len(x) for x in strings)\n    for s in strings:\n        if len(s) == maxlen:\n            return s\n", "description": "从字符串列表中返回最长的字符串。如果有多个长度相同的字符串，则返回第一个字符串。如果输入列表为空，则返回null。\n    ", "natural_language": "Chinese"}
 5 | {"task_id": "python/4", "prompt": "\n\ndef greatest_common_divisor(a: int, b: int) -> int:\n    \"\"\" 返回两个整数a和b的最大公约数\n    \n    >>> greatest_common_divisor(3, 5)\n    1\n    >>> greatest_common_divisor(25, 15)\n    5\n    \"\"\"\n", "entry_point": "greatest_common_divisor", "test": "\n\nMETADATA = {\n    'author': 'jt',\n    'dataset': 'test'\n}\n\n\ndef check(candidate):\n    assert candidate(3, 7) == 1\n    assert candidate(10, 15) == 5\n    assert candidate(49, 14) == 7\n    assert candidate(144, 60) == 12\n", "language": "python", "canonical_solution": "    while b:\n        a, b = b, a % b\n    return a\n", "description": "返回两个整数a和b的最大公约数\n    ", "natural_language": "Chinese"}
 6 | {"task_id": "python/5", "prompt": "from typing import List\n\n\ndef sort_numbers(numbers: str) -> str:\n    \"\"\" 输入一个由“零”到“九”数字组成的以空格分隔的字符串。\n有效选择为“零”、“一”、“二”、“三”、“四”、“五”、“六”、“七”、“八”和“九”。\n返回按从小到大排序的数字字符串。\n    \n    >>> sort_numbers('three one five')\n    'one three five'\n    \"\"\"\n", "entry_point": "sort_numbers", "test": "\n\nMETADATA = {\n    'author': 'jt',\n    'dataset': 'test'\n}\n\n\ndef check(candidate):\n    assert candidate('') == ''\n    assert candidate('three') == 'three'\n    assert candidate('three five nine') == 'three five nine'\n    assert candidate('five zero four seven nine eight') == 'zero four five seven eight nine'\n    assert candidate('six five four three two one zero') == 'zero one two three four five six'\n", "language": "python", "canonical_solution": "    value_map = {\n        'zero': 0,\n        'one': 1,\n        'two': 2,\n        'three': 3,\n        'four': 4,\n        'five': 5,\n        'six': 6,\n        'seven': 7,\n        'eight': 8,\n        'nine': 9\n    }\n    return ' '.join(sorted([x for x in numbers.split(' ') if x], key=lambda x: value_map[x]))\n", "description": "输入一个由“零”到“九”数字组成的以空格分隔的字符串。\n有效选择为“零”、“一”、“二”、“三”、“四”、“五”、“六”、“七”、“八”和“九”。\n返回按从小到大排序的数字字符串。\n    ", "natural_language": "Chinese"}
 7 | {"task_id": "python/6", "prompt": "from typing import List\n\n\ndef rescale_to_unit(numbers: List[float]) -> List[float]:\n    \"\"\" 给定一个数字列表（至少有两个元素），对该列表应用线性变换，使最小的数字变为0，最大的数字变为1。\n    \n    >>> rescale_to_unit([1.0, 2.0, 3.0, 4.0, 5.0])\n    [0.0, 0.25, 0.5, 0.75, 1.0]\n    \"\"\"\n", "entry_point": "rescale_to_unit", "test": "\n\nMETADATA = {\n    'author': 'jt',\n    'dataset': 'test'\n}\n\n\ndef check(candidate):\n    assert candidate([2.0, 49.9]) == [0.0, 1.0]\n    assert candidate([100.0, 49.9]) == [1.0, 0.0]\n    assert candidate([1.0, 2.0, 3.0, 4.0, 5.0]) == [0.0, 0.25, 0.5, 0.75, 1.0]\n    assert candidate([2.0, 1.0, 5.0, 3.0, 4.0]) == [0.25, 0.0, 1.0, 0.5, 0.75]\n    assert candidate([12.0, 11.0, 15.0, 13.0, 14.0]) == [0.25, 0.0, 1.0, 0.5, 0.75]\n", "language": "python", "canonical_solution": "    min_number = min(numbers)\n    max_number = max(numbers)\n    return [(x - min_number) / (max_number - min_number) for x in numbers]\n", "description": "给定一个数字列表（至少有两个元素），对该列表应用线性变换，使最小的数字变为0，最大的数字变为1。\n    ", "natural_language": "Chinese"}
 8 | {"task_id": "python/7", "prompt": "\n\ndef flip_case(string: str) -> str:\n    \"\"\" 对于给定的字符串，将小写字符翻转为大写，将大写字符翻转为小写。\n    \n    >>> flip_case('Hello')\n    'hELLO'\n    \"\"\"\n", "entry_point": "flip_case", "test": "\n\nMETADATA = {\n    'author': 'jt',\n    'dataset': 'test'\n}\n\n\ndef check(candidate):\n    assert candidate('') == ''\n    assert candidate('Hello!') == 'hELLO!'\n    assert candidate('These violent delights have violent ends') == 'tHESE VIOLENT DELIGHTS HAVE VIOLENT ENDS'\n", "language": "python", "canonical_solution": "    return string.swapcase()\n", "description": "对于给定的字符串，将小写字符翻转为大写，将大写字符翻转为小写。\n    ", "natural_language": "Chinese"}
 9 | {"task_id": "python/8", "prompt": "\n\ndef get_positive(l: list):\n    \"\"\"返回列表中仅为正数的数字。\n    \n    >>> get_positive([-1, 2, -4, 5, 6])\n    [2, 5, 6]\n    >>> get_positive([5, 3, -5, 2, -3, 3, 9, 0, 123, 1, -10])\n    [5, 3, 2, 3, 9, 123, 1]\n    \"\"\"\n", "entry_point": "get_positive", "test": "\n\nMETADATA = {}\n\n\ndef check(candidate):\n    assert candidate([-1, -2, 4, 5, 6]) == [4, 5, 6]\n    assert candidate([5, 3, -5, 2, 3, 3, 9, 0, 123, 1, -10]) == [5, 3, 2, 3, 3, 9, 123, 1]\n    assert candidate([-1, -2]) == []\n    assert candidate([]) == []\n\n", "language": "python", "canonical_solution": "    return [e for e in l if e > 0]\n", "description": "返回列表中仅为正数的数字。\n    ", "natural_language": "Chinese"}
10 | {"task_id": "python/9", "prompt": "\n\ndef is_prime(n):\n    \"\"\"如果给定的数字是质数，则返回true，否则返回false。\n    \n    >>> is_prime(6)\n    False\n    >>> is_prime(101)\n    True\n    >>> is_prime(11)\n    True\n    >>> is_prime(13441)\n    True\n    >>> is_prime(61)\n    True\n    >>> is_prime(4)\n    False\n    >>> is_prime(1)\n    False\n    \"\"\"\n", "entry_point": "is_prime", "test": "\n\nMETADATA = {}\n\n\ndef check(candidate):\n    assert candidate(6) == False\n    assert candidate(101) == True\n    assert candidate(11) == True\n    assert candidate(13441) == True\n    assert candidate(61) == True\n    assert candidate(4) == False\n    assert candidate(1) == False\n    assert candidate(5) == True\n    assert candidate(11) == True\n    assert candidate(17) == True\n    assert candidate(5 * 17) == False\n    assert candidate(11 * 7) == False\n    assert candidate(13441 * 19) == False\n\n", "language": "python", "canonical_solution": "    if n < 2:\n        return False\n    for k in range(2, n - 1):\n        if n % k == 0:\n            return False\n    return True\n", "description": "如果给定的数字是质数，则返回true，否则返回false。\n    ", "natural_language": "Chinese"}
11 | {"task_id": "python/10", "prompt": "\n\ndef unique(l: list):\n    \"\"\"返回列表中排序后的唯一元素\n    \n    >>> unique([5, 3, 5, 2, 3, 3, 9, 0, 123])\n    [0, 2, 3, 5, 9, 123]\n    \"\"\"\n", "entry_point": "unique", "test": "\n\nMETADATA = {}\n\n\ndef check(candidate):\n    assert candidate([5, 3, 5, 2, 3, 3, 9, 0, 123]) == [0, 2, 3, 5, 9, 123]\n\n", "language": "python", "canonical_solution": "    return sorted(list(set(l)))\n", "description": "返回列表中排序后的唯一元素\n    ", "natural_language": "Chinese"}
12 | {"task_id": "python/11", "prompt": "\n\ndef prime_fib(n: int):\n    \"\"\"\n    prime_fib 返回第 n 个既是斐波那契数又是质数的数。\n    \n    >>> prime_fib(1)\n    2\n    >>> prime_fib(2)\n    3\n    >>> prime_fib(3)\n    5\n    >>> prime_fib(4)\n    13\n    >>> prime_fib(5)\n    89\n    \"\"\"\n", "entry_point": "prime_fib", "test": "\n\nMETADATA = {}\n\n\ndef check(candidate):\n    assert candidate(1) == 2\n    assert candidate(2) == 3\n    assert candidate(3) == 5\n    assert candidate(4) == 13\n    assert candidate(5) == 89\n    assert candidate(6) == 233\n    assert candidate(7) == 1597\n    assert candidate(8) == 28657\n    assert candidate(9) == 514229\n    assert candidate(10) == 433494437\n\n", "language": "python", "canonical_solution": "    import math\n\n    def is_prime(p):\n        if p < 2:\n            return False\n        for k in range(2, min(int(math.sqrt(p)) + 1, p - 1)):\n            if p % k == 0:\n                return False\n        return True\n    f = [0, 1]\n    while True:\n        f.append(f[-1] + f[-2])\n        if is_prime(f[-1]):\n            n -= 1\n        if n == 0:\n            return f[-1]\n", "description": "prime_fib 返回第 n 个既是斐波那契数又是质数的数。\n    ", "natural_language": "Chinese"}
13 | {"task_id": "python/12", "prompt": "\n\ndef triples_sum_to_zero(l: list):\n    \"\"\"\n    triples_sum_to_zero 接受一个整数列表作为输入。\n如果列表中存在三个不同的元素相加等于零，则返回True，否则返回False。\n    \n\n    >>> triples_sum_to_zero([1, 3, 5, 0])\n    False\n    >>> triples_sum_to_zero([1, 3, -2, 1])\n    True\n    >>> triples_sum_to_zero([1, 2, 3, 7])\n    False\n    >>> triples_sum_to_zero([2, 4, -5, 3, 9, 7])\n    True\n    >>> triples_sum_to_zero([1])\n    False\n    \"\"\"\n", "entry_point": "triples_sum_to_zero", "test": "\n\nMETADATA = {}\n\n\ndef check(candidate):\n    assert candidate([1, 3, 5, 0]) == False\n    assert candidate([1, 3, 5, -1]) == False\n    assert candidate([1, 3, -2, 1]) == True\n    assert candidate([1, 2, 3, 7]) == False\n    assert candidate([1, 2, 5, 7]) == False\n    assert candidate([2, 4, -5, 3, 9, 7]) == True\n    assert candidate([1]) == False\n    assert candidate([1, 3, 5, -100]) == False\n    assert candidate([100, 3, 5, -100]) == False\n\n", "language": "python", "canonical_solution": "    for i in range(len(l)):\n        for j in range(i + 1, len(l)):\n            for k in range(j + 1, len(l)):\n                if l[i] + l[j] + l[k] == 0:\n                    return True\n    return False\n", "description": "triples_sum_to_zero 接受一个整数列表作为输入。\n如果列表中存在三个不同的元素相加等于零，则返回True，否则返回False。\n    ", "natural_language": "Chinese"}
14 | {"task_id": "python/13", "prompt": "\n\ndef pairs_sum_to_zero(l):\n    \"\"\"\n    pairs_sum_to_zero 接受一个整数列表作为输入。\n如果列表中存在两个不同的元素相加等于零，则返回True，否则返回False。\n    \n    >>> pairs_sum_to_zero([1, 3, 5, 0])\n    False\n    >>> pairs_sum_to_zero([1, 3, -2, 1])\n    False\n    >>> pairs_sum_to_zero([1, 2, 3, 7])\n    False\n    >>> pairs_sum_to_zero([2, 4, -5, 3, 5, 7])\n    True\n    >>> pairs_sum_to_zero([1])\n    False\n    \"\"\"\n", "entry_point": "pairs_sum_to_zero", "test": "\n\nMETADATA = {}\n\n\ndef check(candidate):\n    assert candidate([1, 3, 5, 0]) == False\n    assert candidate([1, 3, -2, 1]) == False\n    assert candidate([1, 2, 3, 7]) == False\n    assert candidate([2, 4, -5, 3, 5, 7]) == True\n    assert candidate([1]) == False\n\n    assert candidate([-3, 9, -1, 3, 2, 30]) == True\n    assert candidate([-3, 9, -1, 3, 2, 31]) == True\n    assert candidate([-3, 9, -1, 4, 2, 30]) == False\n    assert candidate([-3, 9, -1, 4, 2, 31]) == False\n\n", "language": "python", "canonical_solution": "    for i, l1 in enumerate(l):\n        for j in range(i + 1, len(l)):\n            if l1 + l[j] == 0:\n                return True\n    return False\n", "description": "pairs_sum_to_zero 接受一个整数列表作为输入。\n如果列表中存在两个不同的元素相加等于零，则返回True，否则返回False。\n    ", "natural_language": "Chinese"}
15 | {"task_id": "python/14", "prompt": "\n\ndef fib4(n: int):\n    \"\"\"Fib4数列是一种类似于斐波那契数列的数列，定义如下：\n    fib4(0) -> 0\n    fib4(1) -> 0\n    fib4(2) -> 2\n    fib4(3) -> 0\n    fib4(n) -> fib4(n-1) + fib4(n-2) + fib4(n-3) + fib4(n-4)。\n    请编写一个函数高效地计算Fib4数列的第n个元素。不要使用递归。\n    \n    >>> fib4(5)\n    4\n    >>> fib4(6)\n    8\n    >>> fib4(7)\n    14\n    \"\"\"\n", "entry_point": "fib4", "test": "\n\nMETADATA = {}\n\n\ndef check(candidate):\n    assert candidate(5) == 4\n    assert candidate(8) == 28\n    assert candidate(10) == 104\n    assert candidate(12) == 386\n\n", "language": "python", "canonical_solution": "    results = [0, 0, 2, 0]\n    if n < 4:\n        return results[n]\n\n    for _ in range(4, n + 1):\n        results.append(results[-1] + results[-2] + results[-3] + results[-4])\n        results.pop(0)\n\n    return results[-1]\n", "description": "Fib4数列是一种类似于斐波那契数列的数列，定义如下：\n    fib4(0) -> 0\n    fib4(1) -> 0\n    fib4(2) -> 2\n    fib4(3) -> 0\n    fib4(n) -> fib4(n-1) + fib4(n-2) + fib4(n-3) + fib4(n-4)。\n    请编写一个函数高效地计算Fib4数列的第n个元素。不要使用递归。\n    ", "natural_language": "Chinese"}
16 | {"task_id": "python/15", "prompt": "\n\ndef median(l: list):\n    \"\"\"返回列表l中元素的中位数。\n    \n    >>> median([3, 1, 2, 4, 5])\n    3\n    >>> median([-10, 4, 6, 1000, 10, 20])\n    8.0\n    \"\"\"\n", "entry_point": "median", "test": "\n\nMETADATA = {}\n\n\ndef check(candidate):\n    assert candidate([3, 1, 2, 4, 5]) == 3\n    assert candidate([-10, 4, 6, 1000, 10, 20]) == 8.0\n    assert candidate([5]) == 5\n    assert candidate([6, 5]) == 5.5\n    assert candidate([8, 1, 3, 9, 9, 2, 7]) == 7 \n\n", "language": "python", "canonical_solution": "    l = sorted(l)\n    if len(l) % 2 == 1:\n        return l[len(l) // 2]\n    else:\n        return (l[len(l) // 2 - 1] + l[len(l) // 2]) / 2.0\n", "description": "返回列表l中元素的中位数。\n    ", "natural_language": "Chinese"}
17 | {"task_id": "python/16", "prompt": "\n\ndef is_palindrome(text: str):\n    \"\"\"\n    检查给定的字符串是否为回文。\n    \n    >>> is_palindrome('')\n    True\n    >>> is_palindrome('aba')\n    True\n    >>> is_palindrome('aaaaa')\n    True\n    >>> is_palindrome('zbcd')\n    False\n    \"\"\"\n", "entry_point": "is_palindrome", "test": "\n\nMETADATA = {}\n\n\ndef check(candidate):\n    assert candidate('') == True\n    assert candidate('aba') == True\n    assert candidate('aaaaa') == True\n    assert candidate('zbcd') == False\n    assert candidate('xywyx') == True\n    assert candidate('xywyz') == False\n    assert candidate('xywzx') == False\n\n", "language": "python", "canonical_solution": "    for i in range(len(text)):\n        if text[i] != text[len(text) - 1 - i]:\n            return False\n    return True\n", "description": "检查给定的字符串是否为回文。\n    ", "natural_language": "Chinese"}
18 | {"task_id": "python/17", "prompt": "\n\ndef remove_vowels(text):\n    \"\"\"\n    remove_vowels是一个函数，它接受一个字符串并返回没有元音字母的字符串。\n    \n    >>> remove_vowels('')\n    ''\n    >>> remove_vowels(\"abcdef\\nghijklm\")\n    'bcdf\\nghjklm'\n    >>> remove_vowels('abcdef')\n    'bcdf'\n    >>> remove_vowels('aaaaa')\n    ''\n    >>> remove_vowels('aaBAA')\n    'B'\n    >>> remove_vowels('zbcd')\n    'zbcd'\n    \"\"\"\n", "entry_point": "remove_vowels", "test": "\n\nMETADATA = {}\n\n\ndef check(candidate):\n    assert candidate('') == ''\n    assert candidate(\"abcdef\\nghijklm\") == 'bcdf\\nghjklm'\n    assert candidate('fedcba') == 'fdcb'\n    assert candidate('eeeee') == ''\n    assert candidate('acBAA') == 'cB'\n    assert candidate('EcBOO') == 'cB'\n    assert candidate('ybcd') == 'ybcd'\n\n", "language": "python", "canonical_solution": "    return \"\".join([s for s in text if s.lower() not in [\"a\", \"e\", \"i\", \"o\", \"u\"]])\n", "description": "remove_vowels是一个函数，它接受一个字符串并返回没有元音字母的字符串。\n    ", "natural_language": "Chinese"}
19 | {"task_id": "python/18", "prompt": "\n\ndef below_threshold(l: list, t: int):\n    \"\"\"如果列表l中的所有数字都低于阈值t，则返回True。\n    \n    >>> below_threshold([1, 2, 4, 10], 100)\n    True\n    >>> below_threshold([1, 20, 4, 10], 5)\n    False\n    \"\"\"\n", "entry_point": "below_threshold", "test": "\n\nMETADATA = {}\n\n\ndef check(candidate):\n    assert candidate([1, 2, 4, 10], 100)\n    assert not candidate([1, 20, 4, 10], 5)\n    assert candidate([1, 20, 4, 10], 21)\n    assert candidate([1, 20, 4, 10], 22)\n    assert candidate([1, 8, 4, 10], 11)\n    assert not candidate([1, 8, 4, 10], 10)\n\n", "language": "python", "canonical_solution": "    for e in l:\n        if e >= t:\n            return False\n    return True\n", "description": "如果列表l中的所有数字都低于阈值t，则返回True。\n    ", "natural_language": "Chinese"}
20 | {"task_id": "python/19", "prompt": "\n\ndef add(x: int, y: int):\n    \"\"\"将两个数字 x 和 y 相加\n    \n    >>> add(2, 3)\n    5\n    >>> add(5, 7)\n    12\n    \"\"\"\n", "entry_point": "add", "test": "\n\nMETADATA = {}\n\n\ndef check(candidate):\n    import random\n\n    assert candidate(0, 1) == 1\n    assert candidate(1, 0) == 1\n    assert candidate(2, 3) == 5\n    assert candidate(5, 7) == 12\n    assert candidate(7, 5) == 12\n\n    for i in range(100):\n        x, y = random.randint(0, 1000), random.randint(0, 1000)\n        assert candidate(x, y) == x + y\n\n", "language": "python", "canonical_solution": "    return x + y\n", "description": "将两个数字 x 和 y 相加\n    ", "natural_language": "Chinese"}
21 | {"task_id": "python/20", "prompt": "\n\ndef same_chars(s0: str, s1: str):\n    \"\"\"\n    检查两个单词是否具有相同的字符。\n    \n    >>> same_chars('eabcdzzzz', 'dddzzzzzzzddeddabc')\n    True\n    >>> same_chars('abcd', 'dddddddabc')\n    True\n    >>> same_chars('dddddddabc', 'abcd')\n    True\n    >>> same_chars('eabcd', 'dddddddabc')\n    False\n    >>> same_chars('abcd', 'dddddddabce')\n    False\n    >>> same_chars('eabcdzzzz', 'dddzzzzzzzddddabc')\n    False\n    \"\"\"\n", "entry_point": "same_chars", "test": "\n\nMETADATA = {}\n\n\ndef check(candidate):\n    assert candidate('eabcdzzzz', 'dddzzzzzzzddeddabc') == True\n    assert candidate('abcd', 'dddddddabc') == True\n    assert candidate('dddddddabc', 'abcd') == True\n    assert candidate('eabcd', 'dddddddabc') == False\n    assert candidate('abcd', 'dddddddabcf') == False\n    assert candidate('eabcdzzzz', 'dddzzzzzzzddddabc') == False\n    assert candidate('aabb', 'aaccc') == False\n\n", "language": "python", "canonical_solution": "    return set(s0) == set(s1)\n", "description": "检查两个单词是否具有相同的字符。\n    ", "natural_language": "Chinese"}
22 | {"task_id": "python/21", "prompt": "\n\ndef fib(n: int):\n    \"\"\"返回第n个斐波那契数。\n    \n    >>> fib(10)\n    55\n    >>> fib(1)\n    1\n    >>> fib(8)\n    21\n    \"\"\"\n", "entry_point": "fib", "test": "\n\nMETADATA = {}\n\n\ndef check(candidate):\n    assert candidate(10) == 55\n    assert candidate(1) == 1\n    assert candidate(8) == 21\n    assert candidate(11) == 89\n    assert candidate(12) == 144\n\n", "language": "python", "canonical_solution": "    if n == 0:\n        return 0\n    if n == 1:\n        return 1\n    return fib(n - 1) + fib(n - 2)\n", "description": "返回第n个斐波那契数。\n    ", "natural_language": "Chinese"}
23 | {"task_id": "python/22", "prompt": "\n\ndef common(l1: list, l2: list):\n    \"\"\"返回两个列表中排序后的唯一公共元素。\n    \n    >>> common([1, 4, 3, 34, 653, 2, 5], [5, 7, 1, 5, 9, 653, 121])\n    [1, 5, 653]\n    >>> common([5, 3, 2, 8], [3, 2])\n    [2, 3]\n\n    \"\"\"\n", "entry_point": "common", "test": "\n\nMETADATA = {}\n\n\ndef check(candidate):\n    assert candidate([1, 4, 3, 34, 653, 2, 5], [5, 7, 1, 5, 9, 653, 121]) == [1, 5, 653]\n    assert candidate([5, 3, 2, 8], [3, 2]) == [2, 3]\n    assert candidate([4, 3, 2, 8], [3, 2, 4]) == [2, 3, 4]\n    assert candidate([4, 3, 2, 8], []) == []\n\n", "language": "python", "canonical_solution": "    ret = set()\n    for e1 in l1:\n        for e2 in l2:\n            if e1 == e2:\n                ret.add(e1)\n    return sorted(list(ret))\n", "description": "返回两个列表中排序后的唯一公共元素。\n    ", "natural_language": "Chinese"}
24 | {"task_id": "python/23", "prompt": "\n\ndef largest_prime_factor(n: int):\n    \"\"\"返回n的最大质因数。假设n>1且不是质数。\n    \n    >>> largest_prime_factor(13195)\n    29\n    >>> largest_prime_factor(2048)\n    2\n    \"\"\"\n", "entry_point": "largest_prime_factor", "test": "\n\nMETADATA = {}\n\n\ndef check(candidate):\n    assert candidate(15) == 5\n    assert candidate(27) == 3\n    assert candidate(63) == 7\n    assert candidate(330) == 11\n    assert candidate(13195) == 29\n\n", "language": "python", "canonical_solution": "    def is_prime(k):\n        if k < 2:\n            return False\n        for i in range(2, k - 1):\n            if k % i == 0:\n                return False\n        return True\n    largest = 1\n    for j in range(2, n + 1):\n        if n % j == 0 and is_prime(j):\n            largest = max(largest, j)\n    return largest\n", "description": "返回n的最大质因数。假设n>1且不是质数。\n    ", "natural_language": "Chinese"}
25 | {"task_id": "python/24", "prompt": "\n\ndef sum_to_n(n: int):\n    \"\"\"sum_to_n是一个函数，它将从1加到n的数字相加。\n    \n    >>> sum_to_n(30)\n    465\n    >>> sum_to_n(100)\n    5050\n    >>> sum_to_n(5)\n    15\n    >>> sum_to_n(10)\n    55\n    >>> sum_to_n(1)\n    1\n    \"\"\"\n", "entry_point": "sum_to_n", "test": "\n\nMETADATA = {}\n\n\ndef check(candidate):\n    assert candidate(1) == 1\n    assert candidate(6) == 21\n    assert candidate(11) == 66\n    assert candidate(30) == 465\n    assert candidate(100) == 5050\n\n", "language": "python", "canonical_solution": "    return sum(range(n + 1))\n", "description": "sum_to_n是一个函数，它将从1加到n的数字相加。\n    ", "natural_language": "Chinese"}
26 | {"task_id": "python/25", "prompt": "\n\ndef derivative(xs: list):\n    \"\"\" xs表示一个多项式的系数。\n    xs[0] + xs[1] * x + xs[2] * x^2 + ....\n    返回该多项式的导数，形式不变。\n    \n    >>> derivative([3, 1, 2, 4, 5])\n    [1, 4, 12, 20]\n    >>> derivative([1, 2, 3])\n    [2, 6]\n    \"\"\"\n", "entry_point": "derivative", "test": "\n\nMETADATA = {}\n\n\ndef check(candidate):\n    assert candidate([3, 1, 2, 4, 5]) == [1, 4, 12, 20]\n    assert candidate([1, 2, 3]) == [2, 6]\n    assert candidate([3, 2, 1]) == [2, 2]\n    assert candidate([3, 2, 1, 0, 4]) == [2, 2, 0, 16]\n    assert candidate([1]) == []\n\n", "language": "python", "canonical_solution": "    return [(i * x) for i, x in enumerate(xs)][1:]\n", "description": "xs表示一个多项式的系数。\n    xs[0] + xs[1] * x + xs[2] * x^2 + ....\n    返回该多项式的导数，形式不变。\n    ", "natural_language": "Chinese"}
27 | {"task_id": "python/26", "prompt": "\n\ndef fibfib(n: int):\n    \"\"\"FibFib数列是一种类似于斐波那契数列的数列，定义如下：\n    fibfib(0) == 0\n    fibfib(1) == 0\n    fibfib(2) == 1\n    fibfib(n) == fibfib(n-1) + fibfib(n-2) + fibfib(n-3)。\n    请编写一个函数，高效地计算FibFib数列的第n个元素。\n    \n    >>> fibfib(1)\n    0\n    >>> fibfib(5)\n    4\n    >>> fibfib(8)\n    24\n    \"\"\"\n", "entry_point": "fibfib", "test": "\n\nMETADATA = {}\n\n\ndef check(candidate):\n    assert candidate(2) == 1\n    assert candidate(1) == 0\n    assert candidate(5) == 4\n    assert candidate(8) == 24\n    assert candidate(10) == 81\n    assert candidate(12) == 274\n    assert candidate(14) == 927\n\n", "language": "python", "canonical_solution": "    if n == 0:\n        return 0\n    if n == 1:\n        return 0\n    if n == 2:\n        return 1\n    return fibfib(n - 1) + fibfib(n - 2) + fibfib(n - 3)\n", "description": "FibFib数列是一种类似于斐波那契数列的数列，定义如下：\n    fibfib(0) == 0\n    fibfib(1) == 0\n    fibfib(2) == 1\n    fibfib(n) == fibfib(n-1) + fibfib(n-2) + fibfib(n-3)。\n    请编写一个函数，高效地计算FibFib数列的第n个元素。\n    ", "natural_language": "Chinese"}
28 | {"task_id": "python/27", "prompt": "\nFIX = \"\"\"\nAdd more test cases.\n\"\"\"\n\ndef vowels_count(s):\n    \"\"\"编写一个函数vowels_count，它接受表示单词的字符串作为输入，并返回字符串中元音字母的数量。在这种情况下，元音字母是'a'，'e'，'i'，'o'，'u'。在这里，'y'也是元音字母，但仅当它在给定单词的末尾时。\n\n    例：\n    \n    >>> vowels_count(\"abcde\")\n    2\n    >>> vowels_count(\"ACEDY\")\n    3\n    \"\"\"\n", "entry_point": "vowels_count", "test": "def check(candidate):\n\n    # Check some simple cases\n    assert candidate(\"abcde\") == 2, \"Test 1\"\n    assert candidate(\"Alone\") == 3, \"Test 2\"\n    assert candidate(\"key\") == 2, \"Test 3\"\n    assert candidate(\"bye\") == 1, \"Test 4\"\n    assert candidate(\"keY\") == 2, \"Test 5\"\n    assert candidate(\"bYe\") == 1, \"Test 6\"\n    assert candidate(\"ACEDY\") == 3, \"Test 7\"\n\n    # Check some edge cases that are easy to work out by hand.\n    assert True, \"This prints if this assert fails 2 (also good for debugging!)\"\n\n", "language": "python", "canonical_solution": "    vowels = \"aeiouAEIOU\"\n    n_vowels = sum(c in vowels for c in s)\n    if s[-1] == 'y' or s[-1] == 'Y':\n        n_vowels += 1\n    return n_vowels\n", "description": "编写一个函数vowels_count，它接受表示单词的字符串作为输入，并返回字符串中元音字母的数量。在这种情况下，元音字母是'a'，'e'，'i'，'o'，'u'。在这里，'y'也是元音字母，但仅当它在给定单词的末尾时。\n\n    例：\n    ", "natural_language": "Chinese"}
29 | {"task_id": "python/28", "prompt": "\ndef search(lst):\n    '''\n    给定一个非空的正整数列表。返回大于零且频率大于或等于该整数本身值的最大整数。整数的频率是它在列表中出现的次数。如果不存在这样的值，则返回-1。示例：\n    \n        search([4, 1, 2, 2, 3, 1]) == 2\n        search([1, 2, 2, 3, 3, 3, 4, 4, 4]) == 3\n        search([5, 5, 4, 4, 4]) == -1\n    '''\n", "entry_point": "search", "test": "def check(candidate):\n\n    # manually generated tests\n    assert candidate([5, 5, 5, 5, 1]) == 1\n    assert candidate([4, 1, 4, 1, 4, 4]) == 4\n    assert candidate([3, 3]) == -1\n    assert candidate([8, 8, 8, 8, 8, 8, 8, 8]) == 8\n    assert candidate([2, 3, 3, 2, 2]) == 2\n\n    # automatically generated tests\n    assert candidate([2, 7, 8, 8, 4, 8, 7, 3, 9, 6, 5, 10, 4, 3, 6, 7, 1, 7, 4, 10, 8, 1]) == 1\n    assert candidate([3, 2, 8, 2]) == 2\n    assert candidate([6, 7, 1, 8, 8, 10, 5, 8, 5, 3, 10]) == 1\n    assert candidate([8, 8, 3, 6, 5, 6, 4]) == -1\n    assert candidate([6, 9, 6, 7, 1, 4, 7, 1, 8, 8, 9, 8, 10, 10, 8, 4, 10, 4, 10, 1, 2, 9, 5, 7, 9]) == 1\n    assert candidate([1, 9, 10, 1, 3]) == 1\n    assert candidate([6, 9, 7, 5, 8, 7, 5, 3, 7, 5, 10, 10, 3, 6, 10, 2, 8, 6, 5, 4, 9, 5, 3, 10]) == 5\n    assert candidate([1]) == 1\n    assert candidate([8, 8, 10, 6, 4, 3, 5, 8, 2, 4, 2, 8, 4, 6, 10, 4, 2, 1, 10, 2, 1, 1, 5]) == 4\n    assert candidate([2, 10, 4, 8, 2, 10, 5, 1, 2, 9, 5, 5, 6, 3, 8, 6, 4, 10]) == 2\n    assert candidate([1, 6, 10, 1, 6, 9, 10, 8, 6, 8, 7, 3]) == 1\n    assert candidate([9, 2, 4, 1, 5, 1, 5, 2, 5, 7, 7, 7, 3, 10, 1, 5, 4, 2, 8, 4, 1, 9, 10, 7, 10, 2, 8, 10, 9, 4]) == 4\n    assert candidate([2, 6, 4, 2, 8, 7, 5, 6, 4, 10, 4, 6, 3, 7, 8, 8, 3, 1, 4, 2, 2, 10, 7]) == 4\n    assert candidate([9, 8, 6, 10, 2, 6, 10, 2, 7, 8, 10, 3, 8, 2, 6, 2, 3, 1]) == 2\n    assert candidate([5, 5, 3, 9, 5, 6, 3, 2, 8, 5, 6, 10, 10, 6, 8, 4, 10, 7, 7, 10, 8]) == -1\n    assert candidate([10]) == -1\n    assert candidate([9, 7, 7, 2, 4, 7, 2, 10, 9, 7, 5, 7, 2]) == 2\n    assert candidate([5, 4, 10, 2, 1, 1, 
10, 3, 6, 1, 8]) == 1\n    assert candidate([7, 9, 9, 9, 3, 4, 1, 5, 9, 1, 2, 1, 1, 10, 7, 5, 6, 7, 6, 7, 7, 6]) == 1\n    assert candidate([3, 10, 10, 9, 2]) == -1\n\n", "language": "python", "canonical_solution": "    frq = [0] * (max(lst) + 1)\n    for i in lst:\n        frq[i] += 1;\n\n    ans = -1\n    for i in range(1, len(frq)):\n        if frq[i] >= i:\n            ans = i\n    \n    return ans\n", "description": "给定一个非空的正整数列表。返回大于零且频率大于或等于该整数本身值的最大整数。整数的频率是它在列表中出现的次数。如果不存在这样的值，则返回-1。示例：\n    ", "natural_language": "Chinese"}
30 | {"task_id": "python/29", "prompt": "\ndef triangle_area(a, b, c):\n    '''\n    给定三角形的三条边长。如果这三条边可以组成一个有效的三角形，则返回保留两位小数的三角形面积。否则返回-1。当任意两条边的和大于第三条边时，三条边才能组成一个有效的三角形。例如：\n    \n    triangle_area(3, 4, 5) == 6.00\n    triangle_area(1, 2, 10) == -1\n    '''\n", "entry_point": "triangle_area", "test": "def check(candidate):\n\n    # Check some simple cases\n    assert candidate(3, 4, 5) == 6.00, \"This prints if this assert fails 1 (good for debugging!)\"\n    assert candidate(1, 2, 10) == -1\n    assert candidate(4, 8, 5) == 8.18\n    assert candidate(2, 2, 2) == 1.73\n    assert candidate(1, 2, 3) == -1\n    assert candidate(10, 5, 7) == 16.25\n    assert candidate(2, 6, 3) == -1\n\n    # Check some edge cases that are easy to work out by hand.\n    assert candidate(1, 1, 1) == 0.43, \"This prints if this assert fails 2 (also good for debugging!)\"\n    assert candidate(2, 2, 10) == -1\n\n", "language": "python", "canonical_solution": "    if a + b <= c or a + c <= b or b + c <= a:\n        return -1 \n    s = (a + b + c)/2    \n    area = (s * (s - a) * (s - b) * (s - c)) ** 0.5\n    area = round(area, 2)\n    return area\n", "description": "给定三角形的三条边长。如果这三条边可以组成一个有效的三角形，则返回保留两位小数的三角形面积。否则返回-1。当任意两条边的和大于第三条边时，三条边才能组成一个有效的三角形。例如：\n    ", "natural_language": "Chinese"}
31 | {"task_id": "python/30", "prompt": "\ndef will_it_fly(q,w):\n    '''\n    编写一个函数，如果对象q能够飞行则返回True，否则返回False。\n    如果对象q平衡（它是一个回文列表）且其元素的总和小于或等于最大可能重量w，则对象q将飞行。\n\n        示例：\n    will_it_fly([1, 2], 5) ➞ False \n    # 1+2小于最大可能重量，但不平衡。\n\n        will_it_fly([3, 2, 3], 1) ➞ False\n    # 它是平衡的，但3+2+3大于最大可能重量。\n\n        will_it_fly([3, 2, 3], 9) ➞ True\n    # 3+2+3小于最大可能重量，且平衡。\n\n        will_it_fly([3], 5) ➞ True\n    # 3小于最大可能重量，且平衡。\n    \n    '''\n", "entry_point": "will_it_fly", "test": "def check(candidate):\n\n    # Check some simple cases\n    assert candidate([3, 2, 3], 9) is True\n    assert candidate([1, 2], 5) is False\n    assert candidate([3], 5) is True\n    assert candidate([3, 2, 3], 1) is False\n\n\n    # Check some edge cases that are easy to work out by hand.\n    assert candidate([1, 2, 3], 6) is False\n    assert candidate([5], 5) is True\n\n", "language": "python", "canonical_solution": "    if sum(q) > w:\n        return False\n\n    i, j = 0, len(q)-1\n    while i<j:\n        if q[i] != q[j]:\n            return False\n        i+=1\n        j-=1\n    return True\n", "description": "编写一个函数，如果对象q能够飞行则返回True，否则返回False。\n    如果对象q平衡（它是一个回文列表）且其元素的总和小于或等于最大可能重量w，则对象q将飞行。\n\n        示例：\n    will_it_fly([1, 2], 5) ➞ False \n    # 1+2小于最大可能重量，但不平衡。\n\n        will_it_fly([3, 2, 3], 1) ➞ False\n    # 它是平衡的，但3+2+3大于最大可能重量。\n\n        will_it_fly([3, 2, 3], 9) ➞ True\n    # 3+2+3小于最大可能重量，且平衡。\n\n        will_it_fly([3], 5) ➞ True\n    # 3小于最大可能重量，且平衡。\n    ", "natural_language": "Chinese"}
32 | {"task_id": "python/31", "prompt": "\ndef is_multiply_prime(a):\n    \"\"\"编写一个函数，如果给定的数字是3个质数的乘积，则返回true，否则返回false。已知（a）小于100。示例：\n    \n    is_multiply_prime(30) == True\n    30 = 2 * 3 * 5\n    \"\"\"\n", "entry_point": "is_multiply_prime", "test": "def check(candidate):\n\n    assert candidate(5) == False\n    assert candidate(30) == True\n    assert candidate(8) == True\n    assert candidate(10) == False\n    assert candidate(125) == True\n    assert candidate(3 * 5 * 7) == True\n    assert candidate(3 * 6 * 7) == False\n    assert candidate(9 * 9 * 9) == False\n    assert candidate(11 * 9 * 9) == False\n    assert candidate(11 * 13 * 7) == True\n\n", "language": "python", "canonical_solution": "    def is_prime(n):\n        for j in range(2,n):\n            if n%j == 0:\n                return False\n        return True\n\n    for i in range(2,101):\n        if not is_prime(i): continue\n        for j in range(2,101):\n            if not is_prime(j): continue\n            for k in range(2,101):\n                if not is_prime(k): continue\n                if i*j*k == a: return True\n    return False\n", "description": "编写一个函数，如果给定的数字是3个质数的乘积，则返回true，否则返回false。已知（a）小于100。示例：\n    ", "natural_language": "Chinese"}
33 | {"task_id": "python/32", "prompt": "\ndef decimal_to_binary(decimal):\n    \"\"\"你将会得到一个十进制数，你的任务是将它转换成二进制格式。该函数应该返回一个字符串，每个字符代表一个二进制数。字符串中的每个字符都将是'0'或'1'。\n\n    字符串开头和结尾会有额外的字符'db'。这些额外的字符是为了帮助格式化。\n\n    例子：\n    \n    decimal_to_binary(15)   # returns \"db1111db\"\n    decimal_to_binary(32)   # returns \"db100000db\"\n    \"\"\"\n", "entry_point": "decimal_to_binary", "test": "def check(candidate):\n\n    # Check some simple cases\n    assert candidate(0) == \"db0db\"\n    assert candidate(32) == \"db100000db\"\n    assert candidate(103) == \"db1100111db\"\n    assert candidate(15) == \"db1111db\", \"This prints if this assert fails 1 (good for debugging!)\"\n\n    # Check some edge cases that are easy to work out by hand.\n    assert True, \"This prints if this assert fails 2 (also good for debugging!)\"\n\n", "language": "python", "canonical_solution": "    return \"db\" + bin(decimal)[2:] + \"db\"\n", "description": "你将会得到一个十进制数，你的任务是将它转换成二进制格式。该函数应该返回一个字符串，每个字符代表一个二进制数。字符串中的每个字符都将是'0'或'1'。\n\n    字符串开头和结尾会有额外的字符'db'。这些额外的字符是为了帮助格式化。\n\n    例子：\n    ", "natural_language": "Chinese"}
34 | {"task_id": "python/33", "prompt": "\ndef is_happy(s):\n    \"\"\"给定一个字符串s。\n你的任务是检查该字符串是否快乐。\n如果字符串的长度至少为3且每3个连续的字母都不同，则该字符串是快乐的。\n例如：\n    \n    is_happy(a) => False\n    is_happy(aa) => False\n    is_happy(abcd) => True\n    is_happy(aabb) => False\n    is_happy(adb) => True\n    is_happy(xyy) => False\n    \"\"\"\n", "entry_point": "is_happy", "test": "def check(candidate):\n\n    # Check some simple cases\n    assert candidate(\"a\") == False , \"a\"\n    assert candidate(\"aa\") == False , \"aa\"\n    assert candidate(\"abcd\") == True , \"abcd\"\n    assert candidate(\"aabb\") == False , \"aabb\"\n    assert candidate(\"adb\") == True , \"adb\"\n    assert candidate(\"xyy\") == False , \"xyy\"\n    assert candidate(\"iopaxpoi\") == True , \"iopaxpoi\"\n    assert candidate(\"iopaxioi\") == False , \"iopaxioi\"\n", "language": "python", "canonical_solution": "    if len(s) < 3:\n      return False\n\n    for i in range(len(s) - 2):\n      \n      if s[i] == s[i+1] or s[i+1] == s[i+2] or s[i] == s[i+2]:\n        return False\n    return True\n", "description": "给定一个字符串s。\n你的任务是检查该字符串是否快乐。\n如果字符串的长度至少为3且每3个连续的字母都不同，则该字符串是快乐的。\n例如：\n    ", "natural_language": "Chinese"}
35 | {"task_id": "python/34", "prompt": "\ndef numerical_letter_grade(grades):\n    \"\"\"这是学期的最后一周，老师需要给学生们打分。老师一直在制定自己的评分算法。唯一的问题是，她丢失了用于评分的代码。她给了你一份学生的GPA清单，你需要编写一个函数，根据以下表格输出字母等级的清单：\n\n                 GPA       |    Letter grade\n              4.0                A+\n            > 3.7                A \n            > 3.3                A- \n            > 3.0                B+\n            > 2.7                B \n            > 2.3                B-\n            > 2.0                C+\n            > 1.7                C\n            > 1.3                C-\n            > 1.0                D+ \n            > 0.7                D \n            > 0.0                D-\n              0.0                E\n    \n\n        例如：\n    \n    grade_equation([4.0, 3, 1.7, 2, 3.5]) ==> ['A+', 'B', 'C-', 'C', 'A-']\n    \"\"\"\n", "entry_point": "numerical_letter_grade", "test": "def check(candidate):\n\n    # Check some simple cases\n    assert candidate([4.0, 3, 1.7, 2, 3.5]) == ['A+', 'B', 'C-', 'C', 'A-']\n    assert candidate([1.2]) == ['D+']\n    assert candidate([0.5]) == ['D-']\n    assert candidate([0.0]) == ['E']\n    assert candidate([1, 0.3, 1.5, 2.8, 3.3]) == ['D', 'D-', 'C-', 'B', 'B+']\n    assert candidate([0, 0.7]) == ['E', 'D-']\n\n    # Check some edge cases that are easy to work out by hand.\n    assert True\n\n", "language": "python", "canonical_solution": "\n   \n    letter_grade = []\n    for gpa in grades:\n        if gpa == 4.0:\n            letter_grade.append(\"A+\")\n        elif gpa > 3.7:\n            letter_grade.append(\"A\")\n        elif gpa > 3.3:\n            letter_grade.append(\"A-\")\n        elif gpa > 3.0:\n            letter_grade.append(\"B+\")\n        elif gpa > 2.7:\n            letter_grade.append(\"B\")\n        elif gpa > 2.3:\n            letter_grade.append(\"B-\")\n        elif gpa > 2.0:\n            letter_grade.append(\"C+\")\n        elif gpa > 1.7:\n            letter_grade.append(\"C\")\n        elif gpa > 1.3:\n    
        letter_grade.append(\"C-\")\n        elif gpa > 1.0:\n            letter_grade.append(\"D+\")\n        elif gpa > 0.7:\n            letter_grade.append(\"D\")\n        elif gpa > 0.0:\n            letter_grade.append(\"D-\")\n        else:\n            letter_grade.append(\"E\")\n    return letter_grade\n", "description": "这是学期的最后一周，老师需要给学生们打分。老师一直在制定自己的评分算法。唯一的问题是，她丢失了用于评分的代码。她给了你一份学生的GPA清单，你需要编写一个函数，根据以下表格输出字母等级的清单：\n\n                 GPA       |    Letter grade\n              4.0                A+\n            > 3.7                A \n            > 3.3                A- \n            > 3.0                B+\n            > 2.7                B \n            > 2.3                B-\n            > 2.0                C+\n            > 1.7                C\n            > 1.3                C-\n            > 1.0                D+ \n            > 0.7                D \n            > 0.0                D-\n              0.0                E\n    \n\n        例如：\n    ", "natural_language": "Chinese"}
36 | {"task_id": "python/35", "prompt": "\ndef prime_length(string):\n    \"\"\"编写一个函数，它接受一个字符串并返回True，如果字符串长度是一个质数，否则返回False。\n示例：\n    \n    prime_length('Hello') == True\n    prime_length('abcdcba') == True\n    prime_length('kittens') == True\n    prime_length('orange') == False\n    \"\"\"\n", "entry_point": "prime_length", "test": "def check(candidate):\n\n    # Check some simple cases\n    assert candidate('Hello') == True\n    assert candidate('abcdcba') == True\n    assert candidate('kittens') == True\n    assert candidate('orange') == False\n    assert candidate('wow') == True\n    assert candidate('world') == True\n    assert candidate('MadaM') == True\n    assert candidate('Wow') == True\n    assert candidate('') == False\n    assert candidate('HI') == True\n    assert candidate('go') == True\n    assert candidate('gogo') == False\n    assert candidate('aaaaaaaaaaaaaaa') == False\n\n    # Check some edge cases that are easy to work out by hand.\n    assert candidate('Madam') == True\n    assert candidate('M') == False\n    assert candidate('0') == False\n\n", "language": "python", "canonical_solution": "    l = len(string)\n    if l == 0 or l == 1:\n        return False\n    for i in range(2, l):\n        if l % i == 0:\n            return False\n    return True\n", "description": "编写一个函数，它接受一个字符串并返回True，如果字符串长度是一个质数，否则返回False。\n示例：\n    ", "natural_language": "Chinese"}
37 | {"task_id": "python/36", "prompt": "\ndef solve(N):\n    \"\"\"给定一个正整数N，以二进制形式返回其各位数字的总和。\n\n        示例\n        对于N = 1000，数字总和为1，输出应为“1”。\n        对于N = 150，数字总和为6，输出应为“110”。\n        对于N = 147，数字总和为12，输出应为“1100”。\n\n        变量：\n        @N 整数\n             约束条件：0 ≤ N ≤ 10000。\n    输出：\n         一个二进制数字的字符串。\n    \n    \"\"\"\n", "entry_point": "solve", "test": "def check(candidate):\n\n    # Check some simple cases\n    assert True, \"This prints if this assert fails 1 (good for debugging!)\"\n    assert candidate(1000) == \"1\", \"Error\"\n    assert candidate(150) == \"110\", \"Error\"\n    assert candidate(147) == \"1100\", \"Error\"\n\n    # Check some edge cases that are easy to work out by hand.\n    assert True, \"This prints if this assert fails 2 (also good for debugging!)\"\n    assert candidate(333) == \"1001\", \"Error\"\n    assert candidate(963) == \"10010\", \"Error\"\n\n", "language": "python", "canonical_solution": "    return bin(sum(int(i) for i in str(N)))[2:]\n", "description": "给定一个正整数N，以二进制形式返回其各位数字的总和。\n\n        示例\n        对于N = 1000，数字总和为1，输出应为“1”。\n        对于N = 150，数字总和为6，输出应为“110”。\n        对于N = 147，数字总和为12，输出应为“1100”。\n\n        变量：\n        @N 整数\n             约束条件：0 ≤ N ≤ 10000。\n    输出：\n         一个二进制数字的字符串。\n    ", "natural_language": "Chinese"}
38 | {"task_id": "python/37", "prompt": "\ndef get_row(lst, x):\n    \"\"\"\n    给定一个二维数据，作为嵌套列表，类似于矩阵，但与矩阵不同的是，每行可能包含不同数量的列。给定lst和整数x，在列表中查找整数x，并返回元组列表[(x1，y1)，（x2，y2）...]，使得每个元组都是一个坐标-（行，列），从0开始。最初按行按升序排序坐标。此外，按列按降序排序行的坐标。\n\n    例子：\n    \n    get_row([\n      [1,2,3,4,5,6],\n      [1,2,3,4,1,6],\n      [1,2,3,4,5,1]\n    ], 1) == [(0, 0), (1, 4), (1, 0), (2, 5), (2, 0)]\n    get_row([], 1) == []\n    get_row([[], [1], [1, 2, 3]], 3) == [(2, 2)]\n    \"\"\"\n", "entry_point": "get_row", "test": "def check(candidate):\n\n    # Check some simple cases\n    assert candidate([\n        [1,2,3,4,5,6],\n        [1,2,3,4,1,6],\n        [1,2,3,4,5,1]\n    ], 1) == [(0, 0), (1, 4), (1, 0), (2, 5), (2, 0)]\n    assert candidate([\n        [1,2,3,4,5,6],\n        [1,2,3,4,5,6],\n        [1,2,3,4,5,6],\n        [1,2,3,4,5,6],\n        [1,2,3,4,5,6],\n        [1,2,3,4,5,6]\n    ], 2) == [(0, 1), (1, 1), (2, 1), (3, 1), (4, 1), (5, 1)]\n    assert candidate([\n        [1,2,3,4,5,6],\n        [1,2,3,4,5,6],\n        [1,1,3,4,5,6],\n        [1,2,1,4,5,6],\n        [1,2,3,1,5,6],\n        [1,2,3,4,1,6],\n        [1,2,3,4,5,1]\n    ], 1) == [(0, 0), (1, 0), (2, 1), (2, 0), (3, 2), (3, 0), (4, 3), (4, 0), (5, 4), (5, 0), (6, 5), (6, 0)]\n    assert candidate([], 1) == []\n    assert candidate([[1]], 2) == []\n    assert candidate([[], [1], [1, 2, 3]], 3) == [(2, 2)]\n\n    # Check some edge cases that are easy to work out by hand.\n    assert True\n\n", "language": "python", "canonical_solution": "    coords = [(i, j) for i in range(len(lst)) for j in range(len(lst[i])) if lst[i][j] == x]\n    return sorted(sorted(coords, key=lambda x: x[1], reverse=True), key=lambda x: x[0])\n", "description": "给定一个二维数据，作为嵌套列表，类似于矩阵，但与矩阵不同的是，每行可能包含不同数量的列。给定lst和整数x，在列表中查找整数x，并返回元组列表[(x1，y1)，（x2，y2）...]，使得每个元组都是一个坐标-（行，列），从0开始。最初按行按升序排序坐标。此外，按列按降序排序行的坐标。\n\n    例子：\n    ", "natural_language": "Chinese"}
39 | {"task_id": "python/38", "prompt": "\ndef next_smallest(lst):\n    \"\"\"\n    你有一个整数列表。\n编写一个函数next_smallest()，返回列表中第二小的元素。\n如果没有这样的元素，则返回None。\n    \n    \n    next_smallest([1, 2, 3, 4, 5]) == 2\n    next_smallest([5, 1, 4, 3, 2]) == 2\n    next_smallest([]) == None\n    next_smallest([1, 1]) == None\n    \"\"\"\n", "entry_point": "next_smallest", "test": "def check(candidate):\n\n    # Check some simple cases\n    assert candidate([1, 2, 3, 4, 5]) == 2\n    assert candidate([5, 1, 4, 3, 2]) == 2\n    assert candidate([]) == None\n    assert candidate([1, 1]) == None\n    assert candidate([1,1,1,1,0]) == 1\n    assert candidate([1, 0**0]) == None\n    assert candidate([-35, 34, 12, -45]) == -35\n\n    # Check some edge cases that are easy to work out by hand.\n    assert True\n\n", "language": "python", "canonical_solution": "    lst = sorted(set(lst))\n    return None if len(lst) < 2 else lst[1]\n", "description": "你有一个整数列表。\n编写一个函数next_smallest()，返回列表中第二小的元素。\n如果没有这样的元素，则返回None。\n    ", "natural_language": "Chinese"}
40 | {"task_id": "python/39", "prompt": "\ndef is_bored(S):\n    \"\"\"\n    你将会得到一个由单词组成的字符串，你的任务是计算无聊的数量。无聊的句子是以单词\"I\"开头的句子。句子以'.'、'?'或'!'为分隔符。\n\n    例如：\n    \n    >>> is_bored(\"Hello world\")\n    0\n    >>> is_bored(\"The sky is blue. The sun is shining. I love this weather\")\n    1\n    \"\"\"\n", "entry_point": "is_bored", "test": "def check(candidate):\n\n    # Check some simple cases\n    assert candidate(\"Hello world\") == 0, \"Test 1\"\n    assert candidate(\"Is the sky blue?\") == 0, \"Test 2\"\n    assert candidate(\"I love It !\") == 1, \"Test 3\"\n    assert candidate(\"bIt\") == 0, \"Test 4\"\n    assert candidate(\"I feel good today. I will be productive. will kill It\") == 2, \"Test 5\"\n    assert candidate(\"You and I are going for a walk\") == 0, \"Test 6\"\n\n    # Check some edge cases that are easy to work out by hand.\n    assert True, \"This prints if this assert fails 2 (also good for debugging!)\"\n\n", "language": "python", "canonical_solution": "    import re\n    sentences = re.split(r'[.?!]\\s*', S)\n    return sum(sentence[0:2] == 'I ' for sentence in sentences)\n", "description": "你将会得到一个由单词组成的字符串，你的任务是计算无聊的数量。无聊的句子是以单词\"I\"开头的句子。句子以'.'、'?'或'!'为分隔符。\n\n    例如：\n    ", "natural_language": "Chinese"}
41 | {"task_id": "python/40", "prompt": "\n\ndef skjkasdkd(lst):\n    \"\"\"给定一个整数列表。\n你需要找到最大的质数值并返回其数字之和。\n\n    例子：\n    \n    For lst = [0,3,2,1,3,5,7,4,5,5,5,2,181,32,4,32,3,2,32,324,4,3] the output should be 10\n    For lst = [1,0,1,8,2,4597,2,1,3,40,1,2,1,2,4,2,5,1] the output should be 25\n    For lst = [1,3,1,32,5107,34,83278,109,163,23,2323,32,30,1,9,3] the output should be 13\n    For lst = [0,724,32,71,99,32,6,0,5,91,83,0,5,6] the output should be 11\n    For lst = [0,81,12,3,1,21] the output should be 3\n    For lst = [0,8,1,2,1,7] the output should be 7\n    \"\"\"\n", "entry_point": "skjkasdkd", "test": "def check(candidate):\n\n    # Check some simple cases\n    assert candidate([0,3,2,1,3,5,7,4,5,5,5,2,181,32,4,32,3,2,32,324,4,3]) == 10, \"This prints if this assert fails 1 (good for debugging!)\"\n\n    # Check some edge cases that are easy to work out by hand.\n    assert candidate([1,0,1,8,2,4597,2,1,3,40,1,2,1,2,4,2,5,1]) == 25, \"This prints if this assert fails 2 (also good for debugging!)\"\n\n    # Check some edge cases that are easy to work out by hand.\n    assert candidate([1,3,1,32,5107,34,83278,109,163,23,2323,32,30,1,9,3]) == 13, \"This prints if this assert fails 3 (also good for debugging!)\"\n\n    # Check some edge cases that are easy to work out by hand.\n    assert candidate([0,724,32,71,99,32,6,0,5,91,83,0,5,6]) == 11, \"This prints if this assert fails 4 (also good for debugging!)\"\n    \n    # Check some edge cases that are easy to work out by hand.\n    assert candidate([0,81,12,3,1,21]) == 3, \"This prints if this assert fails 5 (also good for debugging!)\"\n\n    # Check some edge cases that are easy to work out by hand.\n    assert candidate([0,8,1,2,1,7]) == 7, \"This prints if this assert fails 6 (also good for debugging!)\"\n\n    assert candidate([8191]) == 19, \"This prints if this assert fails 7 (also good for debugging!)\"\n    assert candidate([8191, 123456, 127, 7]) == 19, \"This prints if this assert fails 8 
(also good for debugging!)\"\n    assert candidate([127, 97, 8192]) == 10, \"This prints if this assert fails 9 (also good for debugging!)\"\n", "language": "python", "canonical_solution": "    def isPrime(n):\n        for i in range(2,int(n**0.5)+1):\n            if n%i==0:\n                return False\n\n        return True\n    maxx = 0\n    i = 0\n    while i < len(lst):\n        if(lst[i] > maxx and isPrime(lst[i])):\n            maxx = lst[i]\n        i+=1\n    result = sum(int(digit) for digit in str(maxx))\n    return result\n\n", "description": "给定一个整数列表。\n你需要找到最大的质数值并返回其数字之和。\n\n    例子：\n    ", "natural_language": "Chinese"}
42 | {"task_id": "python/41", "prompt": "\ndef check_dict_case(dict):\n    \"\"\"\n    给定一个字典，如果所有键都是小写字符串或所有键都是大写字符串，则返回True，否则返回False。如果给定的字典为空，则函数应返回False。示例：\n    \n    check_dict_case({\"a\":\"apple\", \"b\":\"banana\"}) should return True.\n    check_dict_case({\"a\":\"apple\", \"A\":\"banana\", \"B\":\"banana\"}) should return False.\n    check_dict_case({\"a\":\"apple\", 8:\"banana\", \"a\":\"apple\"}) should return False.\n    check_dict_case({\"Name\":\"John\", \"Age\":\"36\", \"City\":\"Houston\"}) should return False.\n    check_dict_case({\"STATE\":\"NC\", \"ZIP\":\"12345\" }) should return True.\n    \"\"\"\n", "entry_point": "check_dict_case", "test": "def check(candidate):\n\n    # Check some simple cases\n    assert candidate({\"p\":\"pineapple\", \"b\":\"banana\"}) == True, \"First test error: \" + str(candidate({\"p\":\"pineapple\", \"b\":\"banana\"}))\n    assert candidate({\"p\":\"pineapple\", \"A\":\"banana\", \"B\":\"banana\"}) == False, \"Second test error: \" + str(candidate({\"p\":\"pineapple\", \"A\":\"banana\", \"B\":\"banana\"}))\n    assert candidate({\"p\":\"pineapple\", 5:\"banana\", \"a\":\"apple\"}) == False, \"Third test error: \" + str(candidate({\"p\":\"pineapple\", 5:\"banana\", \"a\":\"apple\"}))\n    assert candidate({\"Name\":\"John\", \"Age\":\"36\", \"City\":\"Houston\"}) == False, \"Fourth test error: \" + str(candidate({\"Name\":\"John\", \"Age\":\"36\", \"City\":\"Houston\"}))\n    assert candidate({\"STATE\":\"NC\", \"ZIP\":\"12345\" }) == True, \"Fifth test error: \" + str(candidate({\"STATE\":\"NC\", \"ZIP\":\"12345\" }))      \n    assert candidate({\"fruit\":\"Orange\", \"taste\":\"Sweet\" }) == True, \"Fourth test error: \" + str(candidate({\"fruit\":\"Orange\", \"taste\":\"Sweet\" }))      \n\n\n    # Check some edge cases that are easy to work out by hand.\n    assert candidate({}) == False, \"1st edge test error: \" + str(candidate({}))\n\n", "language": "python", "canonical_solution": "    if len(dict.keys()) 
== 0:\n        return False\n    else:\n        state = \"start\"\n        for key in dict.keys():\n\n            if isinstance(key, str) == False:\n                state = \"mixed\"\n                break\n            if state == \"start\":\n                if key.isupper():\n                    state = \"upper\"\n                elif key.islower():\n                    state = \"lower\"\n                else:\n                    break\n            elif (state == \"upper\" and not key.isupper()) or (state == \"lower\" and not key.islower()):\n                    state = \"mixed\"\n                    break\n            else:\n                break\n        return state == \"upper\" or state == \"lower\" \n", "description": "给定一个字典，如果所有键都是小写字符串或所有键都是大写字符串，则返回True，否则返回False。如果给定的字典为空，则函数应返回False。示例：\n    ", "natural_language": "Chinese"}
43 | {"task_id": "python/42", "prompt": "\ndef closest_integer(value):\n    '''\n    创建一个函数，它接受一个表示数字的值（字符串），并返回最接近它的整数。如果该数字距离两个整数相等，则将其四舍五入到远离零的方向。\n\n    例子：\n    \n    >>> closest_integer(\"10\")\n    10\n    >>> closest_integer(\"15.3\")\n    15\n\n    Note:\n    Rounding away from zero means that if the given number is equidistant\n    from two integers, the one you should return is the one that is the\n    farthest from zero. For example closest_integer(\"14.5\") should\n    return 15 and closest_integer(\"-14.5\") should return -15.\n    '''\n", "entry_point": "closest_integer", "test": "def check(candidate):\n\n    # Check some simple cases\n    assert candidate(\"10\") == 10, \"Test 1\"\n    assert candidate(\"14.5\") == 15, \"Test 2\"\n    assert candidate(\"-15.5\") == -16, \"Test 3\"\n    assert candidate(\"15.3\") == 15, \"Test 3\"\n\n    # Check some edge cases that are easy to work out by hand.\n    assert candidate(\"0\") == 0, \"Test 0\"\n\n", "language": "python", "canonical_solution": "    from math import floor, ceil\n\n    if value.count('.') == 1:\n        # remove trailing zeros\n        while (value[-1] == '0'):\n            value = value[:-1]\n\n    num = float(value)\n    if value[-2:] == '.5':\n        if num > 0:\n            res = ceil(num)\n        else:\n            res = floor(num)\n    elif len(value) > 0:\n        res = int(round(num))\n    else:\n        res = 0\n\n    return res\n\n", "description": "创建一个函数，它接受一个表示数字的值（字符串），并返回最接近它的整数。如果该数字距离两个整数相等，则将其四舍五入到远离零的方向。\n\n    例子：\n    ", "natural_language": "Chinese"}
44 | {"task_id": "python/43", "prompt": "\ndef make_a_pile(n):\n    \"\"\"\n    给定一个正整数n，你需要建立一个n级的石头堆。\n第一层有n个石头。\n下一层的石头数量为：\n- 如果n是奇数，则为下一个奇数。\n- 如果n是偶数，则为下一个偶数。\n返回一个列表，其中索引为i的元素表示第(i+1)层的石头数量。\n\n    例子：\n    \n    >>> make_a_pile(3)\n    [3, 5, 7]\n    \"\"\"\n", "entry_point": "make_a_pile", "test": "def check(candidate):\n\n    # Check some simple cases\n    assert candidate(3) == [3, 5, 7], \"Test 3\"\n    assert candidate(4) == [4,6,8,10], \"Test 4\"\n    assert candidate(5) == [5, 7, 9, 11, 13]\n    assert candidate(6) == [6, 8, 10, 12, 14, 16]\n    assert candidate(8) == [8, 10, 12, 14, 16, 18, 20, 22]\n\n    # Check some edge cases that are easy to work out by hand.\n    assert True, \"This prints if this assert fails 2 (also good for debugging!)\"\n\n", "language": "python", "canonical_solution": "    return [n + 2*i for i in range(n)]\n", "description": "给定一个正整数n，你需要建立一个n级的石头堆。\n第一层有n个石头。\n下一层的石头数量为：\n- 如果n是奇数，则为下一个奇数。\n- 如果n是偶数，则为下一个偶数。\n返回一个列表，其中索引为i的元素表示第(i+1)层的石头数量。\n\n    例子：\n    ", "natural_language": "Chinese"}
45 | {"task_id": "python/44", "prompt": "\ndef words_string(s):\n    \"\"\"\n    你将会得到一个由逗号或空格分隔的单词字符串。你的任务是将字符串分割成单词并返回一个单词数组。\n\n    例如：\n    \n    words_string(\"Hi, my name is John\") == [\"Hi\", \"my\", \"name\", \"is\", \"John\"]\n    words_string(\"One, two, three, four, five, six\") == [\"One\", \"two\", \"three\", \"four\", \"five\", \"six\"]\n    \"\"\"\n", "entry_point": "words_string", "test": "def check(candidate):\n\n    # Check some simple cases\n    assert True, \"This prints if this assert fails 1 (good for debugging!)\"\n    assert candidate(\"Hi, my name is John\") == [\"Hi\", \"my\", \"name\", \"is\", \"John\"]\n    assert candidate(\"One, two, three, four, five, six\") == [\"One\", \"two\", \"three\", \"four\", \"five\", \"six\"]\n    assert candidate(\"Hi, my name\") == [\"Hi\", \"my\", \"name\"]\n    assert candidate(\"One,, two, three, four, five, six,\") == [\"One\", \"two\", \"three\", \"four\", \"five\", \"six\"]\n\n    # Check some edge cases that are easy to work out by hand.\n    assert True, \"This prints if this assert fails 2 (also good for debugging!)\"\n    assert candidate(\"\") == []\n    assert candidate(\"ahmed     , gamal\") == [\"ahmed\", \"gamal\"]\n\n", "language": "python", "canonical_solution": "    if not s:\n        return []\n\n    s_list = []\n\n    for letter in s:\n        if letter == ',':\n            s_list.append(' ')\n        else:\n            s_list.append(letter)\n\n    s_list = \"\".join(s_list)\n    return s_list.split()\n", "description": "你将会得到一个由逗号或空格分隔的单词字符串。你的任务是将字符串分割成单词并返回一个单词数组。\n\n    例如：\n    ", "natural_language": "Chinese"}
46 | {"task_id": "python/45", "prompt": "\ndef choose_num(x, y):\n    \"\"\"这个函数接受两个正整数x和y，并返回在[x，y]范围内的最大偶数整数。如果没有这样的数字，则函数应返回-1。\n\n    例如：\n    \n    choose_num(12, 15) = 14\n    choose_num(13, 12) = -1\n    \"\"\"\n", "entry_point": "choose_num", "test": "def check(candidate):\n\n    # Check some simple cases\n    assert candidate(12, 15) == 14\n    assert candidate(13, 12) == -1\n    assert candidate(33, 12354) == 12354\n    assert candidate(5234, 5233) == -1\n    assert candidate(6, 29) == 28\n    assert candidate(27, 10) == -1\n\n    # Check some edge cases that are easy to work out by hand.\n    assert candidate(7, 7) == -1\n    assert candidate(546, 546) == 546\n\n", "language": "python", "canonical_solution": "    if x > y:\n        return -1\n    if y % 2 == 0:\n        return y\n    if x == y:\n        return -1\n    return y - 1\n", "description": "这个函数接受两个正整数x和y，并返回在[x，y]范围内的最大偶数整数。如果没有这样的数字，则函数应返回-1。\n\n    例如：\n    ", "natural_language": "Chinese"}
47 | {"task_id": "python/46", "prompt": "\ndef rounded_avg(n, m):\n    \"\"\"给定两个正整数n和m，你的任务是计算从n到m（包括n和m）的整数的平均值。\n将答案四舍五入为最接近的整数，并将其转换为二进制。\n如果n大于m，则返回-1。\n例子：\n    \n    rounded_avg(1, 5) => \"0b11\"\n    rounded_avg(7, 5) => -1\n    rounded_avg(10, 20) => \"0b1111\"\n    rounded_avg(20, 33) => \"0b11010\"\n    \"\"\"\n", "entry_point": "rounded_avg", "test": "def check(candidate):\n\n    # Check some simple cases\n    assert candidate(1, 5) == \"0b11\"\n    assert candidate(7, 13) == \"0b1010\"\n    assert candidate(964,977) == \"0b1111001010\"\n    assert candidate(996,997) == \"0b1111100100\"\n    assert candidate(560,851) == \"0b1011000010\"\n    assert candidate(185,546) == \"0b101101110\"\n    assert candidate(362,496) == \"0b110101101\"\n    assert candidate(350,902) == \"0b1001110010\"\n    assert candidate(197,233) == \"0b11010111\"\n\n\n    # Check some edge cases that are easy to work out by hand.\n    assert candidate(7, 5) == -1\n    assert candidate(5, 1) == -1\n    assert candidate(5, 5) == \"0b101\"\n\n", "language": "python", "canonical_solution": "    if m < n:\n        return -1\n    summation = 0\n    for i in range(n, m+1):\n        summation += i\n    return bin(round(summation/(m - n + 1)))\n", "description": "给定两个正整数n和m，你的任务是计算从n到m（包括n和m）的整数的平均值。\n将答案四舍五入为最接近的整数，并将其转换为二进制。\n如果n大于m，则返回-1。\n例子：\n    ", "natural_language": "Chinese"}
48 | {"task_id": "python/47", "prompt": "\ndef f(n):\n    \"\"\" 实现函数f，它以n为参数，返回一个大小为n的列表，其中索引i处的元素值为i的阶乘（如果i为偶数）或1到i的数字之和（如果i为奇数）。i从1开始。i的阶乘是从1到i的数字的乘积（1 * 2 * ... * i）。例如：\n    \n    f(5) == [1, 2, 6, 24, 15]\n    \"\"\"\n", "entry_point": "f", "test": "def check(candidate):\n\n    assert candidate(5) == [1, 2, 6, 24, 15]\n    assert candidate(7) == [1, 2, 6, 24, 15, 720, 28]\n    assert candidate(1) == [1]\n    assert candidate(3) == [1, 2, 6]\n", "language": "python", "canonical_solution": "    ret = []\n    for i in range(1,n+1):\n        if i%2 == 0:\n            x = 1\n            for j in range(1,i+1): x *= j\n            ret += [x]\n        else:\n            x = 0\n            for j in range(1,i+1): x += j\n            ret += [x]\n    return ret\n", "description": "实现函数f，它以n为参数，返回一个大小为n的列表，其中索引i处的元素值为i的阶乘（如果i为偶数）或1到i的数字之和（如果i为奇数）。i从1开始。i的阶乘是从1到i的数字的乘积（1 * 2 * ... * i）。例如：\n    ", "natural_language": "Chinese"}
49 | {"task_id": "python/48", "prompt": "\ndef even_odd_palindrome(n):\n    \"\"\"\n    给定一个正整数n，返回一个元组，其中包含在范围（1，n）内（含两端）的偶数和奇数整数回文数的数量。\n\n        示例1：\n\n            输入：3\n        输出：（1，2）\n        说明：\n        整数回文是1、2、3。其中一个是偶数，两个是奇数。\n\n        示例2：\n\n            输入：12\n        输出：（4，6）\n        说明：\n        整数回文是1、2、3、4、5、6、7、8、9、11。其中4个是偶数，6个是奇数。\n\n        注意：\n        1. 1 <= n <= 10^3\n        2. 返回的元组分别是偶数和奇数整数回文数的数量。\n    \n    \"\"\"\n", "entry_point": "even_odd_palindrome", "test": "def check(candidate):\n\n    # Check some simple cases\n    assert candidate(123) == (8, 13)\n    assert candidate(12) == (4, 6)\n    assert candidate(3) == (1, 2)\n    assert candidate(63) == (6, 8)\n    assert candidate(25) == (5, 6)\n    assert candidate(19) == (4, 6)\n    assert candidate(9) == (4, 5), \"This prints if this assert fails 1 (good for debugging!)\"\n\n    # Check some edge cases that are easy to work out by hand.\n    assert candidate(1) == (0, 1), \"This prints if this assert fails 2 (also good for debugging!)\"\n\n", "language": "python", "canonical_solution": "    def is_palindrome(n):\n        return str(n) == str(n)[::-1]\n\n    even_palindrome_count = 0\n    odd_palindrome_count = 0\n\n    for i in range(1, n+1):\n        if i%2 == 1 and is_palindrome(i):\n                odd_palindrome_count += 1\n        elif i%2 == 0 and is_palindrome(i):\n            even_palindrome_count += 1\n    return (even_palindrome_count, odd_palindrome_count)\n", "description": "给定一个正整数n，返回一个元组，其中包含在范围（1，n）内（含两端）的偶数和奇数整数回文数的数量。\n\n        示例1：\n\n            输入：3\n        输出：（1，2）\n        说明：\n        整数回文是1、2、3。其中一个是偶数，两个是奇数。\n\n        示例2：\n\n            输入：12\n        输出：（4，6）\n        说明：\n        整数回文是1、2、3、4、5、6、7、8、9、11。其中4个是偶数，6个是奇数。\n\n        注意：\n        1. 1 <= n <= 10^3\n        2. 返回的元组分别是偶数和奇数整数回文数的数量。\n    ", "natural_language": "Chinese"}
50 | {"task_id": "python/49", "prompt": "\ndef move_one_ball(arr):\n    \"\"\"我们有一个由N个整数组成的数组'arr'，数组中的数字将是随机排序的。你的任务是确定是否可以通过对给定数组执行以下操作来获得按非递减顺序排序的数组：\n\n        您可以执行任意次数的右移操作。\n    \n    一个右移操作意味着将数组的所有元素向右移动一个位置。数组的最后一个元素将移动到数组的起始位置，即0号索引。\n\n        如果可以通过执行上述操作获得排序后的数组，则返回True，否则返回False。\n    如果给定的数组为空，则返回True。\n\n        注意：给定的列表保证具有唯一元素。\n\n        例如：\n    \n    move_one_ball([3, 4, 5, 1, 2])==>True\n    解释：通过执行2次右移操作，可以为给定数组实现非递减顺序。\n    move_one_ball([3, 5, 4, 1, 2])==>False\n    解释：通过执行任意数量的右移操作，无法为给定数组获得非递减顺序。\n    \n                \n    \"\"\"\n", "entry_point": "move_one_ball", "test": "def check(candidate):\n\n    # Check some simple cases\n    assert candidate([3, 4, 5, 1, 2])==True, \"This prints if this assert fails 1 (good for debugging!)\"\n    assert candidate([3, 5, 10, 1, 2])==True\n    assert candidate([4, 3, 1, 2])==False\n    # Check some edge cases that are easy to work out by hand.\n    assert candidate([3, 5, 4, 1, 2])==False, \"This prints if this assert fails 2 (also good for debugging!)\"\n    assert candidate([])==True\n", "language": "python", "canonical_solution": "    if len(arr)==0:\n      return True\n    sorted_array=sorted(arr)\n    my_arr=[]\n    \n    min_value=min(arr)\n    min_index=arr.index(min_value)\n    my_arr=arr[min_index:]+arr[0:min_index]\n    for i in range(len(arr)):\n      if my_arr[i]!=sorted_array[i]:\n        return False\n    return True\n", "description": "我们有一个由N个整数组成的数组'arr'，数组中的数字将是随机排序的。你的任务是确定是否可以通过对给定数组执行以下操作来获得按非递减顺序排序的数组：\n\n        您可以执行任意次数的右移操作。\n    \n    一个右移操作意味着将数组的所有元素向右移动一个位置。数组的最后一个元素将移动到数组的起始位置，即0号索引。\n\n        如果可以通过执行上述操作获得排序后的数组，则返回True，否则返回False。\n    如果给定的数组为空，则返回True。\n\n        注意：给定的列表保证具有唯一元素。\n\n        例如：\n    \n    move_one_ball([3, 4, 5, 1, 2])==>True\n    解释：通过执行2次右移操作，可以为给定数组实现非递减顺序。\n    move_one_ball([3, 5, 4, 1, 2])==>False\n    解释：通过执行任意数量的右移操作，无法为给定数组获得非递减顺序。\n    ", "natural_language": "Chinese"}
51 | {"task_id": "python/50", "prompt": "\ndef exchange(lst1, lst2):\n    \"\"\"在这个问题中，您将实现一个函数，该函数接受两个数字列表，并确定是否可以执行元素交换，使lst1成为仅包含偶数的列表。在lst1和lst2之间交换元素的数量没有限制。如果可以在lst1和lst2之间交换元素以使lst1的所有元素都是偶数，则返回“YES”。否则，返回“NO”。例如：exchange（[1,2,3,4]，[1,2,3,4]）=>“YES”exchange（[1,2,3,4]，[1,5,3,4]）=>“NO”假定输入列表将不为空。\n    \n    \"\"\"\n", "entry_point": "exchange", "test": "def check(candidate):\n\n    # Check some simple cases\n    assert candidate([1, 2, 3, 4], [1, 2, 3, 4]) == \"YES\"\n    assert candidate([1, 2, 3, 4], [1, 5, 3, 4]) == \"NO\"\n    assert candidate([1, 2, 3, 4], [2, 1, 4, 3]) == \"YES\" \n    assert candidate([5, 7, 3], [2, 6, 4]) == \"YES\"\n    assert candidate([5, 7, 3], [2, 6, 3]) == \"NO\" \n    assert candidate([3, 2, 6, 1, 8, 9], [3, 5, 5, 1, 1, 1]) == \"NO\"\n\n    # Check some edge cases that are easy to work out by hand.\n    assert candidate([100, 200], [200, 200]) == \"YES\"\n\n", "language": "python", "canonical_solution": "    odd = 0\n    even = 0\n    for i in lst1:\n        if i%2 == 1:\n            odd += 1\n    for i in lst2:\n        if i%2 == 0:\n            even += 1\n    if even >= odd:\n        return \"YES\"\n    return \"NO\"\n            \n", "description": "在这个问题中，您将实现一个函数，该函数接受两个数字列表，并确定是否可以执行元素交换，使lst1成为仅包含偶数的列表。在lst1和lst2之间交换元素的数量没有限制。如果可以在lst1和lst2之间交换元素以使lst1的所有元素都是偶数，则返回“YES”。否则，返回“NO”。例如：exchange（[1,2,3,4]，[1,2,3,4]）=>“YES”exchange（[1,2,3,4]，[1,5,3,4]）=>“NO”假定输入列表将不为空。\n    ", "natural_language": "Chinese"}
52 | {"task_id": "python/51", "prompt": "\ndef reverse_delete(s,c):\n    \"\"\"任务\n给定两个字符串s和c，您必须删除s中所有与c中任何字符相等的字符，然后检查结果字符串是否为回文。\n如果一个字符串从前往后读和从后往前读是一样的，那么这个字符串就是回文的。\n您应该返回一个包含结果字符串和True/False检查的元组。\n例子\n对于s =“abcde”，c =“ae”，结果应该是（'bcd'，False）\n对于s =“abcdef”，c =“b”，结果应该是（'acdef'，False）\n对于s =“abcdedcba”，c =“ab”，结果应该是（'cdedc'，True）\n    \n    \"\"\"\n", "entry_point": "reverse_delete", "test": "def check(candidate):\n\n    assert candidate(\"abcde\",\"ae\") == ('bcd',False)\n    assert candidate(\"abcdef\", \"b\") == ('acdef',False)\n    assert candidate(\"abcdedcba\",\"ab\") == ('cdedc',True)\n    assert candidate(\"dwik\",\"w\") == ('dik',False)\n    assert candidate(\"a\",\"a\") == ('',True)\n    assert candidate(\"abcdedcba\",\"\") == ('abcdedcba',True)\n    assert candidate(\"abcdedcba\",\"v\") == ('abcdedcba',True)\n    assert candidate(\"vabba\",\"v\") == ('abba',True)\n    assert candidate(\"mamma\", \"mia\") == (\"\", True)\n", "language": "python", "canonical_solution": "    s = ''.join([char for char in s if char not in c])\n    return (s,s[::-1] == s)\n", "description": "任务\n给定两个字符串s和c，您必须删除s中所有与c中任何字符相等的字符，然后检查结果字符串是否为回文。\n如果一个字符串从前往后读和从后往前读是一样的，那么这个字符串就是回文的。\n您应该返回一个包含结果字符串和True/False检查的元组。\n例子\n对于s =“abcde”，c =“ae”，结果应该是（'bcd'，False）\n对于s =“abcdef”，c =“b”，结果应该是（'acdef'，False）\n对于s =“abcdedcba”，c =“ab”，结果应该是（'cdedc'，True）\n    ", "natural_language": "Chinese"}
53 | {"task_id": "python/52", "prompt": "\ndef max_fill(grid, capacity):\n    import math\n    \"\"\"\n    你有一个井的矩形网格。每一行代表一个井，每一行中的1代表一个单位的水。每个井都有一个对应的桶，可以用来从中提取水，所有桶的容量相同。你的任务是使用桶来排空井。输出你需要降低桶的次数。\n\n    例1：\n输入：\n    grid：[[0,0,1,0]，[0,1,0,0]，[1,1,1,1]]\n    bucket_capacity：1\n输出：6\n\n    例2：\n输入：\n    grid：[[0,0,1,1]，[0,0,0,0]，[1,1,1,1]，[0,1,1,1]]\n    bucket_capacity：2\n输出：5\n\n    例3：\n输入：\n    grid：[[0,0,0]，[0,0,0]]\n    bucket_capacity：5\n输出：0\n\n    约束：\n*所有井的长度相同\n*1 <= grid.length <= 10^2\n*1 <= grid [:,1].length <= 10^2\n*grid [i] [j] -> 0 | 1\n*1 <= capacity <= 10\n    \n    \"\"\"\n", "entry_point": "max_fill", "test": "def check(candidate):\n\n\n    # Check some simple cases\n    assert True, \"This prints if this assert fails 1 (good for debugging!)\"\n    assert candidate([[0,0,1,0], [0,1,0,0], [1,1,1,1]], 1) == 6, \"Error\"\n    assert candidate([[0,0,1,1], [0,0,0,0], [1,1,1,1], [0,1,1,1]], 2) == 5, \"Error\"\n    assert candidate([[0,0,0], [0,0,0]], 5) == 0, \"Error\"\n\n    # Check some edge cases that are easy to work out by hand.\n    assert True, \"This prints if this assert fails 2 (also good for debugging!)\"\n    assert candidate([[1,1,1,1], [1,1,1,1]], 2) == 4, \"Error\"\n    assert candidate([[1,1,1,1], [1,1,1,1]], 9) == 2, \"Error\"\n\n", "language": "python", "canonical_solution": "    return sum([math.ceil(sum(arr)/capacity) for arr in grid])\n", "description": "你有一个井的矩形网格。每一行代表一个井，每一行中的1代表一个单位的水。每个井都有一个对应的桶，可以用来从中提取水，所有桶的容量相同。你的任务是使用桶来排空井。输出你需要降低桶的次数。\n\n    例1：\n输入：\n    grid：[[0,0,1,0]，[0,1,0,0]，[1,1,1,1]]\n    bucket_capacity：1\n输出：6\n\n    例2：\n输入：\n    grid：[[0,0,1,1]，[0,0,0,0]，[1,1,1,1]，[0,1,1,1]]\n    bucket_capacity：2\n输出：5\n\n    例3：\n输入：\n    grid：[[0,0,0]，[0,0,0]]\n    bucket_capacity：5\n输出：0\n\n    约束：\n*所有井的长度相同\n*1 <= grid.length <= 10^2\n*1 <= grid [:,1].length <= 10^2\n*grid [i] [j] -> 0 | 1\n*1 <= capacity <= 10\n    ", "natural_language": "Chinese"}
54 | {"task_id": "python/53", "prompt": "\ndef select_words(s, n):\n    \"\"\"给定一个字符串s和一个自然数n，你被要求实现一个函数，该函数返回字符串s中包含恰好n个辅音字母的所有单词的列表，按照它们在字符串s中出现的顺序。\n如果字符串s为空，则函数应返回一个空列表。\n注意：您可以假设输入字符串仅包含字母和空格。\n示例：\n    \n    select_words(\"Mary had a little lamb\", 4) ==> [\"little\"]\n    select_words(\"Mary had a little lamb\", 3) ==> [\"Mary\", \"lamb\"]\n    select_words(\"simple white space\", 2) ==> []\n    select_words(\"Hello world\", 4) ==> [\"world\"]\n    select_words(\"Uncle sam\", 3) ==> [\"Uncle\"]\n    \"\"\"\n", "entry_point": "select_words", "test": "def check(candidate):\n\n    # Check some simple cases\n    assert candidate(\"Mary had a little lamb\", 4) == [\"little\"], \"First test error: \" + str(candidate(\"Mary had a little lamb\", 4))      \n    assert candidate(\"Mary had a little lamb\", 3) == [\"Mary\", \"lamb\"], \"Second test error: \" + str(candidate(\"Mary had a little lamb\", 3))  \n    assert candidate(\"simple white space\", 2) == [], \"Third test error: \" + str(candidate(\"simple white space\", 2))      \n    assert candidate(\"Hello world\", 4) == [\"world\"], \"Fourth test error: \" + str(candidate(\"Hello world\", 4))  \n    assert candidate(\"Uncle sam\", 3) == [\"Uncle\"], \"Fifth test error: \" + str(candidate(\"Uncle sam\", 3))\n\n\n    # Check some edge cases that are easy to work out by hand.\n    assert candidate(\"\", 4) == [], \"1st edge test error: \" + str(candidate(\"\", 4))\n    assert candidate(\"a b c d e f\", 1) == [\"b\", \"c\", \"d\", \"f\"], \"2nd edge test error: \" + str(candidate(\"a b c d e f\", 1))\n\n", "language": "python", "canonical_solution": "    result = []\n    for word in s.split():\n        n_consonants = 0\n        for i in range(0, len(word)):\n            if word[i].lower() not in [\"a\",\"e\",\"i\",\"o\",\"u\"]:\n                n_consonants += 1 \n        if n_consonants == n:\n            result.append(word)\n    return result\n\n", "description": 
"给定一个字符串s和一个自然数n，你被要求实现一个函数，该函数返回字符串s中包含恰好n个辅音字母的所有单词的列表，按照它们在字符串s中出现的顺序。\n如果字符串s为空，则函数应返回一个空列表。\n注意：您可以假设输入字符串仅包含字母和空格。\n示例：\n    ", "natural_language": "Chinese"}
55 | {"task_id": "python/54", "prompt": "\ndef maximum(arr, k):\n    \"\"\"\n    给定一个整数数组arr和一个正整数k，返回一个长度为k的排序列表，其中包含arr中最大的k个数字。\n\n    示例1：\n\n        输入：arr = [-3，-4，5]，k = 3\n    输出：[-4，-3，5]\n\n    示例2：\n\n        输入：arr = [4，-4，4]，k = 2\n    输出：[4，4]\n\n    示例3：\n\n        输入：arr = [-3，2，1，2，-1，-2，1]，k = 1\n    输出：[2]\n\n    注意：\n\n        1.数组的长度将在[1，1000]范围内。\n    2.数组中的元素将在[-1000，1000]范围内。\n    3.0 <= k <= len（arr）\n    \n    \"\"\"\n", "entry_point": "maximum", "test": "def check(candidate):\n\n    # Check some simple cases\n    assert candidate([-3, -4, 5], 3) == [-4, -3, 5]\n    assert candidate([4, -4, 4], 2) == [4, 4]\n    assert candidate([-3, 2, 1, 2, -1, -2, 1], 1) == [2]\n    assert candidate([123, -123, 20, 0 , 1, 2, -3], 3) == [2, 20, 123]\n    assert candidate([-123, 20, 0 , 1, 2, -3], 4) == [0, 1, 2, 20]\n    assert candidate([5, 15, 0, 3, -13, -8, 0], 7) == [-13, -8, 0, 0, 3, 5, 15]\n    assert candidate([-1, 0, 2, 5, 3, -10], 2) == [3, 5]\n    assert candidate([1, 0, 5, -7], 1) == [5]\n    assert candidate([4, -4], 2) == [-4, 4]\n    assert candidate([-10, 10], 2) == [-10, 10]\n\n    # Check some edge cases that are easy to work out by hand.\n    assert candidate([1, 2, 3, -23, 243, -400, 0], 0) == []\n\n", "language": "python", "canonical_solution": "    if k == 0:\n        return []\n    arr.sort()\n    ans = arr[-k:]\n    return ans\n", "description": "给定一个整数数组arr和一个正整数k，返回一个长度为k的排序列表，其中包含arr中最大的k个数字。\n\n    示例1：\n\n        输入：arr = [-3，-4，5]，k = 3\n    输出：[-4，-3，5]\n\n    示例2：\n\n        输入：arr = [4，-4，4]，k = 2\n    输出：[4，4]\n\n    示例3：\n\n        输入：arr = [-3，2，1，2，-1，-2，1]，k = 1\n    输出：[2]\n\n    注意：\n\n        1.数组的长度将在[1，1000]范围内。\n    2.数组中的元素将在[-1000，1000]范围内。\n    3.0 <= k <= len（arr）\n    ", "natural_language": "Chinese"}
56 | {"task_id": "python/55", "prompt": "\ndef add_elements(arr, k):\n    \"\"\"\n    给定一个非空整数数组 arr 和一个整数 k，返回 arr 的前 k 个元素中最多有两位数的元素的和。\n\n    示例：\n\n        输入：arr = [111,21,3,4000,5,6,7,8,9]，k = 4\n    输出：24 # 21 + 3 的和\n\n    限制条件：\n    1. 1 <= len(arr) <= 100\n    2. 1 <= k <= len(arr)\n    \n    \"\"\"\n", "entry_point": "add_elements", "test": "def check(candidate):\n\n    # Check some simple cases\n    assert candidate([1,-2,-3,41,57,76,87,88,99], 3) == -4\n    assert candidate([111,121,3,4000,5,6], 2) == 0\n    assert candidate([11,21,3,90,5,6,7,8,9], 4) == 125\n    assert candidate([111,21,3,4000,5,6,7,8,9], 4) == 24, \"This prints if this assert fails 1 (good for debugging!)\"\n\n    # Check some edge cases that are easy to work out by hand.\n    assert candidate([1], 1) == 1, \"This prints if this assert fails 2 (also good for debugging!)\"\n\n", "language": "python", "canonical_solution": "    return sum(elem for elem in arr[:k] if len(str(elem)) <= 2)\n", "description": "给定一个非空整数数组 arr 和一个整数 k，返回 arr 的前 k 个元素中最多有两位数的元素的和。\n\n    示例：\n\n        输入：arr = [111,21,3,4000,5,6,7,8,9]，k = 4\n    输出：24 # 21 + 3 的和\n\n    限制条件：\n    1. 1 <= len(arr) <= 100\n    2. 1 <= k <= len(arr)\n    ", "natural_language": "Chinese"}
57 | {"task_id": "python/56", "prompt": "\ndef intersection(interval1, interval2):\n    \"\"\"给定两个区间，\n    每个区间都是一对整数。例如，区间 = (起始，结束) = (1，2)。\n    给定的区间是闭合的，这意味着区间（起始，结束）\n    包括起始和结束。\n    对于每个给定的区间，假定其起始小于或等于其结束。\n    您的任务是确定这两个区间的交集长度是否为质数。\n    例如，区间（1，3），（2，4）的交集是（2，3）\n    其长度为1，不是质数。\n    如果交集的长度是质数，则返回“YES”，\n    否则返回“NO”。\n    如果两个区间不相交，则返回“NO”。\n\n        [输入/输出]示例：\n    \n    intersection((1, 2), (2, 3)) ==> \"NO\"\n    intersection((-1, 1), (0, 4)) ==> \"NO\"\n    intersection((-3, -1), (-5, 5)) ==> \"YES\"\n    \"\"\"\n", "entry_point": "intersection", "test": "def check(candidate):\n\n    # Check some simple cases\n    assert candidate((1, 2), (2, 3)) == \"NO\"\n    assert candidate((-1, 1), (0, 4)) == \"NO\"\n    assert candidate((-3, -1), (-5, 5)) == \"YES\"\n    assert candidate((-2, 2), (-4, 0)) == \"YES\"\n\n    # Check some edge cases that are easy to work out by hand.\n    assert candidate((-11, 2), (-1, -1)) == \"NO\"\n    assert candidate((1, 2), (3, 5)) == \"NO\"\n    assert candidate((1, 2), (1, 2)) == \"NO\"\n    assert candidate((-2, -2), (-3, -2)) == \"NO\"\n\n", "language": "python", "canonical_solution": "    def is_prime(num):\n        if num == 1 or num == 0:\n            return False\n        if num == 2:\n            return True\n        for i in range(2, num):\n            if num%i == 0:\n                return False\n        return True\n\n    l = max(interval1[0], interval2[0])\n    r = min(interval1[1], interval2[1])\n    length = r - l\n    if length > 0 and is_prime(length):\n        return \"YES\"\n    return \"NO\"\n", "description": "给定两个区间，\n    每个区间都是一对整数。例如，区间 = (起始，结束) = (1，2)。\n    给定的区间是闭合的，这意味着区间（起始，结束）\n    包括起始和结束。\n    对于每个给定的区间，假定其起始小于或等于其结束。\n    您的任务是确定这两个区间的交集长度是否为质数。\n    例如，区间（1，3），（2，4）的交集是（2，3）\n    其长度为1，不是质数。\n    如果交集的长度是质数，则返回“YES”，\n    否则返回“NO”。\n    如果两个区间不相交，则返回“NO”。\n\n        [输入/输出]示例：\n    ", "natural_language": "Chinese"}
58 | {"task_id": "python/57", "prompt": "\ndef tri(n):\n    \"\"\"每个人都知道斐波那契数列，在过去的几个世纪里，数学家们对其进行了深入研究。然而，人们不知道的是特里波那契数列。特里波那契数列由递归定义：\ntri(1) = 3\ntri(n) = 1 + n / 2，如果n是偶数。\ntri(n) = tri(n - 1) + tri(n - 2) + tri(n + 1)，如果n是奇数。\n例如：\ntri(2) = 1 + (2 / 2) = 2\ntri(4) = 3\ntri(3) = tri(2) + tri(1) + tri(4)\n= 2 + 3 + 3 = 8\n给定一个非负整数n，您必须返回特里波那契数列的前n + 1个数字的列表。\n例子：\ntri(3) = [1, 3, 2, 8]\n    \n    \"\"\"\n", "entry_point": "tri", "test": "def check(candidate):\n\n    # Check some simple cases\n    \n    assert candidate(3) == [1, 3, 2.0, 8.0]\n    assert candidate(4) == [1, 3, 2.0, 8.0, 3.0]\n    assert candidate(5) == [1, 3, 2.0, 8.0, 3.0, 15.0]\n    assert candidate(6) == [1, 3, 2.0, 8.0, 3.0, 15.0, 4.0]\n    assert candidate(7) == [1, 3, 2.0, 8.0, 3.0, 15.0, 4.0, 24.0]\n    assert candidate(8) == [1, 3, 2.0, 8.0, 3.0, 15.0, 4.0, 24.0, 5.0]\n    assert candidate(9) == [1, 3, 2.0, 8.0, 3.0, 15.0, 4.0, 24.0, 5.0, 35.0]\n    assert candidate(20) == [1, 3, 2.0, 8.0, 3.0, 15.0, 4.0, 24.0, 5.0, 35.0, 6.0, 48.0, 7.0, 63.0, 8.0, 80.0, 9.0, 99.0, 10.0, 120.0, 11.0]\n\n    # Check some edge cases that are easy to work out by hand.\n    assert candidate(0) == [1]\n    assert candidate(1) == [1, 3]\n", "language": "python", "canonical_solution": "    if n == 0:\n        return [1]\n    my_tri = [1, 3]\n    for i in range(2, n + 1):\n        if i % 2 == 0:\n            my_tri.append(i / 2 + 1)\n        else:\n            my_tri.append(my_tri[i - 1] + my_tri[i - 2] + (i + 3) / 2)\n    return my_tri\n", "description": "每个人都知道斐波那契数列，在过去的几个世纪里，数学家们对其进行了深入研究。然而，人们不知道的是特里波那契数列。特里波那契数列由递归定义：\ntri(1) = 3\ntri(n) = 1 + n / 2，如果n是偶数。\ntri(n) = tri(n - 1) + tri(n - 2) + tri(n + 1)，如果n是奇数。\n例如：\ntri(2) = 1 + (2 / 2) = 2\ntri(4) = 3\ntri(3) = tri(2) + tri(1) + tri(4)\n= 2 + 3 + 3 = 8\n给定一个非负整数n，您必须返回特里波那契数列的前n + 1个数字的列表。\n例子：\ntri(3) = [1, 3, 2, 8]\n    ", "natural_language": "Chinese"}
59 | {"task_id": "python/58", "prompt": "\ndef digits(n):\n    \"\"\"给定一个正整数n，返回其各位数字中所有奇数数字的乘积。\n如果所有数字都是偶数，则返回0。\n例如：\n    \n    digits(1)  == 1\n    digits(4)  == 0\n    digits(235) == 15\n    \"\"\"\n", "entry_point": "digits", "test": "def check(candidate):\n\n    # Check some simple cases\n    assert candidate(5) == 5\n    assert candidate(54) == 5\n    assert candidate(120) ==1\n    assert candidate(5014) == 5\n    assert candidate(98765) == 315\n    assert candidate(5576543) == 2625\n\n    # Check some edge cases that are easy to work out by hand.\n    assert candidate(2468) == 0\n\n", "language": "python", "canonical_solution": "    product = 1\n    odd_count = 0\n    for digit in str(n):\n        int_digit = int(digit)\n        if int_digit%2 == 1:\n            product= product*int_digit\n            odd_count+=1\n    if odd_count ==0:\n        return 0\n    else:\n        return product\n", "description": "给定一个正整数n，返回其各位数字中所有奇数数字的乘积。\n如果所有数字都是偶数，则返回0。\n例如：\n    ", "natural_language": "Chinese"}
60 | {"task_id": "python/59", "prompt": "\ndef is_nested(string):\n    '''\n    创建一个函数，该函数以仅包含方括号的字符串作为输入。\n    如果存在有效的括号子序列且至少有一个括号嵌套，则函数应返回True。\n    \n\n    is_nested('[[]]') ➞ True\n    is_nested('[]]]]]]][[[[[]') ➞ False\n    is_nested('[][]') ➞ False\n    is_nested('[]') ➞ False\n    is_nested('[[][]]') ➞ True\n    is_nested('[[]][[') ➞ True\n    '''\n", "entry_point": "is_nested", "test": "def check(candidate):\n\n    # Check some simple cases\n    assert candidate('[[]]') == True, \"This prints if this assert fails 1 (good for debugging!)\"\n    assert candidate('[]]]]]]][[[[[]') == False\n    assert candidate('[][]') == False\n    assert candidate(('[]')) == False\n    assert candidate('[[[[]]]]') == True\n    assert candidate('[]]]]]]]]]]') == False\n    assert candidate('[][][[]]') == True\n    assert candidate('[[]') == False\n    assert candidate('[]]') == False\n    assert candidate('[[]][[') == True\n    assert candidate('[[][]]') == True\n\n    # Check some edge cases that are easy to work out by hand.\n    assert candidate('') == False, \"This prints if this assert fails 2 (also good for debugging!)\"\n    assert candidate('[[[[[[[[') == False\n    assert candidate(']]]]]]]]') == False\n\n", "language": "python", "canonical_solution": "    opening_bracket_index = []\n    closing_bracket_index = []\n    for i in range(len(string)):\n        if string[i] == '[':\n            opening_bracket_index.append(i)\n        else:\n            closing_bracket_index.append(i)\n    closing_bracket_index.reverse()\n    cnt = 0\n    i = 0\n    l = len(closing_bracket_index)\n    for idx in opening_bracket_index:\n        if i < l and idx < closing_bracket_index[i]:\n            cnt += 1\n            i += 1\n    return cnt >= 2\n\n    \n", "description": "创建一个函数，该函数以仅包含方括号的字符串作为输入。\n    如果存在有效的括号子序列且至少有一个括号嵌套，则函数应返回True。\n    ", "natural_language": "Chinese"}
61 | {"task_id": "python/60", "prompt": "\n\ndef sum_squares(lst):\n    \"\"\"给定一个数字列表。\n你需要返回给定列表中各元素平方的总和，\n首先将列表中的每个元素向上取整（Ceiling）。\n例子：\n对于lst = [1,2,3]，输出应该是14\n对于lst = [1,4,9]，输出应该是98\n对于lst = [1,3,5,7]，输出应该是84\n对于lst = [1.4,4.2,0]，输出应该是29\n对于lst = [-2.4,1,1]，输出应该是6\n    \n    \n\n    \"\"\"\n", "entry_point": "sum_squares", "test": "def check(candidate):\n\n    # Check some simple cases\n    assert candidate([1,2,3])==14, \"This prints if this assert fails 1 (good for debugging!)\"\n    assert candidate([1.0,2,3])==14, \"This prints if this assert fails 1 (good for debugging!)\"\n    assert candidate([1,3,5,7])==84, \"This prints if this assert fails 1 (good for debugging!)\"\n    assert candidate([1.4,4.2,0])==29, \"This prints if this assert fails 1 (good for debugging!)\"\n    assert candidate([-2.4,1,1])==6, \"This prints if this assert fails 1 (good for debugging!)\"\n\n    assert candidate([100,1,15,2])==10230, \"This prints if this assert fails 1 (good for debugging!)\"\n    assert candidate([10000,10000])==200000000, \"This prints if this assert fails 1 (good for debugging!)\"\n    assert candidate([-1.4,4.6,6.3])==75, \"This prints if this assert fails 1 (good for debugging!)\"\n    assert candidate([-1.4,17.9,18.9,19.9])==1086, \"This prints if this assert fails 1 (good for debugging!)\"\n\n\n    # Check some edge cases that are easy to work out by hand.\n    assert candidate([0])==0, \"This prints if this assert fails 2 (also good for debugging!)\"\n    assert candidate([-1])==1, \"This prints if this assert fails 2 (also good for debugging!)\"\n    assert candidate([-1,1,0])==2, \"This prints if this assert fails 2 (also good for debugging!)\"\n\n", "language": "python", "canonical_solution": "    import math\n    squared = 0\n    for i in lst:\n        squared += math.ceil(i)**2\n    return squared\n", "description": "给定一个数字列表。\n你需要返回给定列表中各元素平方的总和，\n首先将列表中的每个元素向上取整（Ceiling）。\n例子：\n对于lst = [1,2,3]，输出应该是14\n对于lst = [1,4,9]，输出应该是98\n对于lst = 
[1,3,5,7]，输出应该是84\n对于lst = [1.4,4.2,0]，输出应该是29\n对于lst = [-2.4,1,1]，输出应该是6\n    ", "natural_language": "Chinese"}
62 | {"task_id": "python/61", "prompt": "\ndef check_if_last_char_is_a_letter(txt):\n    '''\n    创建一个函数，如果给定字符串的最后一个字符是字母字符且不是单词的一部分，则返回True，否则返回False。\n注意：“单词”是由空格分隔的一组字符。\n\n    例子：\n    \n    check_if_last_char_is_a_letter(\"apple pie\") ➞ False\n    check_if_last_char_is_a_letter(\"apple pi e\") ➞ True\n    check_if_last_char_is_a_letter(\"apple pi e \") ➞ False\n    check_if_last_char_is_a_letter(\"\") ➞ False \n    '''\n", "entry_point": "check_if_last_char_is_a_letter", "test": "def check(candidate):\n\n    # Check some simple cases\n    assert candidate(\"apple\") == False\n    assert candidate(\"apple pi e\") == True\n    assert candidate(\"eeeee\") == False\n    assert candidate(\"A\") == True\n    assert candidate(\"Pumpkin pie \") == False\n    assert candidate(\"Pumpkin pie 1\") == False\n    assert candidate(\"\") == False\n    assert candidate(\"eeeee e \") == False\n    assert candidate(\"apple pie\") == False\n    assert candidate(\"apple pi e \") == False\n\n    # Check some edge cases that are easy to work out by hand.\n    assert True\n\n", "language": "python", "canonical_solution": " \n    check = txt.split(' ')[-1]\n    return True if len(check) == 1 and (97 <= ord(check.lower()) <= 122) else False\n", "description": "创建一个函数，如果给定字符串的最后一个字符是字母字符且不是单词的一部分，则返回True，否则返回False。\n注意：“单词”是由空格分隔的一组字符。\n\n    例子：\n    ", "natural_language": "Chinese"}
63 | {"task_id": "python/62", "prompt": "\ndef can_arrange(arr):\n    \"\"\"创建一个函数，该函数返回一个元素的最大索引，该元素不大于或等于其前面的元素。如果不存在这样的元素，则返回-1。给定的数组不包含重复值。\n\n    例子：\n    \n    can_arrange([1,2,4,3,5]) = 3\n    can_arrange([1,2,3]) = -1\n    \"\"\"\n", "entry_point": "can_arrange", "test": "def check(candidate):\n\n    # Check some simple cases\n    assert candidate([1,2,4,3,5])==3\n    assert candidate([1,2,4,5])==-1\n    assert candidate([1,4,2,5,6,7,8,9,10])==2\n    assert candidate([4,8,5,7,3])==4\n\n    # Check some edge cases that are easy to work out by hand.\n    assert candidate([])==-1\n\n", "language": "python", "canonical_solution": "    ind=-1\n    i=1\n    while i<len(arr):\n      if arr[i]<arr[i-1]:\n        ind=i\n      i+=1\n    return ind\n", "description": "创建一个函数，该函数返回一个元素的最大索引，该元素不大于或等于其前面的元素。如果不存在这样的元素，则返回-1。给定的数组不包含重复值。\n\n    例子：\n    ", "natural_language": "Chinese"}
64 | {"task_id": "python/63", "prompt": "\ndef largest_smallest_integers(lst):\n    '''\n    创建一个函数，返回一个元组（a，b），其中'a'是负整数中最大的，'b'是正整数中最小的。如果列表中没有负数或正数，则将它们作为None返回。\n\n    例子：\n    \n    largest_smallest_integers([2, 4, 1, 3, 5, 7]) == (None, 1)\n    largest_smallest_integers([]) == (None, None)\n    largest_smallest_integers([0]) == (None, None)\n    '''\n", "entry_point": "largest_smallest_integers", "test": "def check(candidate):\n\n    # Check some simple cases\n    assert candidate([2, 4, 1, 3, 5, 7]) == (None, 1)\n    assert candidate([2, 4, 1, 3, 5, 7, 0]) == (None, 1)\n    assert candidate([1, 3, 2, 4, 5, 6, -2]) == (-2, 1)\n    assert candidate([4, 5, 3, 6, 2, 7, -7]) == (-7, 2)\n    assert candidate([7, 3, 8, 4, 9, 2, 5, -9]) == (-9, 2)\n    assert candidate([]) == (None, None)\n    assert candidate([0]) == (None, None)\n    assert candidate([-1, -3, -5, -6]) == (-1, None)\n    assert candidate([-1, -3, -5, -6, 0]) == (-1, None)\n    assert candidate([-6, -4, -4, -3, 1]) == (-3, 1)\n    assert candidate([-6, -4, -4, -3, -100, 1]) == (-3, 1)\n\n    # Check some edge cases that are easy to work out by hand.\n    assert True\n", "language": "python", "canonical_solution": "    smallest = list(filter(lambda x: x < 0, lst))\n    largest = list(filter(lambda x: x > 0, lst))\n    return (max(smallest) if smallest else None, min(largest) if largest else None)\n", "description": "创建一个函数，返回一个元组（a，b），其中'a'是负整数中最大的，'b'是正整数中最小的。如果列表中没有负数或正数，则将它们作为None返回。\n\n    例子：\n    ", "natural_language": "Chinese"}
65 | {"task_id": "python/64", "prompt": "\ndef special_factorial(n):\n    \"\"\"巴西阶乘被定义为：\n    巴西阶乘(n) = n! * (n-1)! * (n-2)! * ... * 1!\n    其中 n > 0\n\n        例如：\n    \n    >>> special_factorial(4)\n    288\n\n    该函数将接收一个整数作为输入，并应返回该整数的特殊阶乘。\n    \"\"\"\n", "entry_point": "special_factorial", "test": "def check(candidate):\n\n    # Check some simple cases\n    assert candidate(4) == 288, \"Test 4\"\n    assert candidate(5) == 34560, \"Test 5\"\n    assert candidate(7) == 125411328000, \"Test 7\"\n\n    # Check some edge cases that are easy to work out by hand.\n    assert candidate(1) == 1, \"Test 1\"\n\n", "language": "python", "canonical_solution": "    fact_i = 1\n    special_fact = 1\n    for i in range(1, n+1):\n        fact_i *= i\n        special_fact *= fact_i\n    return special_fact\n", "description": "巴西阶乘被定义为：\n    巴西阶乘(n) = n! * (n-1)! * (n-2)! * ... * 1!\n    其中 n > 0\n\n        例如：\n    ", "natural_language": "Chinese"}
66 | {"task_id": "python/65", "prompt": "\ndef words_in_sentence(sentence):\n    \"\"\"\n    给定一个表示句子的字符串，\n句子包含一些由空格分隔的单词，\n您必须返回一个字符串，其中包含原始句子中长度为质数的单词，\n新字符串中单词的顺序应与原始字符串相同。\n\n    示例1：\n    输入：sentence = \"This is a test\"\n    输出：\"is\"\n\n    示例2：\n    输入：sentence = \"lets go for swimming\"\n    输出：\"go for\"\n\n    限制：\n    * 1 <= len(sentence) <= 100\n    * 句子只包含字母\n    \n    \"\"\"\n", "entry_point": "words_in_sentence", "test": "def check(candidate):\n\n    # Check some simple cases\n    assert candidate(\"This is a test\") == \"is\"\n    assert candidate(\"lets go for swimming\") == \"go for\"\n    assert candidate(\"there is no place available here\") == \"there is no place\"\n    assert candidate(\"Hi I am Hussein\") == \"Hi am Hussein\"\n    assert candidate(\"go for it\") == \"go for it\"\n\n    # Check some edge cases that are easy to work out by hand.\n    assert candidate(\"here\") == \"\"\n    assert candidate(\"here is\") == \"is\"\n\n", "language": "python", "canonical_solution": "    new_lst = []\n    for word in sentence.split():\n        flg = 0\n        if len(word) == 1:\n            flg = 1\n        for i in range(2, len(word)):\n            if len(word)%i == 0:\n                flg = 1\n        if flg == 0 or len(word) == 2:\n            new_lst.append(word)\n    return \" \".join(new_lst)\n", "description": "给定一个表示句子的字符串，\n句子包含一些由空格分隔的单词，\n您必须返回一个字符串，其中包含原始句子中长度为质数的单词，\n新字符串中单词的顺序应与原始字符串相同。\n\n    示例1：\n    输入：sentence = \"This is a test\"\n    输出：\"is\"\n\n    示例2：\n    输入：sentence = \"lets go for swimming\"\n    输出：\"go for\"\n\n    限制：\n    * 1 <= len(sentence) <= 100\n    * 句子只包含字母\n    ", "natural_language": "Chinese"}
67 | {"task_id": "python/66", "prompt": "\ndef simplify(x, n):\n    \"\"\"你的任务是实现一个函数，简化表达式 x * n。如果 x * n 可以计算为整数，则函数返回 True，否则返回 False。x 和 n 都是分数的字符串表示形式，格式为 <分子>/<分母>，其中分子和分母都是正整数。\n\n    你可以假设 x 和 n 都是有效的分数，并且分母不为零。\n    \n\n    simplify(\"1/5\", \"5/1\") = True\n    simplify(\"1/6\", \"2/1\") = False\n    simplify(\"7/10\", \"10/2\") = False\n    \"\"\"\n", "entry_point": "simplify", "test": "def check(candidate):\n\n    # Check some simple cases\n    assert candidate(\"1/5\", \"5/1\") == True, 'test1'\n    assert candidate(\"1/6\", \"2/1\") == False, 'test2'\n    assert candidate(\"5/1\", \"3/1\") == True, 'test3'\n    assert candidate(\"7/10\", \"10/2\") == False, 'test4'\n    assert candidate(\"2/10\", \"50/10\") == True, 'test5'\n    assert candidate(\"7/2\", \"4/2\") == True, 'test6'\n    assert candidate(\"11/6\", \"6/1\") == True, 'test7'\n    assert candidate(\"2/3\", \"5/2\") == False, 'test8'\n    assert candidate(\"5/2\", \"3/5\") == False, 'test9'\n    assert candidate(\"2/4\", \"8/4\") == True, 'test10'\n\n\n    # Check some edge cases that are easy to work out by hand.\n    assert candidate(\"2/4\", \"4/2\") == True, 'test11'\n    assert candidate(\"1/5\", \"5/1\") == True, 'test12'\n    assert candidate(\"1/5\", \"1/5\") == False, 'test13'\n\n", "language": "python", "canonical_solution": "    a, b = x.split(\"/\")\n    c, d = n.split(\"/\")\n    numerator = int(a) * int(c)\n    denom = int(b) * int(d)\n    if (numerator/denom == int(numerator/denom)):\n        return True\n    return False\n", "description": "你的任务是实现一个函数，简化表达式 x * n。如果 x * n 可以计算为整数，则函数返回 True，否则返回 False。x 和 n 都是分数的字符串表示形式，格式为 <分子>/<分母>，其中分子和分母都是正整数。\n\n    你可以假设 x 和 n 都是有效的分数，并且分母不为零。\n    ", "natural_language": "Chinese"}
68 | {"task_id": "python/67", "prompt": "\ndef order_by_points(nums):\n    \"\"\"\n    编写一个函数，根据数字的各位数字之和，将给定的整数列表按升序排序。\n注意：如果有多个数字的各位数字之和相似，则按照它们在原始列表中的索引排序。\n\n    例如：\n    \n    >>> order_by_points([1, 11, -1, -11, -12]) == [-1, -11, 1, -12, 11]\n    >>> order_by_points([]) == []\n    \"\"\"\n", "entry_point": "order_by_points", "test": "def check(candidate):\n\n    # Check some simple cases\n    assert candidate([1, 11, -1, -11, -12]) == [-1, -11, 1, -12, 11]\n    assert candidate([1234,423,463,145,2,423,423,53,6,37,3457,3,56,0,46]) == [0, 2, 3, 6, 53, 423, 423, 423, 1234, 145, 37, 46, 56, 463, 3457]\n    assert candidate([]) == []\n    assert candidate([1, -11, -32, 43, 54, -98, 2, -3]) == [-3, -32, -98, -11, 1, 2, 43, 54]\n    assert candidate([1,2,3,4,5,6,7,8,9,10,11]) == [1, 10, 2, 11, 3, 4, 5, 6, 7, 8, 9]\n    assert candidate([0,6,6,-76,-21,23,4]) == [-76, -21, 0, 4, 23, 6, 6]\n\n    # Check some edge cases that are easy to work out by hand.\n    assert True, \"This prints if this assert fails 2 (also good for debugging!)\"\n\n", "language": "python", "canonical_solution": "    def digits_sum(n):\n        neg = 1\n        if n < 0: n, neg = -1 * n, -1 \n        n = [int(i) for i in str(n)]\n        n[0] = n[0] * neg\n        return sum(n)\n    return sorted(nums, key=digits_sum)\n", "description": "编写一个函数，根据数字的各位数字之和，将给定的整数列表按升序排序。\n注意：如果有多个数字的各位数字之和相似，则按照它们在原始列表中的索引排序。\n\n    例如：\n    ", "natural_language": "Chinese"}
69 | {"task_id": "python/68", "prompt": "\ndef specialFilter(nums):\n    \"\"\"编写一个函数，它以数字数组作为输入，并返回数组中大于10且数字的第一个和最后一个数字都是奇数（1、3、5、7、9）的元素数量。例如：\n    \n    specialFilter([15, -73, 14, -15]) => 1 \n    specialFilter([33, -2, -3, 45, 21, 109]) => 2\n    \"\"\"\n", "entry_point": "specialFilter", "test": "def check(candidate):\n\n    # Check some simple cases\n    assert candidate([5, -2, 1, -5]) == 0  \n    assert candidate([15, -73, 14, -15]) == 1\n    assert candidate([33, -2, -3, 45, 21, 109]) == 2\n    assert candidate([43, -12, 93, 125, 121, 109]) == 4\n    assert candidate([71, -2, -33, 75, 21, 19]) == 3\n\n\n    # Check some edge cases that are easy to work out by hand.\n    assert candidate([1]) == 0              \n    assert candidate([]) == 0                   \n\n", "language": "python", "canonical_solution": "    \n    count = 0\n    for num in nums:\n        if num > 10:\n            odd_digits = (1, 3, 5, 7, 9)\n            number_as_string = str(num)\n            if int(number_as_string[0]) in odd_digits and int(number_as_string[-1]) in odd_digits:\n                count += 1\n        \n    return count \n", "description": "编写一个函数，它以数字数组作为输入，并返回数组中大于10且数字的第一个和最后一个数字都是奇数（1、3、5、7、9）的元素数量。例如：\n    ", "natural_language": "Chinese"}
70 | {"task_id": "python/69", "prompt": "\ndef get_max_triples(n):\n    \"\"\"\n    给定一个正整数n。你需要创建一个长度为n的整数数组a。\n对于每个i（1≤i≤n），a[i]的值=i * i-i + 1。\n返回a中i <j <k的三元组（a[i]，a[j]，a[k]）的数量，\n并且a[i] + a[j] + a[k]是3的倍数。\n\n    例子：\n输入：n = 5\n输出：1\n解释：\na = [1, 3, 7, 13, 21]\n唯一有效的三元组是（1，7，13）。\n    \n    \"\"\"\n", "entry_point": "get_max_triples", "test": "def check(candidate):\n\n    assert candidate(5) == 1\n    assert candidate(6) == 4\n    assert candidate(10) == 36\n    assert candidate(100) == 53361\n", "language": "python", "canonical_solution": "    A = [i*i - i + 1 for i in range(1,n+1)]\n    ans = []\n    for i in range(n):\n        for j in range(i+1,n):\n            for k in range(j+1,n):\n                if (A[i]+A[j]+A[k])%3 == 0:\n                    ans += [(A[i],A[j],A[k])]\n    return len(ans)\n", "description": "给定一个正整数n。你需要创建一个长度为n的整数数组a。\n对于每个i（1≤i≤n），a[i]的值=i * i-i + 1。\n返回a中i <j <k的三元组（a[i]，a[j]，a[k]）的数量，\n并且a[i] + a[j] + a[k]是3的倍数。\n\n    例子：\n输入：n = 5\n输出：1\n解释：\na = [1, 3, 7, 13, 21]\n唯一有效的三元组是（1，7，13）。\n    ", "natural_language": "Chinese"}
71 | {"task_id": "python/70", "prompt": "\ndef bf(planet1, planet2):\n    '''\n    我们的太阳系中有八颗行星：最靠近太阳的是水星，其次是金星，然后是地球、火星、木星、土星、天王星和海王星。编写一个函数，该函数接受两个行星名称作为字符串planet1和planet2。该函数应返回一个元组，其中包含所有轨道位于planet1和planet2之间的行星，按距离太阳的近度排序。如果planet1或planet2不是正确的行星名称，则函数应返回一个空元组。示例\n    \n    bf(\"Jupiter\", \"Neptune\") ==> (\"Saturn\", \"Uranus\")\n    bf(\"Earth\", \"Mercury\") ==> (\"Venus\")\n    bf(\"Mercury\", \"Uranus\") ==> (\"Venus\", \"Earth\", \"Mars\", \"Jupiter\", \"Saturn\")\n    '''\n", "entry_point": "bf", "test": "def check(candidate):\n\n    # Check some simple cases\n    assert candidate(\"Jupiter\", \"Neptune\") == (\"Saturn\", \"Uranus\"), \"First test error: \" + str(len(candidate(\"Jupiter\", \"Neptune\")))      \n    assert candidate(\"Earth\", \"Mercury\") == (\"Venus\",), \"Second test error: \" + str(candidate(\"Earth\", \"Mercury\"))  \n    assert candidate(\"Mercury\", \"Uranus\") == (\"Venus\", \"Earth\", \"Mars\", \"Jupiter\", \"Saturn\"), \"Third test error: \" + str(candidate(\"Mercury\", \"Uranus\"))      \n    assert candidate(\"Neptune\", \"Venus\") == (\"Earth\", \"Mars\", \"Jupiter\", \"Saturn\", \"Uranus\"), \"Fourth test error: \" + str(candidate(\"Neptune\", \"Venus\"))  \n\n\n    # Check some edge cases that are easy to work out by hand.\n    assert candidate(\"Earth\", \"Earth\") == ()\n    assert candidate(\"Mars\", \"Earth\") == ()\n    assert candidate(\"Jupiter\", \"Makemake\") == ()\n\n", "language": "python", "canonical_solution": "    planet_names = (\"Mercury\", \"Venus\", \"Earth\", \"Mars\", \"Jupiter\", \"Saturn\", \"Uranus\", \"Neptune\")\n    if planet1 not in planet_names or planet2 not in planet_names or planet1 == planet2:\n        return ()\n    planet1_index = planet_names.index(planet1)\n    planet2_index = planet_names.index(planet2)\n    if planet1_index < planet2_index:\n        return (planet_names[planet1_index + 1: planet2_index])\n    else:\n        return (planet_names[planet2_index + 1 : planet1_index])\n", 
"description": "我们的太阳系中有八颗行星：最靠近太阳的是水星，其次是金星，然后是地球、火星、木星、土星、天王星和海王星。编写一个函数，该函数接受两个行星名称作为字符串planet1和planet2。该函数应返回一个元组，其中包含所有轨道位于planet1和planet2之间的行星，按距离太阳的近度排序。如果planet1或planet2不是正确的行星名称，则函数应返回一个空元组。示例\n    ", "natural_language": "Chinese"}
72 | {"task_id": "python/71", "prompt": "\ndef x_or_y(n, x, y):\n    \"\"\"一个简单的程序，如果n是质数，则应返回x的值，否则应返回y的值。\n\n    例子：\n    \n    for x_or_y(7, 34, 12) == 34\n    for x_or_y(15, 8, 5) == 5\n    \n    \"\"\"\n", "entry_point": "x_or_y", "test": "def check(candidate):\n\n    # Check some simple cases\n    assert candidate(7, 34, 12) == 34\n    assert candidate(15, 8, 5) == 5\n    assert candidate(3, 33, 5212) == 33\n    assert candidate(1259, 3, 52) == 3\n    assert candidate(7919, -1, 12) == -1\n    assert candidate(3609, 1245, 583) == 583\n    assert candidate(91, 56, 129) == 129\n    assert candidate(6, 34, 1234) == 1234\n    \n\n    # Check some edge cases that are easy to work out by hand.\n    assert candidate(1, 2, 0) == 0\n    assert candidate(2, 2, 0) == 2\n\n", "language": "python", "canonical_solution": "    if n == 1:\n        return y\n    for i in range(2, n):\n        if n % i == 0:\n            return y\n            break\n    else:\n        return x\n", "description": "一个简单的程序，如果n是质数，则应返回x的值，否则应返回y的值。\n\n    例子：\n    ", "natural_language": "Chinese"}
73 | {"task_id": "python/72", "prompt": "\ndef double_the_difference(lst):\n    '''\n    给定一个数字列表，返回列表中奇数的平方和。忽略负数或非整数。\n\n    double_the_difference([1, 3, 2, 0]) == 1 + 9 + 0 + 0 = 10\ndouble_the_difference([-1, -2, 0]) == 0\ndouble_the_difference([9, -2]) == 81\ndouble_the_difference([0]) == 0\n\n    如果输入列表为空，则返回0。\n    \n    '''\n", "entry_point": "double_the_difference", "test": "def check(candidate):\n\n    # Check some simple cases\n    assert candidate([]) == 0 , \"This prints if this assert fails 1 (good for debugging!)\"\n    assert candidate([5, 4]) == 25 , \"This prints if this assert fails 2 (good for debugging!)\"\n    assert candidate([0.1, 0.2, 0.3]) == 0 , \"This prints if this assert fails 3 (good for debugging!)\"\n    assert candidate([-10, -20, -30]) == 0 , \"This prints if this assert fails 4 (good for debugging!)\"\n\n\n    # Check some edge cases that are easy to work out by hand.\n    assert candidate([-1, -2, 8]) == 0, \"This prints if this assert fails 5 (also good for debugging!)\"\n    assert candidate([0.2, 3, 5]) == 34, \"This prints if this assert fails 6 (also good for debugging!)\"\n    lst = list(range(-99, 100, 2))\n    odd_sum = sum([i**2 for i in lst if i%2!=0 and i > 0])\n    assert candidate(lst) == odd_sum , \"This prints if this assert fails 7 (good for debugging!)\"\n\n", "language": "python", "canonical_solution": "    return sum([i**2 for i in lst if i > 0 and i%2!=0 and \".\" not in str(i)])\n", "description": "给定一个数字列表，返回列表中奇数的平方和。忽略负数或非整数。\n\n    double_the_difference([1, 3, 2, 0]) == 1 + 9 + 0 + 0 = 10\ndouble_the_difference([-1, -2, 0]) == 0\ndouble_the_difference([9, -2]) == 81\ndouble_the_difference([0]) == 0\n\n    如果输入列表为空，则返回0。\n    ", "natural_language": "Chinese"}
74 | {"task_id": "python/73", "prompt": "\ndef Strongest_Extension(class_name, extensions):\n    \"\"\"你将得到一个类的名称（一个字符串）和一个扩展名列表。扩展名用于加载附加的类到该类中。扩展名的强度如下：让CAP为扩展名中大写字母的数量，让SM为扩展名中小写字母的数量，强度由CAP-SM分数给出。您应该找到最强的扩展名并返回一个字符串，格式为：ClassName.StrongestExtensionName。如果有两个或更多的扩展名具有相同的强度，则应选择列表中先出现的扩展名。例如，如果您给出“Slices”作为类和扩展名列表：['SErviNGSliCes'，'Cheese'，'StuFfed']，则应返回'Slices.SErviNGSliCes'，因为'SErviNGSliCes'是最强的扩展名（其强度为-1）。示例：\n    \n    for Strongest_Extension('my_class', ['AA', 'Be', 'CC']) == 'my_class.AA'\n    \"\"\"\n", "entry_point": "Strongest_Extension", "test": "def check(candidate):\n\n    # Check some simple cases\n    assert candidate('Watashi', ['tEN', 'niNE', 'eIGHt8OKe']) == 'Watashi.eIGHt8OKe'\n    assert candidate('Boku123', ['nani', 'NazeDa', 'YEs.WeCaNe', '32145tggg']) == 'Boku123.YEs.WeCaNe'\n    assert candidate('__YESIMHERE', ['t', 'eMptY', 'nothing', 'zeR00', 'NuLl__', '123NoooneB321']) == '__YESIMHERE.NuLl__'\n    assert candidate('K', ['Ta', 'TAR', 't234An', 'cosSo']) == 'K.TAR'\n    assert candidate('__HAHA', ['Tab', '123', '781345', '-_-']) == '__HAHA.123'\n    assert candidate('YameRore', ['HhAas', 'okIWILL123', 'WorkOut', 'Fails', '-_-']) == 'YameRore.okIWILL123'\n    assert candidate('finNNalLLly', ['Die', 'NowW', 'Wow', 'WoW']) == 'finNNalLLly.WoW'\n\n    # Check some edge cases that are easy to work out by hand.\n    assert candidate('_', ['Bb', '91245']) == '_.Bb'\n    assert candidate('Sp', ['671235', 'Bb']) == 'Sp.671235'\n    \n", "language": "python", "canonical_solution": "    strong = extensions[0]\n    my_val = len([x for x in extensions[0] if x.isalpha() and x.isupper()]) - len([x for x in extensions[0] if x.isalpha() and x.islower()])\n    for s in extensions:\n        val = len([x for x in s if x.isalpha() and x.isupper()]) - len([x for x in s if x.isalpha() and x.islower()])\n        if val > my_val:\n            strong = s\n            my_val = val\n\n    ans = class_name + \".\" + strong\n    return ans\n\n", "description": 
"你将得到一个类的名称（一个字符串）和一个扩展名列表。扩展名用于加载附加的类到该类中。扩展名的强度如下：让CAP为扩展名中大写字母的数量，让SM为扩展名中小写字母的数量，强度由CAP-SM分数给出。您应该找到最强的扩展名并返回一个字符串，格式为：ClassName.StrongestExtensionName。如果有两个或更多的扩展名具有相同的强度，则应选择列表中先出现的扩展名。例如，如果您给出“Slices”作为类和扩展名列表：['SErviNGSliCes'，'Cheese'，'StuFfed']，则应返回'Slices.SErviNGSliCes'，因为'SErviNGSliCes'是最强的扩展名（其强度为-1）。示例：\n    ", "natural_language": "Chinese"}
75 | {"task_id": "python/74", "prompt": "\ndef cycpattern_check(a , b):\n    \"\"\"给定两个单词。如果第二个单词或其任何旋转是第一个单词的子字符串，则需要返回True。\n    \n    cycpattern_check(\"abcd\",\"abd\") => False\n    cycpattern_check(\"hello\",\"ell\") => True\n    cycpattern_check(\"whassup\",\"psus\") => False\n    cycpattern_check(\"abab\",\"baa\") => True\n    cycpattern_check(\"efef\",\"eeff\") => False\n    cycpattern_check(\"himenss\",\"simen\") => True\n\n    \"\"\"\n", "entry_point": "cycpattern_check", "test": "def check(candidate):\n\n    # Check some simple cases\n    #assert True, \"This prints if this assert fails 1 (good for debugging!)\"\n\n    # Check some edge cases that are easy to work out by hand.\n    #assert True, \"This prints if this assert fails 2 (also good for debugging!)\"\n    assert  candidate(\"xyzw\",\"xyw\") == False , \"test #0\"\n    assert  candidate(\"yello\",\"ell\") == True , \"test #1\"\n    assert  candidate(\"whattup\",\"ptut\") == False , \"test #2\"\n    assert  candidate(\"efef\",\"fee\") == True , \"test #3\"\n    assert  candidate(\"abab\",\"aabb\") == False , \"test #4\"\n    assert  candidate(\"winemtt\",\"tinem\") == True , \"test #5\"\n\n", "language": "python", "canonical_solution": "    l = len(b)\n    pat = b + b\n    for i in range(len(a) - l + 1):\n        for j in range(l + 1):\n            if a[i:i+l] == pat[j:j+l]:\n                return True\n    return False\n", "description": "给定两个单词。如果第二个单词或其任何旋转是第一个单词的子字符串，则需要返回True。\n    ", "natural_language": "Chinese"}
76 | {"task_id": "python/75", "prompt": "\ndef int_to_mini_roman(number):\n    \"\"\"\n    给定一个正整数，将其转换为罗马数字字符串，并以小写形式返回。\n限制条件：1 <= num <= 1000\n\n    示例：\n    \n    >>> int_to_mini_roman(19) == 'xix'\n    >>> int_to_mini_roman(152) == 'clii'\n    >>> int_to_mini_roman(426) == 'cdxxvi'\n    \"\"\"\n", "entry_point": "int_to_mini_roman", "test": "def check(candidate):\n\n    # Check some simple cases\n    assert candidate(19) == 'xix'\n    assert candidate(152) == 'clii'\n    assert candidate(251) == 'ccli'\n    assert candidate(426) == 'cdxxvi'\n    assert candidate(500) == 'd'\n    assert candidate(1) == 'i'\n    assert candidate(4) == 'iv'\n    assert candidate(43) == 'xliii'\n    assert candidate(90) == 'xc'\n    assert candidate(94) == 'xciv'\n    assert candidate(532) == 'dxxxii'\n    assert candidate(900) == 'cm'\n    assert candidate(994) == 'cmxciv'\n    assert candidate(1000) == 'm'\n\n    # Check some edge cases that are easy to work out by hand.\n    assert True\n\n", "language": "python", "canonical_solution": "    num = [1, 4, 5, 9, 10, 40, 50, 90,  \n           100, 400, 500, 900, 1000] \n    sym = [\"I\", \"IV\", \"V\", \"IX\", \"X\", \"XL\",  \n           \"L\", \"XC\", \"C\", \"CD\", \"D\", \"CM\", \"M\"] \n    i = 12\n    res = ''\n    while number: \n        div = number // num[i] \n        number %= num[i] \n        while div: \n            res += sym[i] \n            div -= 1\n        i -= 1\n    return res.lower()\n", "description": "给定一个正整数，将其转换为罗马数字字符串，并以小写形式返回。\n限制条件：1 <= num <= 1000\n\n    示例：\n    ", "natural_language": "Chinese"}
77 | {"task_id": "python/76", "prompt": "\ndef right_angle_triangle(a, b, c):\n    '''\n    给定三角形的三条边长。如果这三条边可以组成一个直角三角形，则返回True，否则返回False。\n    直角三角形是一个其中一个角是直角或90度的三角形。\n    例子：\n    \n    right_angle_triangle(3, 4, 5) == True\n    right_angle_triangle(1, 2, 3) == False\n    '''\n", "entry_point": "right_angle_triangle", "test": "def check(candidate):\n\n    # Check some simple cases\n    assert candidate(3, 4, 5) == True, \"This prints if this assert fails 1 (good for debugging!)\"\n    assert candidate(1, 2, 3) == False\n    assert candidate(10, 6, 8) == True\n    assert candidate(2, 2, 2) == False\n    assert candidate(7, 24, 25) == True\n    assert candidate(10, 5, 7) == False\n    assert candidate(5, 12, 13) == True\n    assert candidate(15, 8, 17) == True\n    assert candidate(48, 55, 73) == True\n\n    # Check some edge cases that are easy to work out by hand.\n    assert candidate(1, 1, 1) == False, \"This prints if this assert fails 2 (also good for debugging!)\"\n    assert candidate(2, 2, 10) == False\n\n", "language": "python", "canonical_solution": "    return a*a == b*b + c*c or b*b == a*a + c*c or c*c == a*a + b*b\n", "description": "给定三角形的三条边长。如果这三条边可以组成一个直角三角形，则返回True，否则返回False。\n    直角三角形是一个其中一个角是直角或90度的三角形。\n    例子：\n    ", "natural_language": "Chinese"}
78 | {"task_id": "python/77", "prompt": "\ndef solve(s):\n    \"\"\"给定一个字符串s。\n如果s[i]是一个字母，将其大小写反转，从小写变为大写或反之亦然，\n否则保持不变。\n如果字符串不包含字母，则反转字符串。\n函数应返回结果字符串。\n例子\n    \n    solve(\"1234\") = \"4321\"\n    solve(\"ab\") = \"AB\"\n    solve(\"#a@C\") = \"#A@c\"\n    \"\"\"\n", "entry_point": "solve", "test": "def check(candidate):\n\n    # Check some simple cases\n    assert candidate(\"AsDf\") == \"aSdF\"\n    assert candidate(\"1234\") == \"4321\"\n    assert candidate(\"ab\") == \"AB\"\n    assert candidate(\"#a@C\") == \"#A@c\"\n    assert candidate(\"#AsdfW^45\") == \"#aSDFw^45\"\n    assert candidate(\"#6@2\") == \"2@6#\"\n\n    # Check some edge cases that are easy to work out by hand.\n    assert candidate(\"#$a^D\") == \"#$A^d\"\n    assert candidate(\"#ccc\") == \"#CCC\"\n\n    # Don't remove this line:\n", "language": "python", "canonical_solution": "    flg = 0\n    idx = 0\n    new_str = list(s)\n    for i in s:\n        if i.isalpha():\n            new_str[idx] = i.swapcase()\n            flg = 1\n        idx += 1\n    s = \"\"\n    for i in new_str:\n        s += i\n    if flg == 0:\n        return s[len(s)::-1]\n    return s\n", "description": "给定一个字符串s。\n如果s[i]是一个字母，将其大小写反转，从小写变为大写或反之亦然，\n否则保持不变。\n如果字符串不包含字母，则反转字符串。\n函数应返回结果字符串。\n例子\n    ", "natural_language": "Chinese"}
79 | {"task_id": "python/78", "prompt": "\ndef string_to_md5(text):\n    \"\"\"\n    给定一个字符串“text”，返回其MD5哈希等效字符串。\n如果“text”是一个空字符串，则返回null。\n    \n\n    >>> string_to_md5('Hello world') == '3e25960a79dbc69b674cd4ec67a72c62'\n    \"\"\"\n", "entry_point": "string_to_md5", "test": "def check(candidate):\n\n    # Check some simple cases\n    assert candidate('Hello world') == '3e25960a79dbc69b674cd4ec67a72c62'\n    assert candidate('') == None\n    assert candidate('A B C') == '0ef78513b0cb8cef12743f5aeb35f888'\n    assert candidate('password') == '5f4dcc3b5aa765d61d8327deb882cf99'\n\n    # Check some edge cases that are easy to work out by hand.\n    assert True\n\n", "language": "python", "canonical_solution": "    import hashlib\n    return hashlib.md5(text.encode('ascii')).hexdigest() if text else None\n", "description": "给定一个字符串“text”，返回其MD5哈希等效字符串。\n如果“text”是一个空字符串，则返回null。\n    ", "natural_language": "Chinese"}
80 | {"task_id": "python/79", "prompt": "\ndef generate_integers(a, b):\n    \"\"\"\n    给定两个正整数a和b，返回a和b之间的偶数位数字，按升序排列。\n\n    例如：\n    \n    generate_integers(2, 8) => [2, 4, 6, 8]\n    generate_integers(8, 2) => [2, 4, 6, 8]\n    generate_integers(10, 14) => []\n    \"\"\"\n", "entry_point": "generate_integers", "test": "def check(candidate):\n\n    # Check some simple cases\n    assert candidate(2, 10) == [2, 4, 6, 8], \"Test 1\"\n    assert candidate(10, 2) == [2, 4, 6, 8], \"Test 2\"\n    assert candidate(132, 2) == [2, 4, 6, 8], \"Test 3\"\n    assert candidate(17,89) == [], \"Test 4\"\n\n    # Check some edge cases that are easy to work out by hand.\n    assert True, \"This prints if this assert fails 2 (also good for debugging!)\"\n\n", "language": "python", "canonical_solution": "    lower = max(2, min(a, b))\n    upper = min(8, max(a, b))\n\n    return [i for i in range(lower, upper+1) if i % 2 == 0]\n", "description": "给定两个正整数a和b，返回a和b之间的偶数位数字，按升序排列。\n\n    例如：\n    ", "natural_language": "Chinese"}
81 | 


--------------------------------------------------------------------------------
/mxeval/.gitignore:
--------------------------------------------------------------------------------
1 | *.pyc
2 | *.egg-info/*
3 | .DS_Store


--------------------------------------------------------------------------------
/mxeval/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | # Changelog
2 | 
3 | ## v1.1
4 | * Bugfix:<br>
5 | Fix the unit test assertions of 47 problems for C#/TypeScript/Go, which represent ~5% of all problems:<br>
6 | The root cause of the issue is that the input parameters to the canonical solutions could be mutated as a side effect, which caused the captured input to mismatch.<br>
7 | We fix this issue by saving another copy of the function input before passing it for execution.


--------------------------------------------------------------------------------
/mxeval/LICENSE:
--------------------------------------------------------------------------------
  1 | 
  2 |                                  Apache License
  3 |                            Version 2.0, January 2004
  4 |                         http://www.apache.org/licenses/
  5 | 
  6 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  7 | 
  8 |    1. Definitions.
  9 | 
 10 |       "License" shall mean the terms and conditions for use, reproduction,
 11 |       and distribution as defined by Sections 1 through 9 of this document.
 12 | 
 13 |       "Licensor" shall mean the copyright owner or entity authorized by
 14 |       the copyright owner that is granting the License.
 15 | 
 16 |       "Legal Entity" shall mean the union of the acting entity and all
 17 |       other entities that control, are controlled by, or are under common
 18 |       control with that entity. For the purposes of this definition,
 19 |       "control" means (i) the power, direct or indirect, to cause the
 20 |       direction or management of such entity, whether by contract or
 21 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 22 |       outstanding shares, or (iii) beneficial ownership of such entity.
 23 | 
 24 |       "You" (or "Your") shall mean an individual or Legal Entity
 25 |       exercising permissions granted by this License.
 26 | 
 27 |       "Source" form shall mean the preferred form for making modifications,
 28 |       including but not limited to software source code, documentation
 29 |       source, and configuration files.
 30 | 
 31 |       "Object" form shall mean any form resulting from mechanical
 32 |       transformation or translation of a Source form, including but
 33 |       not limited to compiled object code, generated documentation,
 34 |       and conversions to other media types.
 35 | 
 36 |       "Work" shall mean the work of authorship, whether in Source or
 37 |       Object form, made available under the License, as indicated by a
 38 |       copyright notice that is included in or attached to the work
 39 |       (an example is provided in the Appendix below).
 40 | 
 41 |       "Derivative Works" shall mean any work, whether in Source or Object
 42 |       form, that is based on (or derived from) the Work and for which the
 43 |       editorial revisions, annotations, elaborations, or other modifications
 44 |       represent, as a whole, an original work of authorship. For the purposes
 45 |       of this License, Derivative Works shall not include works that remain
 46 |       separable from, or merely link (or bind by name) to the interfaces of,
 47 |       the Work and Derivative Works thereof.
 48 | 
 49 |       "Contribution" shall mean any work of authorship, including
 50 |       the original version of the Work and any modifications or additions
 51 |       to that Work or Derivative Works thereof, that is intentionally
 52 |       submitted to Licensor for inclusion in the Work by the copyright owner
 53 |       or by an individual or Legal Entity authorized to submit on behalf of
 54 |       the copyright owner. For the purposes of this definition, "submitted"
 55 |       means any form of electronic, verbal, or written communication sent
 56 |       to the Licensor or its representatives, including but not limited to
 57 |       communication on electronic mailing lists, source code control systems,
 58 |       and issue tracking systems that are managed by, or on behalf of, the
 59 |       Licensor for the purpose of discussing and improving the Work, but
 60 |       excluding communication that is conspicuously marked or otherwise
 61 |       designated in writing by the copyright owner as "Not a Contribution."
 62 | 
 63 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 64 |       on behalf of whom a Contribution has been received by Licensor and
 65 |       subsequently incorporated within the Work.
 66 | 
 67 |    2. Grant of Copyright License. Subject to the terms and conditions of
 68 |       this License, each Contributor hereby grants to You a perpetual,
 69 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 70 |       copyright license to reproduce, prepare Derivative Works of,
 71 |       publicly display, publicly perform, sublicense, and distribute the
 72 |       Work and such Derivative Works in Source or Object form.
 73 | 
 74 |    3. Grant of Patent License. Subject to the terms and conditions of
 75 |       this License, each Contributor hereby grants to You a perpetual,
 76 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 77 |       (except as stated in this section) patent license to make, have made,
 78 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 79 |       where such license applies only to those patent claims licensable
 80 |       by such Contributor that are necessarily infringed by their
 81 |       Contribution(s) alone or by combination of their Contribution(s)
 82 |       with the Work to which such Contribution(s) was submitted. If You
 83 |       institute patent litigation against any entity (including a
 84 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 85 |       or a Contribution incorporated within the Work constitutes direct
 86 |       or contributory patent infringement, then any patent licenses
 87 |       granted to You under this License for that Work shall terminate
 88 |       as of the date such litigation is filed.
 89 | 
 90 |    4. Redistribution. You may reproduce and distribute copies of the
 91 |       Work or Derivative Works thereof in any medium, with or without
 92 |       modifications, and in Source or Object form, provided that You
 93 |       meet the following conditions:
 94 | 
 95 |       (a) You must give any other recipients of the Work or
 96 |           Derivative Works a copy of this License; and
 97 | 
 98 |       (b) You must cause any modified files to carry prominent notices
 99 |           stating that You changed the files; and
100 | 
101 |       (c) You must retain, in the Source form of any Derivative Works
102 |           that You distribute, all copyright, patent, trademark, and
103 |           attribution notices from the Source form of the Work,
104 |           excluding those notices that do not pertain to any part of
105 |           the Derivative Works; and
106 | 
107 |       (d) If the Work includes a "NOTICE" text file as part of its
108 |           distribution, then any Derivative Works that You distribute must
109 |           include a readable copy of the attribution notices contained
110 |           within such NOTICE file, excluding those notices that do not
111 |           pertain to any part of the Derivative Works, in at least one
112 |           of the following places: within a NOTICE text file distributed
113 |           as part of the Derivative Works; within the Source form or
114 |           documentation, if provided along with the Derivative Works; or,
115 |           within a display generated by the Derivative Works, if and
116 |           wherever such third-party notices normally appear. The contents
117 |           of the NOTICE file are for informational purposes only and
118 |           do not modify the License. You may add Your own attribution
119 |           notices within Derivative Works that You distribute, alongside
120 |           or as an addendum to the NOTICE text from the Work, provided
121 |           that such additional attribution notices cannot be construed
122 |           as modifying the License.
123 | 
124 |       You may add Your own copyright statement to Your modifications and
125 |       may provide additional or different license terms and conditions
126 |       for use, reproduction, or distribution of Your modifications, or
127 |       for any such Derivative Works as a whole, provided Your use,
128 |       reproduction, and distribution of the Work otherwise complies with
129 |       the conditions stated in this License.
130 | 
131 |    5. Submission of Contributions. Unless You explicitly state otherwise,
132 |       any Contribution intentionally submitted for inclusion in the Work
133 |       by You to the Licensor shall be under the terms and conditions of
134 |       this License, without any additional terms or conditions.
135 |       Notwithstanding the above, nothing herein shall supersede or modify
136 |       the terms of any separate license agreement you may have executed
137 |       with Licensor regarding such Contributions.
138 | 
139 |    6. Trademarks. This License does not grant permission to use the trade
140 |       names, trademarks, service marks, or product names of the Licensor,
141 |       except as required for reasonable and customary use in describing the
142 |       origin of the Work and reproducing the content of the NOTICE file.
143 | 
144 |    7. Disclaimer of Warranty. Unless required by applicable law or
145 |       agreed to in writing, Licensor provides the Work (and each
146 |       Contributor provides its Contributions) on an "AS IS" BASIS,
147 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
148 |       implied, including, without limitation, any warranties or conditions
149 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
150 |       PARTICULAR PURPOSE. You are solely responsible for determining the
151 |       appropriateness of using or redistributing the Work and assume any
152 |       risks associated with Your exercise of permissions under this License.
153 | 
154 |    8. Limitation of Liability. In no event and under no legal theory,
155 |       whether in tort (including negligence), contract, or otherwise,
156 |       unless required by applicable law (such as deliberate and grossly
157 |       negligent acts) or agreed to in writing, shall any Contributor be
158 |       liable to You for damages, including any direct, indirect, special,
159 |       incidental, or consequential damages of any character arising as a
160 |       result of this License or out of the use or inability to use the
161 |       Work (including but not limited to damages for loss of goodwill,
162 |       work stoppage, computer failure or malfunction, or any and all
163 |       other commercial damages or losses), even if such Contributor
164 |       has been advised of the possibility of such damages.
165 | 
166 |    9. Accepting Warranty or Additional Liability. While redistributing
167 |       the Work or Derivative Works thereof, You may choose to offer,
168 |       and charge a fee for, acceptance of support, warranty, indemnity,
169 |       or other liability obligations and/or rights consistent with this
170 |       License. However, in accepting such obligations, You may act only
171 |       on Your own behalf and on Your sole responsibility, not on behalf
172 |       of any other Contributor, and only if You agree to indemnify,
173 |       defend, and hold each Contributor harmless for any liability
174 |       incurred by, or claims asserted against, such Contributor by reason
175 |       of your accepting any such warranty or additional liability.
176 | 
177 |    END OF TERMS AND CONDITIONS
178 | 
179 |    APPENDIX: How to apply the Apache License to your work.
180 | 
181 |       To apply the Apache License to your work, attach the following
182 |       boilerplate notice, with the fields enclosed by brackets "[]"
183 |       replaced with your own identifying information. (Don't include
184 |       the brackets!)  The text should be enclosed in the appropriate
185 |       comment syntax for the file format. We also recommend that a
186 |       file or class name and description of purpose be included on the
187 |       same "printed page" as the copyright notice for easier
188 |       identification within third-party archives.
189 | 
190 |    Copyright [yyyy] [name of copyright owner]
191 | 
192 |    Licensed under the Apache License, Version 2.0 (the "License");
193 |    you may not use this file except in compliance with the License.
194 |    You may obtain a copy of the License at
195 | 
196 |        http://www.apache.org/licenses/LICENSE-2.0
197 | 
198 |    Unless required by applicable law or agreed to in writing, software
199 |    distributed under the License is distributed on an "AS IS" BASIS,
200 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
201 |    See the License for the specific language governing permissions and
202 |    limitations under the License.
203 | 


--------------------------------------------------------------------------------
/mxeval/NOTICE:
--------------------------------------------------------------------------------
1 | MBXP+: Multi-lingual Execution-Based Evaluation
2 | 
3 | Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.


--------------------------------------------------------------------------------
/mxeval/THIRD_PARTY_LICENSES:
--------------------------------------------------------------------------------
  1 | The Multi-lingual Execution-Based Evaluation repository includes the following third-party software/licensing:
  2 | 
  3 | ** (1) In mxeval/, we release Multi-lingual Execution-Based Evaluation, where we adapt the HumanEval code repository into a multi-lingual version that supports evaluation of all our datasets. The original code and dataset are from https://github.com/openai/human-eval.
  4 | (2) In data/multilingual_humaneval, we release Multi-lingual HumanEval, where we adapt the HumanEval dataset by OpenAI into multiple datasets in different programming languages. The original code and dataset are from https://github.com/openai/human-eval/tree/master/data.
  5 | 
  6 | The MIT License
  7 | 
  8 | Permission is hereby granted, free of charge, to any person obtaining a copy
  9 | of this software and associated documentation files (the "Software"), to deal
 10 | in the Software without restriction, including without limitation the rights
 11 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 12 | copies of the Software, and to permit persons to whom the Software is
 13 | furnished to do so, subject to the following conditions:
 14 | 
 15 | The above copyright notice and this permission notice shall be included in
 16 | all copies or substantial portions of the Software.
 17 | 
 18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 19 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 20 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 21 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 22 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 23 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 24 | THE SOFTWARE.
 25 | 
 26 | 
 27 | ----------------
 28 | 
 29 | 
 30 | ** In data/mbxp, we release MBXP, where we adapt the MBPP dataset by Google into multiple datasets in different programming languages. The original datasets are from https://github.com/google-research/google-research/tree/master/mbpp and https://huggingface.co/datasets/mbpp.
 31 | 
 32 | 
 33 | Attribution 4.0 International
 34 | 
 35 | =======================================================================
 36 | 
 37 | Creative Commons Corporation ("Creative Commons") is not a law firm and
 38 | does not provide legal services or legal advice. Distribution of
 39 | Creative Commons public licenses does not create a lawyer-client or
 40 | other relationship. Creative Commons makes its licenses and related
 41 | information available on an "as-is" basis. Creative Commons gives no
 42 | warranties regarding its licenses, any material licensed under their
 43 | terms and conditions, or any related information. Creative Commons
 44 | disclaims all liability for damages resulting from their use to the
 45 | fullest extent possible.
 46 | 
 47 | Using Creative Commons Public Licenses
 48 | 
 49 | Creative Commons public licenses provide a standard set of terms and
 50 | conditions that creators and other rights holders may use to share
 51 | original works of authorship and other material subject to copyright
 52 | and certain other rights specified in the public license below. The
 53 | following considerations are for informational purposes only, are not
 54 | exhaustive, and do not form part of our licenses.
 55 | 
 56 |      Considerations for licensors: Our public licenses are
 57 |      intended for use by those authorized to give the public
 58 |      permission to use material in ways otherwise restricted by
 59 |      copyright and certain other rights. Our licenses are
 60 |      irrevocable. Licensors should read and understand the terms
 61 |      and conditions of the license they choose before applying it.
 62 |      Licensors should also secure all rights necessary before
 63 |      applying our licenses so that the public can reuse the
 64 |      material as expected. Licensors should clearly mark any
 65 |      material not subject to the license. This includes other CC-
 66 |      licensed material, or material used under an exception or
 67 |      limitation to copyright. More considerations for licensors:
 68 | 	wiki.creativecommons.org/Considerations_for_licensors
 69 | 
 70 |      Considerations for the public: By using one of our public
 71 |      licenses, a licensor grants the public permission to use the
 72 |      licensed material under specified terms and conditions. If
 73 |      the licensor's permission is not necessary for any reason--for
 74 |      example, because of any applicable exception or limitation to
 75 |      copyright--then that use is not regulated by the license. Our
 76 |      licenses grant only permissions under copyright and certain
 77 |      other rights that a licensor has authority to grant. Use of
 78 |      the licensed material may still be restricted for other
 79 |      reasons, including because others have copyright or other
 80 |      rights in the material. A licensor may make special requests,
 81 |      such as asking that all changes be marked or described.
 82 |      Although not required by our licenses, you are encouraged to
 83 |      respect those requests where reasonable. More_considerations
 84 |      for the public:
 85 | 	wiki.creativecommons.org/Considerations_for_licensees
 86 | 
 87 | =======================================================================
 88 | 
 89 | Creative Commons Attribution 4.0 International Public License
 90 | 
 91 | By exercising the Licensed Rights (defined below), You accept and agree
 92 | to be bound by the terms and conditions of this Creative Commons
 93 | Attribution 4.0 International Public License ("Public License"). To the
 94 | extent this Public License may be interpreted as a contract, You are
 95 | granted the Licensed Rights in consideration of Your acceptance of
 96 | these terms and conditions, and the Licensor grants You such rights in
 97 | consideration of benefits the Licensor receives from making the
 98 | Licensed Material available under these terms and conditions.
 99 | 
100 | 
101 | Section 1 -- Definitions.
102 | 
103 |   a. Adapted Material means material subject to Copyright and Similar
104 |      Rights that is derived from or based upon the Licensed Material
105 |      and in which the Licensed Material is translated, altered,
106 |      arranged, transformed, or otherwise modified in a manner requiring
107 |      permission under the Copyright and Similar Rights held by the
108 |      Licensor. For purposes of this Public License, where the Licensed
109 |      Material is a musical work, performance, or sound recording,
110 |      Adapted Material is always produced where the Licensed Material is
111 |      synched in timed relation with a moving image.
112 | 
113 |   b. Adapter's License means the license You apply to Your Copyright
114 |      and Similar Rights in Your contributions to Adapted Material in
115 |      accordance with the terms and conditions of this Public License.
116 | 
117 |   c. Copyright and Similar Rights means copyright and/or similar rights
118 |      closely related to copyright including, without limitation,
119 |      performance, broadcast, sound recording, and Sui Generis Database
120 |      Rights, without regard to how the rights are labeled or
121 |      categorized. For purposes of this Public License, the rights
122 |      specified in Section 2(b)(1)-(2) are not Copyright and Similar
123 |      Rights.
124 | 
125 |   d. Effective Technological Measures means those measures that, in the
126 |      absence of proper authority, may not be circumvented under laws
127 |      fulfilling obligations under Article 11 of the WIPO Copyright
128 |      Treaty adopted on December 20, 1996, and/or similar international
129 |      agreements.
130 | 
131 |   e. Exceptions and Limitations means fair use, fair dealing, and/or
132 |      any other exception or limitation to Copyright and Similar Rights
133 |      that applies to Your use of the Licensed Material.
134 | 
135 |   f. Licensed Material means the artistic or literary work, database,
136 |      or other material to which the Licensor applied this Public
137 |      License.
138 | 
139 |   g. Licensed Rights means the rights granted to You subject to the
140 |      terms and conditions of this Public License, which are limited to
141 |      all Copyright and Similar Rights that apply to Your use of the
142 |      Licensed Material and that the Licensor has authority to license.
143 | 
144 |   h. Licensor means the individual(s) or entity(ies) granting rights
145 |      under this Public License.
146 | 
147 |   i. Share means to provide material to the public by any means or
148 |      process that requires permission under the Licensed Rights, such
149 |      as reproduction, public display, public performance, distribution,
150 |      dissemination, communication, or importation, and to make material
151 |      available to the public including in ways that members of the
152 |      public may access the material from a place and at a time
153 |      individually chosen by them.
154 | 
155 |   j. Sui Generis Database Rights means rights other than copyright
156 |      resulting from Directive 96/9/EC of the European Parliament and of
157 |      the Council of 11 March 1996 on the legal protection of databases,
158 |      as amended and/or succeeded, as well as other essentially
159 |      equivalent rights anywhere in the world.
160 | 
161 |   k. You means the individual or entity exercising the Licensed Rights
162 |      under this Public License. Your has a corresponding meaning.
163 | 
164 | 
165 | Section 2 -- Scope.
166 | 
167 |   a. License grant.
168 | 
169 |        1. Subject to the terms and conditions of this Public License,
170 |           the Licensor hereby grants You a worldwide, royalty-free,
171 |           non-sublicensable, non-exclusive, irrevocable license to
172 |           exercise the Licensed Rights in the Licensed Material to:
173 | 
174 |             a. reproduce and Share the Licensed Material, in whole or
175 |                in part; and
176 | 
177 |             b. produce, reproduce, and Share Adapted Material.
178 | 
179 |        2. Exceptions and Limitations. For the avoidance of doubt, where
180 |           Exceptions and Limitations apply to Your use, this Public
181 |           License does not apply, and You do not need to comply with
182 |           its terms and conditions.
183 | 
184 |        3. Term. The term of this Public License is specified in Section
185 |           6(a).
186 | 
187 |        4. Media and formats; technical modifications allowed. The
188 |           Licensor authorizes You to exercise the Licensed Rights in
189 |           all media and formats whether now known or hereafter created,
190 |           and to make technical modifications necessary to do so. The
191 |           Licensor waives and/or agrees not to assert any right or
192 |           authority to forbid You from making technical modifications
193 |           necessary to exercise the Licensed Rights, including
194 |           technical modifications necessary to circumvent Effective
195 |           Technological Measures. For purposes of this Public License,
196 |           simply making modifications authorized by this Section 2(a)
197 |           (4) never produces Adapted Material.
198 | 
199 |        5. Downstream recipients.
200 | 
201 |             a. Offer from the Licensor -- Licensed Material. Every
202 |                recipient of the Licensed Material automatically
203 |                receives an offer from the Licensor to exercise the
204 |                Licensed Rights under the terms and conditions of this
205 |                Public License.
206 | 
207 |             b. No downstream restrictions. You may not offer or impose
208 |                any additional or different terms or conditions on, or
209 |                apply any Effective Technological Measures to, the
210 |                Licensed Material if doing so restricts exercise of the
211 |                Licensed Rights by any recipient of the Licensed
212 |                Material.
213 | 
214 |        6. No endorsement. Nothing in this Public License constitutes or
215 |           may be construed as permission to assert or imply that You
216 |           are, or that Your use of the Licensed Material is, connected
217 |           with, or sponsored, endorsed, or granted official status by,
218 |           the Licensor or others designated to receive attribution as
219 |           provided in Section 3(a)(1)(A)(i).
220 | 
221 |   b. Other rights.
222 | 
223 |        1. Moral rights, such as the right of integrity, are not
224 |           licensed under this Public License, nor are publicity,
225 |           privacy, and/or other similar personality rights; however, to
226 |           the extent possible, the Licensor waives and/or agrees not to
227 |           assert any such rights held by the Licensor to the limited
228 |           extent necessary to allow You to exercise the Licensed
229 |           Rights, but not otherwise.
230 | 
231 |        2. Patent and trademark rights are not licensed under this
232 |           Public License.
233 | 
234 |        3. To the extent possible, the Licensor waives any right to
235 |           collect royalties from You for the exercise of the Licensed
236 |           Rights, whether directly or through a collecting society
237 |           under any voluntary or waivable statutory or compulsory
238 |           licensing scheme. In all other cases the Licensor expressly
239 |           reserves any right to collect such royalties.
240 | 
241 | 
242 | Section 3 -- License Conditions.
243 | 
244 | Your exercise of the Licensed Rights is expressly made subject to the
245 | following conditions.
246 | 
247 |   a. Attribution.
248 | 
249 |        1. If You Share the Licensed Material (including in modified
250 |           form), You must:
251 | 
252 |             a. retain the following if it is supplied by the Licensor
253 |                with the Licensed Material:
254 | 
255 |                  i. identification of the creator(s) of the Licensed
256 |                     Material and any others designated to receive
257 |                     attribution, in any reasonable manner requested by
258 |                     the Licensor (including by pseudonym if
259 |                     designated);
260 | 
261 |                 ii. a copyright notice;
262 | 
263 |                iii. a notice that refers to this Public License;
264 | 
265 |                 iv. a notice that refers to the disclaimer of
266 |                     warranties;
267 | 
268 |                  v. a URI or hyperlink to the Licensed Material to the
269 |                     extent reasonably practicable;
270 | 
271 |             b. indicate if You modified the Licensed Material and
272 |                retain an indication of any previous modifications; and
273 | 
274 |             c. indicate the Licensed Material is licensed under this
275 |                Public License, and include the text of, or the URI or
276 |                hyperlink to, this Public License.
277 | 
278 |        2. You may satisfy the conditions in Section 3(a)(1) in any
279 |           reasonable manner based on the medium, means, and context in
280 |           which You Share the Licensed Material. For example, it may be
281 |           reasonable to satisfy the conditions by providing a URI or
282 |           hyperlink to a resource that includes the required
283 |           information.
284 | 
285 |        3. If requested by the Licensor, You must remove any of the
286 |           information required by Section 3(a)(1)(A) to the extent
287 |           reasonably practicable.
288 | 
289 |        4. If You Share Adapted Material You produce, the Adapter's
290 |           License You apply must not prevent recipients of the Adapted
291 |           Material from complying with this Public License.
292 | 
293 | 
294 | Section 4 -- Sui Generis Database Rights.
295 | 
296 | Where the Licensed Rights include Sui Generis Database Rights that
297 | apply to Your use of the Licensed Material:
298 | 
299 |   a. for the avoidance of doubt, Section 2(a)(1) grants You the right
300 |      to extract, reuse, reproduce, and Share all or a substantial
301 |      portion of the contents of the database;
302 | 
303 |   b. if You include all or a substantial portion of the database
304 |      contents in a database in which You have Sui Generis Database
305 |      Rights, then the database in which You have Sui Generis Database
306 |      Rights (but not its individual contents) is Adapted Material; and
307 | 
308 |   c. You must comply with the conditions in Section 3(a) if You Share
309 |      all or a substantial portion of the contents of the database.
310 | 
311 | For the avoidance of doubt, this Section 4 supplements and does not
312 | replace Your obligations under this Public License where the Licensed
313 | Rights include other Copyright and Similar Rights.
314 | 
315 | 
316 | Section 5 -- Disclaimer of Warranties and Limitation of Liability.
317 | 
318 |   a. UNLESS OTHERWISE SEPARATELY UNDERTAKEN BY THE LICENSOR, TO THE
319 |      EXTENT POSSIBLE, THE LICENSOR OFFERS THE LICENSED MATERIAL AS-IS
320 |      AND AS-AVAILABLE, AND MAKES NO REPRESENTATIONS OR WARRANTIES OF
321 |      ANY KIND CONCERNING THE LICENSED MATERIAL, WHETHER EXPRESS,
322 |      IMPLIED, STATUTORY, OR OTHER. THIS INCLUDES, WITHOUT LIMITATION,
323 |      WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR
324 |      PURPOSE, NON-INFRINGEMENT, ABSENCE OF LATENT OR OTHER DEFECTS,
325 |      ACCURACY, OR THE PRESENCE OR ABSENCE OF ERRORS, WHETHER OR NOT
326 |      KNOWN OR DISCOVERABLE. WHERE DISCLAIMERS OF WARRANTIES ARE NOT
327 |      ALLOWED IN FULL OR IN PART, THIS DISCLAIMER MAY NOT APPLY TO YOU.
328 | 
329 |   b. TO THE EXTENT POSSIBLE, IN NO EVENT WILL THE LICENSOR BE LIABLE
330 |      TO YOU ON ANY LEGAL THEORY (INCLUDING, WITHOUT LIMITATION,
331 |      NEGLIGENCE) OR OTHERWISE FOR ANY DIRECT, SPECIAL, INDIRECT,
332 |      INCIDENTAL, CONSEQUENTIAL, PUNITIVE, EXEMPLARY, OR OTHER LOSSES,
333 |      COSTS, EXPENSES, OR DAMAGES ARISING OUT OF THIS PUBLIC LICENSE OR
334 |      USE OF THE LICENSED MATERIAL, EVEN IF THE LICENSOR HAS BEEN
335 |      ADVISED OF THE POSSIBILITY OF SUCH LOSSES, COSTS, EXPENSES, OR
336 |      DAMAGES. WHERE A LIMITATION OF LIABILITY IS NOT ALLOWED IN FULL OR
337 |      IN PART, THIS LIMITATION MAY NOT APPLY TO YOU.
338 | 
339 |   c. The disclaimer of warranties and limitation of liability provided
340 |      above shall be interpreted in a manner that, to the extent
341 |      possible, most closely approximates an absolute disclaimer and
342 |      waiver of all liability.
343 | 
344 | 
345 | Section 6 -- Term and Termination.
346 | 
347 |   a. This Public License applies for the term of the Copyright and
348 |      Similar Rights licensed here. However, if You fail to comply with
349 |      this Public License, then Your rights under this Public License
350 |      terminate automatically.
351 | 
352 |   b. Where Your right to use the Licensed Material has terminated under
353 |      Section 6(a), it reinstates:
354 | 
355 |        1. automatically as of the date the violation is cured, provided
356 |           it is cured within 30 days of Your discovery of the
357 |           violation; or
358 | 
359 |        2. upon express reinstatement by the Licensor.
360 | 
361 |      For the avoidance of doubt, this Section 6(b) does not affect any
362 |      right the Licensor may have to seek remedies for Your violations
363 |      of this Public License.
364 | 
365 |   c. For the avoidance of doubt, the Licensor may also offer the
366 |      Licensed Material under separate terms or conditions or stop
367 |      distributing the Licensed Material at any time; however, doing so
368 |      will not terminate this Public License.
369 | 
370 |   d. Sections 1, 5, 6, 7, and 8 survive termination of this Public
371 |      License.
372 | 
373 | 
374 | Section 7 -- Other Terms and Conditions.
375 | 
376 |   a. The Licensor shall not be bound by any additional or different
377 |      terms or conditions communicated by You unless expressly agreed.
378 | 
379 |   b. Any arrangements, understandings, or agreements regarding the
380 |      Licensed Material not stated herein are separate from and
381 |      independent of the terms and conditions of this Public License.
382 | 
383 | 
384 | Section 8 -- Interpretation.
385 | 
386 |   a. For the avoidance of doubt, this Public License does not, and
387 |      shall not be interpreted to, reduce, limit, restrict, or impose
388 |      conditions on any use of the Licensed Material that could lawfully
389 |      be made without permission under this Public License.
390 | 
391 |   b. To the extent possible, if any provision of this Public License is
392 |      deemed unenforceable, it shall be automatically reformed to the
393 |      minimum extent necessary to make it enforceable. If the provision
394 |      cannot be reformed, it shall be severed from this Public License
395 |      without affecting the enforceability of the remaining terms and
396 |      conditions.
397 | 
398 |   c. No term or condition of this Public License will be waived and no
399 |      failure to comply consented to unless expressly agreed to by the
400 |      Licensor.
401 | 
402 |   d. Nothing in this Public License constitutes or may be interpreted
403 |      as a limitation upon, or waiver of, any privileges and immunities
404 |      that apply to the Licensor or You, including from the legal
405 |      processes of any jurisdiction or authority.
406 | 
407 | 
408 | =======================================================================
409 | 
410 | Creative Commons is not a party to its public
411 | licenses. Notwithstanding, Creative Commons may elect to apply one of
412 | its public licenses to material it publishes and in those instances
413 | will be considered the “Licensor.” The text of the Creative Commons
414 | public licenses is dedicated to the public domain under the CC0 Public
415 | Domain Dedication. Except for the limited purpose of indicating that
416 | material is shared under a Creative Commons public license or as
417 | otherwise permitted by the Creative Commons policies published at
418 | creativecommons.org/policies, Creative Commons does not authorize the
419 | use of the trademark "Creative Commons" or any other trademark or logo
420 | of Creative Commons without its prior written consent including,
421 | without limitation, in connection with any unauthorized modifications
422 | to any of its public licenses or any other arrangements,
423 | understandings, or agreements concerning use of licensed material. For
424 | the avoidance of doubt, this paragraph does not form part of the
425 | public licenses.
426 | 
427 | Creative Commons may be contacted at creativecommons.org.
428 | 
429 | ----------------
430 | 
431 | 
432 | ** In data/multilingual_mathqa, we release Multi-lingual MathQA where we adapt the MathQA-Python dataset by Google into multiple datasets in different programming languages. The original code and dataset are from https://github.com/google/trax/blob/master/trax/examples/MathQA_Python_generation_notebook.ipynb
433 | 
434 | 
435 |                                  Apache License
436 |                            Version 2.0, January 2004
437 |                         http://www.apache.org/licenses/
438 | 
439 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
440 | 
441 |    1. Definitions.
442 | 
443 |       "License" shall mean the terms and conditions for use, reproduction,
444 |       and distribution as defined by Sections 1 through 9 of this document.
445 | 
446 |       "Licensor" shall mean the copyright owner or entity authorized by
447 |       the copyright owner that is granting the License.
448 | 
449 |       "Legal Entity" shall mean the union of the acting entity and all
450 |       other entities that control, are controlled by, or are under common
451 |       control with that entity. For the purposes of this definition,
452 |       "control" means (i) the power, direct or indirect, to cause the
453 |       direction or management of such entity, whether by contract or
454 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
455 |       outstanding shares, or (iii) beneficial ownership of such entity.
456 | 
457 |       "You" (or "Your") shall mean an individual or Legal Entity
458 |       exercising permissions granted by this License.
459 | 
460 |       "Source" form shall mean the preferred form for making modifications,
461 |       including but not limited to software source code, documentation
462 |       source, and configuration files.
463 | 
464 |       "Object" form shall mean any form resulting from mechanical
465 |       transformation or translation of a Source form, including but
466 |       not limited to compiled object code, generated documentation,
467 |       and conversions to other media types.
468 | 
469 |       "Work" shall mean the work of authorship, whether in Source or
470 |       Object form, made available under the License, as indicated by a
471 |       copyright notice that is included in or attached to the work
472 |       (an example is provided in the Appendix below).
473 | 
474 |       "Derivative Works" shall mean any work, whether in Source or Object
475 |       form, that is based on (or derived from) the Work and for which the
476 |       editorial revisions, annotations, elaborations, or other modifications
477 |       represent, as a whole, an original work of authorship. For the purposes
478 |       of this License, Derivative Works shall not include works that remain
479 |       separable from, or merely link (or bind by name) to the interfaces of,
480 |       the Work and Derivative Works thereof.
481 | 
482 |       "Contribution" shall mean any work of authorship, including
483 |       the original version of the Work and any modifications or additions
484 |       to that Work or Derivative Works thereof, that is intentionally
485 |       submitted to Licensor for inclusion in the Work by the copyright owner
486 |       or by an individual or Legal Entity authorized to submit on behalf of
487 |       the copyright owner. For the purposes of this definition, "submitted"
488 |       means any form of electronic, verbal, or written communication sent
489 |       to the Licensor or its representatives, including but not limited to
490 |       communication on electronic mailing lists, source code control systems,
491 |       and issue tracking systems that are managed by, or on behalf of, the
492 |       Licensor for the purpose of discussing and improving the Work, but
493 |       excluding communication that is conspicuously marked or otherwise
494 |       designated in writing by the copyright owner as "Not a Contribution."
495 | 
496 |       "Contributor" shall mean Licensor and any individual or Legal Entity
497 |       on behalf of whom a Contribution has been received by Licensor and
498 |       subsequently incorporated within the Work.
499 | 
500 |    2. Grant of Copyright License. Subject to the terms and conditions of
501 |       this License, each Contributor hereby grants to You a perpetual,
502 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
503 |       copyright license to reproduce, prepare Derivative Works of,
504 |       publicly display, publicly perform, sublicense, and distribute the
505 |       Work and such Derivative Works in Source or Object form.
506 | 
507 |    3. Grant of Patent License. Subject to the terms and conditions of
508 |       this License, each Contributor hereby grants to You a perpetual,
509 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
510 |       (except as stated in this section) patent license to make, have made,
511 |       use, offer to sell, sell, import, and otherwise transfer the Work,
512 |       where such license applies only to those patent claims licensable
513 |       by such Contributor that are necessarily infringed by their
514 |       Contribution(s) alone or by combination of their Contribution(s)
515 |       with the Work to which such Contribution(s) was submitted. If You
516 |       institute patent litigation against any entity (including a
517 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
518 |       or a Contribution incorporated within the Work constitutes direct
519 |       or contributory patent infringement, then any patent licenses
520 |       granted to You under this License for that Work shall terminate
521 |       as of the date such litigation is filed.
522 | 
523 |    4. Redistribution. You may reproduce and distribute copies of the
524 |       Work or Derivative Works thereof in any medium, with or without
525 |       modifications, and in Source or Object form, provided that You
526 |       meet the following conditions:
527 | 
528 |       (a) You must give any other recipients of the Work or
529 |           Derivative Works a copy of this License; and
530 | 
531 |       (b) You must cause any modified files to carry prominent notices
532 |           stating that You changed the files; and
533 | 
534 |       (c) You must retain, in the Source form of any Derivative Works
535 |           that You distribute, all copyright, patent, trademark, and
536 |           attribution notices from the Source form of the Work,
537 |           excluding those notices that do not pertain to any part of
538 |           the Derivative Works; and
539 | 
540 |       (d) If the Work includes a "NOTICE" text file as part of its
541 |           distribution, then any Derivative Works that You distribute must
542 |           include a readable copy of the attribution notices contained
543 |           within such NOTICE file, excluding those notices that do not
544 |           pertain to any part of the Derivative Works, in at least one
545 |           of the following places: within a NOTICE text file distributed
546 |           as part of the Derivative Works; within the Source form or
547 |           documentation, if provided along with the Derivative Works; or,
548 |           within a display generated by the Derivative Works, if and
549 |           wherever such third-party notices normally appear. The contents
550 |           of the NOTICE file are for informational purposes only and
551 |           do not modify the License. You may add Your own attribution
552 |           notices within Derivative Works that You distribute, alongside
553 |           or as an addendum to the NOTICE text from the Work, provided
554 |           that such additional attribution notices cannot be construed
555 |           as modifying the License.
556 | 
557 |       You may add Your own copyright statement to Your modifications and
558 |       may provide additional or different license terms and conditions
559 |       for use, reproduction, or distribution of Your modifications, or
560 |       for any such Derivative Works as a whole, provided Your use,
561 |       reproduction, and distribution of the Work otherwise complies with
562 |       the conditions stated in this License.
563 | 
564 |    5. Submission of Contributions. Unless You explicitly state otherwise,
565 |       any Contribution intentionally submitted for inclusion in the Work
566 |       by You to the Licensor shall be under the terms and conditions of
567 |       this License, without any additional terms or conditions.
568 |       Notwithstanding the above, nothing herein shall supersede or modify
569 |       the terms of any separate license agreement you may have executed
570 |       with Licensor regarding such Contributions.
571 | 
572 |    6. Trademarks. This License does not grant permission to use the trade
573 |       names, trademarks, service marks, or product names of the Licensor,
574 |       except as required for reasonable and customary use in describing the
575 |       origin of the Work and reproducing the content of the NOTICE file.
576 | 
577 |    7. Disclaimer of Warranty. Unless required by applicable law or
578 |       agreed to in writing, Licensor provides the Work (and each
579 |       Contributor provides its Contributions) on an "AS IS" BASIS,
580 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
581 |       implied, including, without limitation, any warranties or conditions
582 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
583 |       PARTICULAR PURPOSE. You are solely responsible for determining the
584 |       appropriateness of using or redistributing the Work and assume any
585 |       risks associated with Your exercise of permissions under this License.
586 | 
587 |    8. Limitation of Liability. In no event and under no legal theory,
588 |       whether in tort (including negligence), contract, or otherwise,
589 |       unless required by applicable law (such as deliberate and grossly
590 |       negligent acts) or agreed to in writing, shall any Contributor be
591 |       liable to You for damages, including any direct, indirect, special,
592 |       incidental, or consequential damages of any character arising as a
593 |       result of this License or out of the use or inability to use the
594 |       Work (including but not limited to damages for loss of goodwill,
595 |       work stoppage, computer failure or malfunction, or any and all
596 |       other commercial damages or losses), even if such Contributor
597 |       has been advised of the possibility of such damages.
598 | 
599 |    9. Accepting Warranty or Additional Liability. While redistributing
600 |       the Work or Derivative Works thereof, You may choose to offer,
601 |       and charge a fee for, acceptance of support, warranty, indemnity,
602 |       or other liability obligations and/or rights consistent with this
603 |       License. However, in accepting such obligations, You may act only
604 |       on Your own behalf and on Your sole responsibility, not on behalf
605 |       of any other Contributor, and only if You agree to indemnify,
606 |       defend, and hold each Contributor harmless for any liability
607 |       incurred by, or claims asserted against, such Contributor by reason
608 |       of your accepting any such warranty or additional liability.
609 | 
610 |    END OF TERMS AND CONDITIONS
611 | 
612 |    APPENDIX: How to apply the Apache License to your work.
613 | 
614 |       To apply the Apache License to your work, attach the following
615 |       boilerplate notice, with the fields enclosed by brackets "[]"
616 |       replaced with your own identifying information. (Don't include
617 |       the brackets!)  The text should be enclosed in the appropriate
618 |       comment syntax for the file format. We also recommend that a
619 |       file or class name and description of purpose be included on the
620 |       same "printed page" as the copyright notice for easier
621 |       identification within third-party archives.
622 | 
623 |    Copyright [yyyy] [name of copyright owner]
624 | 
625 |    Licensed under the Apache License, Version 2.0 (the "License");
626 |    you may not use this file except in compliance with the License.
627 |    You may obtain a copy of the License at
628 | 
629 |        http://www.apache.org/licenses/LICENSE-2.0
630 | 
631 |    Unless required by applicable law or agreed to in writing, software
632 |    distributed under the License is distributed on an "AS IS" BASIS,
633 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
634 |    See the License for the specific language governing permissions and
635 |    limitations under the License.
636 | 


--------------------------------------------------------------------------------
/mxeval/graphics/mbxp_java_conversion.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/floatai/HumanEval-XL/1e9301f6cfc2d3481a7f7e44569982285238ac99/mxeval/graphics/mbxp_java_conversion.png


--------------------------------------------------------------------------------
/mxeval/graphics/paper_summary.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/floatai/HumanEval-XL/1e9301f6cfc2d3481a7f7e44569982285238ac99/mxeval/graphics/paper_summary.png


--------------------------------------------------------------------------------
/mxeval/language_setup/amazon_linux_ami.sh:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/bash
  2 | 
  3 | printf "%100s" " " | tr ' ' '-'
  4 | echo ""
  5 | echo "setting up C++ "
  6 | printf "%100s" " " | tr ' ' '-'
  7 | echo ""
  8 | sudo yum install -y gcc-c++
  9 | 
 10 | printf "%100s" " " | tr ' ' '-'
 11 | echo ""
 12 | echo "setting up Ruby "
 13 | printf "%100s" " " | tr ' ' '-'
 14 | echo ""
 15 | sudo amazon-linux-extras install -y ruby3.0
 16 | 
 17 | printf "%100s" " " | tr ' ' '-'
 18 | echo ""
 19 | echo "setting up php "
 20 | printf "%100s" " " | tr ' ' '-'
 21 | echo ""
 22 | # ref https://techviewleo.com/install-php-8-on-amazon-linux/
 23 | sudo yum -y install https://dl.fedoraproject.org/pub/epel/epel-release-latest-7.noarch.rpm
 24 | sudo yum -y install https://rpms.remirepo.net/enterprise/remi-release-7.rpm
 25 | 
 26 | sudo yum makecache
 27 | 
 28 | sudo yum -y install yum-utils
 29 | sudo yum-config-manager --disable 'remi-php*'
 30 | sudo amazon-linux-extras enable php8.0
 31 | sudo yum clean metadata
 32 | sudo yum install -y php-{pear,cgi,pdo,common,curl,mbstring,gd,mysqlnd,gettext,bcmath,json,xml,fpm,intl,zip}
 33 | 
 34 | printf "%100s" " " | tr ' ' '-'
 35 | echo ""
 36 | echo "setting up Java "
 37 | printf "%100s" " " | tr ' ' '-'
 38 | echo ""
 39 | # ref https://docs.aws.amazon.com/corretto/latest/corretto-8-ug/amazon-linux-install.html
 40 | sudo amazon-linux-extras enable corretto8
 41 | sudo yum install -y java-1.8.0-amazon-corretto-devel
 42 | 
 43 | printf "%100s" " " | tr ' ' '-'
 44 | echo ""
 45 | echo "setting up JavaScript "
 46 | printf "%100s" " " | tr ' ' '-'
 47 | echo ""
 48 | curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.34.0/install.sh | bash
 49 | . ~/.nvm/nvm.sh
 50 | nvm install 16.10.0
 51 | node -e "console.log('Running Node.js ' + process.version)"
 52 | npm i -g npm
 53 | npm install -g lodash
 54 | npm i --save lodash
 55 | 
 56 | 
 57 | printf "%100s" " " | tr ' ' '-'
 58 | echo ""
 59 | echo "setting up TypeScript "
 60 | printf "%100s" " " | tr ' ' '-'
 61 | echo ""
 62 | npm install -g typescript
 63 | 
 64 | cd /usr/local
 65 | printf "%100s" " " | tr ' ' '-'
 66 | echo ""
 67 | echo "setting up Go "
 68 | printf "%100s" " " | tr ' ' '-'
 69 | echo ""
 70 | sudo wget https://go.dev/dl/go1.19.1.linux-amd64.tar.gz && sudo tar -xzvf go1.19.1.linux-amd64.tar.gz && sudo rm go1.19.1.linux-amd64.tar.gz
 71 | printf "%100s" " " | tr ' ' '-'
 72 | echo ""
 73 | echo "setting up Swift "
 74 | printf "%100s" " " | tr ' ' '-'
 75 | echo ""
 76 | sudo wget https://download.swift.org/swift-5.7-release/amazonlinux2/swift-5.7-RELEASE/swift-5.7-RELEASE-amazonlinux2.tar.gz && sudo tar -xzvf swift-5.7-RELEASE-amazonlinux2.tar.gz && sudo rm swift-5.7-RELEASE-amazonlinux2.tar.gz
 77 | 
 78 | cd ~
 79 | printf "%100s" " " | tr ' ' '-'
 80 | echo ""
 81 | echo "setting up Scala "
 82 | printf "%100s" " " | tr ' ' '-'
 83 | echo ""
 84 | wget http://downloads.lightbend.com/scala/2.11.8/scala-2.11.8.rpm
 85 | sudo yum -y install scala-2.11.8.rpm
 86 | 
 87 | printf "%100s" " " | tr ' ' '-'
 88 | echo ""
 89 | echo "setting up C# "
 90 | printf "%100s" " " | tr ' ' '-'
 91 | echo ""
 92 | sudo rpm -Uvh https://packages.microsoft.com/config/centos/7/packages-microsoft-prod.rpm
 93 | sudo yum install -y dotnet-sdk-6.0
 94 | 
 95 | printf "%100s" " " | tr ' ' '-'
 96 | echo ""
 97 | echo "setting up Perl "
 98 | printf "%100s" " " | tr ' ' '-'
 99 | echo ""
100 | sudo yum install -y perl-CPAN
101 | perl -MCPAN -e 'install Data::Compare'
102 | 
103 | printf "%100s" " " | tr ' ' '-'
104 | echo ""
105 | echo "setting up Kotlin "
106 | printf "%100s" " " | tr ' ' '-'
107 | echo ""
108 | curl -s https://get.sdkman.io | bash
109 | export SDKMAN_DIR="$HOME/.sdkman"
110 | [[ -s "$HOME/.sdkman/bin/sdkman-init.sh" ]] && source "$HOME/.sdkman/bin/sdkman-init.sh"
111 | sdk install kotlin
112 | 
113 | # writing out updated PATH to ~/.bashrc
114 | echo 'export PATH="${PATH}:/usr/local/go/bin:/usr/local/swift-5.7-RELEASE-amazonlinux2/usr/bin"' >> ~/.bashrc
115 | 
116 | printf "%100s" " " | tr ' ' '-'
117 | echo ""
118 | echo 'Installation complete. Please start a new terminal session for changes to take place.'
119 | printf "%100s" " " | tr ' ' '-'
120 | echo ""
121 | 


--------------------------------------------------------------------------------
/mxeval/language_setup/ubuntu.sh:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/bash
  2 | 
  3 | printf "%100s" " " | tr ' ' '-'
  4 | echo ""
  5 | echo "setting up Ruby "
  6 | printf "%100s" " " | tr ' ' '-'
  7 | echo ""
  8 | sudo apt update
  9 | sudo apt install git curl libssl-dev libreadline-dev zlib1g-dev autoconf bison build-essential        libyaml-dev libreadline-dev libncurses5-dev libffi-dev libgdbm-dev
 10 | curl -fsSL https://github.com/rbenv/rbenv-installer/raw/HEAD/bin/rbenv-installer | bash
 11 | echo 'export PATH="$HOME/.rbenv/bin:$PATH"' >> ~/.bashrc
 12 | echo 'eval "$(rbenv init -)"' >> ~/.bashrc
 13 | source ~/.bashrc
 14 | rbenv install 3.0.0
 15 | rbenv global 3.0.0
 16 | 
 17 | 
 18 | printf "%100s" " " | tr ' ' '-'
 19 | echo ""
 20 | echo "setting up php "
 21 | printf "%100s" " " | tr ' ' '-'
 22 | echo ""
 23 | sudo apt install software-properties-common ca-certificates lsb-release apt-transport-https
 24 | sudo add-apt-repository ppa:ondrej/php
 25 | sudo apt update
 26 | sudo apt install php8.0
 27 | sudo apt install -y php-{pear,cgi,pdo,common,curl,mbstring,gd,mysqlnd,gettext,bcmath,json,xml,fpm,intl,zip}
 28 | 
 29 | 
 30 | printf "%100s" " " | tr ' ' '-'
 31 | echo ""
 32 | echo "setting up Java "
 33 | printf "%100s" " " | tr ' ' '-'
 34 | echo ""
 35 | sudo apt-get install openjdk-8-jdk
 36 | 
 37 | 
 38 | printf "%100s" " " | tr ' ' '-'
 39 | echo ""
 40 | echo "setting up JavaScript "
 41 | printf "%100s" " " | tr ' ' '-'
 42 | echo ""
 43 | sudo apt install curl
 44 | curl https://raw.githubusercontent.com/creationix/nvm/master/install.sh | bash
 45 | source ~/.bashrc
 46 | sudo apt install npm
 47 | nvm install 16.10.0
 48 | node -e "console.log('Running Node.js ' + process.version)"
 49 | npm i -g npm
 50 | npm install -g lodash
 51 | npm i --save lodash
 52 | 
 53 | 
 54 | printf "%100s" " " | tr ' ' '-'
 55 | echo ""
 56 | echo "setting up TypeScript "
 57 | printf "%100s" " " | tr ' ' '-'
 58 | echo ""
 59 | sudo npm install -g typescript
 60 | 
 61 | cd /usr/local
 62 | printf "%100s" " " | tr ' ' '-'
 63 | echo ""
 64 | echo "setting up Go "
 65 | printf "%100s" " " | tr ' ' '-'
 66 | echo ""
 67 | # https://go.dev/doc/install
 68 | sudo wget https://go.dev/dl/go1.19.1.linux-amd64.tar.gz && sudo tar -xzvf go1.19.1.linux-amd64.tar.gz && sudo rm go1.19.1.linux-amd64.tar.gz
 69 | printf "%100s" " " | tr ' ' '-'
 70 | echo ""
 71 | echo "setting up Swift "
 72 | printf "%100s" " " | tr ' ' '-'
 73 | echo ""
 74 | swift_release="swift-5.7-RELEASE-ubuntu20.04.tar.gz"
 75 | sudo wget "https://download.swift.org/swift-5.7-release/ubuntu2004/swift-5.7-RELEASE/$swift_release" && sudo tar -xzvf $swift_release && sudo rm $swift_release
 76 | 
 77 | cd ~
 78 | printf "%100s" " " | tr ' ' '-'
 79 | echo ""
 80 | echo "setting up Scala "
 81 | printf "%100s" " " | tr ' ' '-'
 82 | echo ""
 83 | sudo apt-get install scala
 84 | 
 85 | 
 86 | printf "%100s" " " | tr ' ' '-'
 87 | echo ""
 88 | echo "setting up C# "
 89 | printf "%100s" " " | tr ' ' '-'
 90 | echo ""
 91 | sudo apt-get update && \
 92 |   sudo apt-get install -y dotnet6
 93 | 
 94 | printf "%100s" " " | tr ' ' '-'
 95 | echo ""
 96 | echo "setting up Perl "
 97 | printf "%100s" " " | tr ' ' '-'
 98 | echo ""
 99 | perl -MCPAN -e 'install Data::Compare'
100 | 
101 | printf "%100s" " " | tr ' ' '-'
102 | echo ""
103 | echo "setting up Kotlin "
104 | printf "%100s" " " | tr ' ' '-'
105 | echo ""
106 | sudo apt install zip
107 | sudo apt install unzip
108 | curl -s https://get.sdkman.io | bash
109 | export SDKMAN_DIR="$HOME/.sdkman"
110 | [[ -s "$HOME/.sdkman/bin/sdkman-init.sh" ]] && source "$HOME/.sdkman/bin/sdkman-init.sh"
111 | sdk install kotlin
112 | 
113 | 
114 | # writing out updated PATH to ~/.bashrc
115 | echo 'PATH="${PATH}:/usr/local/swift-5.7-RELEASE-ubuntu20.04/usr/bin:/usr/local/go/bin"' >> ~/.bashrc
116 | 
117 | printf "%100s" " " | tr ' ' '-'
118 | echo ""
119 | echo 'Installation complete. Please start a new terminal session for changes to take place.'
120 | printf "%100s" " " | tr ' ' '-'
121 | echo ""
122 | 


--------------------------------------------------------------------------------
/mxeval/mxeval/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/floatai/HumanEval-XL/1e9301f6cfc2d3481a7f7e44569982285238ac99/mxeval/mxeval/__init__.py


--------------------------------------------------------------------------------
/mxeval/mxeval/data.py:
--------------------------------------------------------------------------------
  1 | from typing import Iterable, Dict
  2 | import gzip
  3 | import json
  4 | import os
  5 | 
  6 | 
  7 | ROOT = os.path.dirname(os.path.abspath(__file__))
  8 | HUMAN_EVAL = os.path.join(ROOT, "..", "data", "HumanEval.jsonl.gz")
  9 | 
 10 | 
 11 | def read_problems(evalset_file: str = HUMAN_EVAL) -> Dict[str, Dict]:
 12 |     return {task["task_id"]: task for task in stream_jsonl(evalset_file)}
 13 | 
 14 | 
 15 | def stream_jsonl(filename: str) -> Iterable[Dict]:
 16 |     """
 17 |     Parses each jsonl line and yields it as a dictionary
 18 |     """
 19 |     if filename.endswith(".gz"):
 20 |         with open(filename, "rb") as gzfp:
 21 |             with gzip.open(gzfp, 'rt') as fp:
 22 |                 for line in fp:
 23 |                     if any(not x.isspace() for x in line):
 24 |                         yield json.loads(line)
 25 |     else:
 26 |         with open(filename, "r") as fp:
 27 |             for line in fp:
 28 |                 if any(not x.isspace() for x in line):
 29 |                     yield json.loads(line)
 30 | 
 31 | 
 32 | def write_jsonl(filename: str, data: Iterable[Dict], append: bool = False):
 33 |     """
 34 |     Writes an iterable of dictionaries to jsonl
 35 |     """
 36 |     if append:
 37 |         mode = 'ab'
 38 |     else:
 39 |         mode = 'wb'
 40 |     filename = os.path.expanduser(filename)
 41 |     if filename.endswith(".gz"):
 42 |         with open(filename, mode) as fp:
 43 |             with gzip.GzipFile(fileobj=fp, mode='wb') as gzfp:
 44 |                 for x in data:
 45 |                     gzfp.write((json.dumps(x) + "\n").encode('utf-8'))
 46 |     else:
 47 |         with open(filename, mode) as fp:
 48 |             for x in data:
 49 |                 fp.write((json.dumps(x) + "\n").encode('utf-8'))
 50 | 
 51 | 
 52 | def get_metadata(dataset, metadata_type="problem"):
 53 |   assert metadata_type in ["problem", "example"]
 54 |   assert dataset in ["mbxp", "multi-humaneval", "mathqa-x"], f"Unsupported dataset {dataset}"
 55 |   dataset_dirmap = {"mbxp": "mbxp",
 56 |                     "multi-humaneval": "multilingual_humaneval",
 57 |                     "mathqa-x": "multilingual_mathqa"}
 58 |   typemap = {"problem": "metadata.json",
 59 |              "example": "metadata_examples.json"}
 60 |   datadir = os.path.join(ROOT, "..", "data", dataset_dirmap[dataset])
 61 |   path =  os.path.join(datadir, typemap[metadata_type])
 62 |   with open(path, "r") as f:
 63 |     metadata = json.load(f)
 64 |     return metadata, datadir
 65 | 
 66 | 
 67 | def get_supported_langs(dataset):
 68 |   metadata, _ = get_metadata(dataset, metadata_type="problem")
 69 |   return list(metadata.keys())
 70 | 
 71 | 
 72 | def get_data(dataset="mbxp", language="python"):
 73 |   metadata, datadir = get_metadata(dataset, metadata_type="problem")
 74 |   if language.lower() not in metadata:
 75 |     raise ValueError(f"Language {language} not found in metadata file")
 76 |   datafile = metadata[language.lower()]
 77 |   print(f"Loading {dataset} | language = {language}")
 78 |   return read_problems(os.path.join(datadir, datafile))
 79 | 
 80 | 
 81 | # due to similar format, examples from mbxp are sufficient to be used
 82 | # for few-shot prompting in multi-humaneval
 83 | def get_examples(dataset="mbxp", language="python", num_examples=None):
 84 |   assert dataset in ["mbxp"], f"No fewshot examples in dataset {dataset}"
 85 |   metadata, datadir = get_metadata(dataset=dataset, metadata_type="example")
 86 |   if language.lower() not in metadata:
 87 |     raise ValueError(f"Language {language} not found in metadata file")
 88 |   datafile = metadata[language.lower()]
 89 |   print(f"Loading examples from {dataset} | language = {language}")
 90 |   # use streams
 91 |   if num_examples is None:
 92 |     # return the entire stream
 93 |     return stream_jsonl(os.path.join(datadir, datafile))
 94 |   else:
 95 |     problems = get_data(dataset=dataset, language=language)
 96 |     stream = get_examples(dataset=dataset, language=language)
 97 |     examples = []
 98 |     for idx, example in enumerate(stream):
 99 |       if idx == num_examples:
100 |         break
101 |       task_id = example["task_id"]
102 |       prompt = problems[task_id]["prompt"]
103 |       example["prompt"] = prompt
104 |       examples.append(example)
105 |     return examples
106 | 


--------------------------------------------------------------------------------
/mxeval/mxeval/evaluate_functional_correctness.py:
--------------------------------------------------------------------------------
 1 | # Original Copyright 2021 OpenAI under MIT License.
 2 | # Modifications Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
 3 | 
 4 | import sys
 5 | import os
 6 | 
 7 | import fire
 8 | from mxeval.data import HUMAN_EVAL
 9 | from mxeval.evaluation import evaluate_functional_correctness
10 | 
11 | 
def entry_point(
    sample_file: str,
    problem_file: str = HUMAN_EVAL,
    k: tuple = (1, 10, 100, 1000),  # from command line, use '1,10,100' for example
    n_workers: int = max(1, (os.cpu_count() or 2) - 1),
    timeout: float = 15.0,
):
    """
    Evaluates the functional correctness of generated samples.

    Per-sample results are written by evaluate_functional_correctness to
    f"{sample_file}_results.jsonl"; the aggregated pass@k scores are printed
    and stored as JSON in f"{sample_file}_passatk.json".

    :param sample_file: jsonl file with the generated samples.
    :param problem_file: jsonl file with the problems.
    :param k: the k values for pass@k; a single value (e.g. --k=1) is accepted
        as well as a comma-separated list.
    :param n_workers: number of worker threads, clamped to at least one.
    :param timeout: per-sample execution timeout in seconds.
    """
    import json  # local import: this module does not import json at file level

    print(f"\n\nEvaluating {sample_file}")
    # a lone value on the command line arrives as a scalar, not a tuple
    if isinstance(k, int):
        k = [k]
    elif isinstance(k, str):
        k = [part for part in k.split(",") if part.strip()]
    k = list(map(int, k))
    if n_workers is not None:
        # a thread pool needs at least one worker
        n_workers = max(1, int(n_workers))
    results = evaluate_functional_correctness(
        sample_file, k, n_workers, timeout, problem_file
    )
    with open(sample_file + "_passatk.json", "w") as f:
        # plain floats keep the file valid JSON regardless of the numeric type
        json.dump({name: float(score) for name, score in results.items()}, f)
    print(results)
31 | 
32 | 
def main():
    """Hands entry_point to fire.Fire to be run as the command-line interface."""
    fire.Fire(entry_point)


# NOTE: this call sits at module level without an `if __name__ == "__main__"`
# guard, so it also runs when the module is imported: main() is invoked and
# sys.exit() is called with its return value.
sys.exit(main())
38 | 


--------------------------------------------------------------------------------
/mxeval/mxeval/evaluation.py:
--------------------------------------------------------------------------------
  1 | # Original Copyright 2021 OpenAI under MIT License.
  2 | # Modifications Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
  3 | 
  4 | from io import UnsupportedOperation
  5 | import itertools
  6 | import os
  7 | import time
  8 | from collections import Counter, defaultdict
  9 | from concurrent.futures import ThreadPoolExecutor, as_completed
 10 | from typing import Dict, Iterable, List, Union
 11 | 
 12 | import numpy as np
 13 | import tqdm
 14 | from mxeval.data import HUMAN_EVAL, read_problems, stream_jsonl, write_jsonl
 15 | 
 16 | # Amazon modification
 17 | # import check correctness for all languages
 18 | from mxeval.execution import (
 19 |     check_correctness,
 20 |     check_correctness_cpp,
 21 |     check_correctness_csharp,
 22 |     check_correctness_go,
 23 |     check_correctness_java,
 24 |     check_correctness_javascript,
 25 |     check_correctness_kotlin,
 26 |     check_correctness_perl,
 27 |     check_correctness_php,
 28 |     check_correctness_ruby,
 29 |     check_correctness_scala,
 30 |     check_correctness_swift,
 31 |     check_correctness_typescript,
 32 | )
 33 | 
 34 | check_correctness_function_map = {
 35 |         "python": check_correctness,
 36 |         "java": check_correctness_java,
 37 |         "javascript": check_correctness_javascript,
 38 |         "typescript": check_correctness_typescript,
 39 |         "kotlin": check_correctness_kotlin,
 40 |         "ruby": check_correctness_ruby,
 41 |         "php": check_correctness_php,
 42 |         "cpp": check_correctness_cpp,
 43 |         "csharp": check_correctness_csharp,
 44 |         "go": check_correctness_go,
 45 |         "perl": check_correctness_perl,
 46 |         "scala": check_correctness_scala,
 47 |         "swift": check_correctness_swift,
 48 |     }
 49 | 
 50 | def estimate_pass_at_k(
 51 |     num_samples: Union[int, List[int], np.ndarray],
 52 |     num_correct: Union[List[int], np.ndarray],
 53 |     k: int,
 54 | ) -> np.ndarray:
 55 |     """
 56 |     Estimates pass@k of each problem and returns them in an array.
 57 |     """
 58 | 
 59 |     def estimator(n: int, c: int, k: int) -> float:
 60 |         """
 61 |         Calculates 1 - comb(n - c, k) / comb(n, k).
 62 |         """
 63 |         if n - c < k:
 64 |             return 1.0
 65 |         return 1.0 - np.prod(1.0 - k / np.arange(n - c + 1, n + 1))
 66 | 
 67 |     if isinstance(num_samples, int):
 68 |         num_samples_it = itertools.repeat(num_samples, len(num_correct))
 69 |     else:
 70 |         assert len(num_samples) == len(num_correct)
 71 |         num_samples_it = iter(num_samples)
 72 | 
 73 |     return np.array(
 74 |         [estimator(int(n), int(c), k) for n, c in zip(num_samples_it, num_correct)]
 75 |     )
 76 | 
 77 | def get_execute_function(lang):
 78 |     lang = lang.lower()
 79 |     assert lang in check_correctness_function_map, f"Language {lang} is not among the supported languages: {check_correctness_function_map.keys()}"
 80 |     return check_correctness_function_map[lang]
 81 | 
 82 | def evaluate_functional_correctness(
 83 |     sample_file: str,
 84 |     k: List[int] = [1, 10, 100],
 85 |     n_workers: int = os.cpu_count() - 1,
 86 |     timeout: float = 10.0,
 87 |     problem_file: str = HUMAN_EVAL,
 88 | ):
 89 |     """
 90 |     Evaluates the functional correctness of generated samples, and writes
 91 |     results to f"{sample_file}_results.jsonl"
 92 |     """
 93 | 
 94 |     if type(problem_file) is not dict:
 95 |         problems = read_problems(problem_file)
 96 |     else:
 97 |         print("Skip reading problems -- using problem_file (dict) as problems")
 98 |         problems = problem_file
 99 | 
100 |     # see execution.py for details
101 |     # Check the generated samples against test suites.
102 |     check_correctness_function_map = {
103 |         "python": check_correctness,
104 |         "java": check_correctness_java,
105 |         "javascript": check_correctness_javascript,
106 |         "typescript": check_correctness_typescript,
107 |         "kotlin": check_correctness_kotlin,
108 |         "ruby": check_correctness_ruby,
109 |         "php": check_correctness_php,
110 |         "cpp": check_correctness_cpp,
111 |         "csharp": check_correctness_csharp,
112 |         "go": check_correctness_go,
113 |         "perl": check_correctness_perl,
114 |         "scala": check_correctness_scala,
115 |         "swift": check_correctness_swift,
116 |     }
117 | 
118 |     seed = int(time.time() * 1000000) % 1000000
119 |     np.random.seed(seed=seed)  # microsecond
120 | 
121 |     with ThreadPoolExecutor(max_workers=n_workers) as executor:
122 |         futures = []
123 |         completion_id = Counter()
124 |         n_samples = 0
125 |         results = defaultdict(list)
126 | 
127 |         print("Reading samples...")
128 |         for sample in tqdm.tqdm(stream_jsonl(sample_file)):
129 |             task_id = sample["task_id"]
130 |             completion = sample["completion"]
131 |             args = (problems[task_id], completion, timeout, completion_id[task_id])
132 |             language = sample["language"]
133 |             check_correctness_function = check_correctness_function_map[language]
134 |             future = executor.submit(check_correctness_function, *args)
135 |             futures.append(future)
136 |             completion_id[task_id] += 1
137 |             n_samples += 1
138 | 
139 |         assert len(completion_id) == len(problems), "Some problems are not attempted."
140 | 
141 |         print("Running test suites...")
142 |         for future in tqdm.tqdm(as_completed(futures), total=len(futures)):
143 |             result = future.result()  # this is the execution stage
144 |             results[result["task_id"]].append((result["completion_id"], result))
145 | 
146 |     # common code for all languages
147 |     # Calculate pass@k.
148 |     total, correct = [], []
149 |     for result in results.values():
150 |         result.sort()
151 |         passed = [r[1]["passed"] for r in result]
152 |         total.append(len(passed))
153 |         correct.append(sum(passed))
154 |     total = np.array(total)
155 |     correct = np.array(correct)
156 | 
157 |     ks = k
158 |     pass_at_k = {
159 |         f"pass@{k}": estimate_pass_at_k(total, correct, k).mean()
160 |         for k in ks
161 |         if (total >= k).all()
162 |     }
163 | 
164 |     # Finally, save the results in one file:
165 |     def combine_results():
166 |         for sample in stream_jsonl(sample_file):
167 |             task_id = sample["task_id"]
168 |             result = results[task_id].pop(0)
169 |             sample["result"] = result[1]["result"]
170 |             sample["passed"] = result[1]["passed"]
171 |             sample["time_elapsed"] = result[1]["time_elapsed"]
172 |             yield sample
173 | 
174 |     out_file = sample_file + "_results.jsonl"
175 |     print(f"Writing results to {out_file}...")
176 |     write_jsonl(out_file, tqdm.tqdm(combine_results(), total=n_samples))
177 | 
178 |     return pass_at_k
179 | 


--------------------------------------------------------------------------------
/mxeval/mxeval/execution.py:
--------------------------------------------------------------------------------
  1 | # Original Copyright 2021 OpenAI under MIT License.
  2 | # Modifications Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
  3 | # check_correctness_* functions are AWS additions
  4 | 
  5 | import contextlib
  6 | import faulthandler
  7 | import io
  8 | import multiprocessing
  9 | import os
 10 | import platform
 11 | import random
 12 | import shutil
 13 | import signal
 14 | import string
 15 | import subprocess
 16 | import tempfile
 17 | import time
 18 | import errno
 19 | from typing import Dict, Optional
 20 | import threading
 21 | lock = threading.Lock()
 22 | 
 23 | 
 24 | def check_correctness_java(
 25 |     problem: Dict,
 26 |     completion: str,
 27 |     timeout: float,
 28 |     completion_id: Optional[int] = None,
 29 |     verbose=False,
 30 |     language="java",
 31 |     compile_timeout: float = 100,
 32 | ):
 33 |     """
 34 |     Run all evaluation under java_exec_eval + randomized directory to avoid collision.
 35 |     Using subprocess with concurrent.futures for multi-thread evaluation.
 36 |     Make sure to clean up resources even if the test cases fail.
 37 |     """
 38 | 
 39 |     current_dir = os.path.dirname(os.path.realpath(__file__))
 40 |     entire_string = problem["prompt"] + completion + problem["test"]
 41 |     base_path = setup_base_path(current_dir, f"{language}_exec_eval", "")
 42 |     try:
 43 |         os.makedirs(base_path, exist_ok=False)
 44 |     except OSError as e:
 45 |         if e.errno != errno.EEXIST:
 46 |             raise
 47 |     path = os.path.join(base_path, f"main.{language}")
 48 | 
 49 |     with open(path, "w") as f:
 50 |         f.write(entire_string)
 51 | 
 52 |     try:
 53 |         exec_result_compile = subprocess.run(
 54 |             [f"javac", path],
 55 |             timeout=int(compile_timeout),
 56 |             stdout=subprocess.PIPE,
 57 |             stderr=subprocess.PIPE,
 58 |             text=True,
 59 |         )
 60 |         compiled = exec_result_compile.returncode == 0
 61 |         if verbose:
 62 |             print("exec_result_compile", exec_result_compile)
 63 |         start = time.time()
 64 |         exec_result_run = subprocess.run(
 65 |             [f"java", "-cp", base_path, "Main"],
 66 |             timeout=int(timeout),
 67 |             stdout=subprocess.PIPE,
 68 |             stderr=subprocess.PIPE,
 69 |             text=True,
 70 |         )
 71 |         elapsed = 1000.0 * (time.time() - start)
 72 |         if verbose:
 73 |             print("exec result run", exec_result_run)
 74 |         passed = exec_result_compile.returncode == 0 and exec_result_run.returncode == 0
 75 |         if exec_result_compile.returncode > 0:
 76 |             message = exec_result_compile.stderr
 77 |         else:
 78 |             message = exec_result_run.stderr
 79 | 
 80 |     except Exception as e:
 81 |         passed = False
 82 |         message = str(e)
 83 |         elapsed = None
 84 |         compiled = False
 85 | 
 86 |     try:
 87 |         shutil.rmtree(base_path)
 88 |     except Exception as e:
 89 |         if verbose:
 90 |             print(f"Error cleaning up directory {base_path}: {e}")
 91 | 
 92 |     return dict(
 93 |         task_id=problem["task_id"],
 94 |         passed=passed,
 95 |         result=message,
 96 |         completion_id=completion_id,
 97 |         time_elapsed=elapsed,
 98 |         compiled=compiled,
 99 |     )
100 | 
101 | 
def check_correctness_scala(
    problem: Dict,
    completion: str,
    timeout: float,
    completion_id: Optional[int] = None,
    verbose=False,
    language="scala",
    compile_timeout: float = 100,
):
    """
    Evaluates a Scala completion inside a randomized directory under
    scala_exec_eval.

    The program (prompt + completion + test) is written to main.scala, compiled
    with scalac into that directory and, only if compilation succeeded, run as
    object "Main". The directory is removed on every exit path.

    :return: dict with task_id, passed, result (stderr of the failing or last
        stage, or the exception text), completion_id, time_elapsed (run time in
        milliseconds, None if the program was not run to completion) and
        compiled.
    """

    current_dir = os.path.dirname(os.path.realpath(__file__))
    entire_string = problem["prompt"] + completion + problem["test"]
    base_path = setup_base_path(current_dir, f"{language}_exec_eval", "")
    passed, message, elapsed, compiled = False, None, None, False

    try:
        os.makedirs(base_path, exist_ok=True)
        path = os.path.join(base_path, f"main.{language}")
        with open(path, "w") as f:
            f.write(entire_string)

        try:
            exec_result_compile = subprocess.run(
                ["scalac", path, "-d", base_path],
                timeout=int(compile_timeout),
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                text=True,
            )
            compiled = exec_result_compile.returncode == 0
            if verbose:
                print("exec_result_compile", exec_result_compile)

            if compiled:
                start = time.time()
                exec_result_run = subprocess.run(
                    ["scala", "-cp", base_path, "Main"],
                    timeout=int(timeout),
                    stdout=subprocess.PIPE,
                    stderr=subprocess.PIPE,
                    text=True,
                )
                elapsed = 1000.0 * (time.time() - start)
                if verbose:
                    print("exec result run", exec_result_run)
                passed = exec_result_run.returncode == 0
                message = exec_result_run.stderr
            else:
                # nothing to run: report the compiler output
                message = exec_result_compile.stderr
        except Exception as e:
            # e.g. timeout or missing toolchain; `compiled` keeps whatever
            # the compile step established before the failure
            passed, message, elapsed = False, str(e), None
    finally:
        try:
            shutil.rmtree(base_path)
        except Exception as e:
            if verbose:
                print(f"Error cleaning up directory {base_path}: {e}")

    return dict(
        task_id=problem["task_id"],
        passed=passed,
        result=message,
        completion_id=completion_id,
        time_elapsed=elapsed,
        compiled=compiled,
    )
173 | 
174 | 
def check_correctness_perl(
    problem: Dict,
    completion: str,
    timeout: float,
    completion_id: Optional[int] = None,
    verbose=False,
):
    """Checks a Perl completion: the assembled .pl file is run with `perl`."""

    def run_command(base_path):
        return ["perl", f"{base_path}.pl"]

    return check_correctness_helper(
        problem,
        completion,
        timeout,
        completion_id=completion_id,
        verbose=verbose,
        language="perl",
        extension=".pl",
        subprocess_command_lambda=run_command,
    )
192 | 
193 | 
def check_correctness_swift(
    problem: Dict,
    completion: str,
    timeout: float,
    completion_id: Optional[int] = None,
    verbose=False,
):
    """Checks a Swift completion: the assembled .swift file is run with `swift`."""

    def run_command(base_path):
        return ["swift", f"{base_path}.swift"]

    return check_correctness_helper(
        problem,
        completion,
        timeout,
        completion_id=completion_id,
        verbose=verbose,
        language="swift",
        extension=".swift",
        subprocess_command_lambda=run_command,
    )
211 | 
212 | 
def check_correctness_javascript(
    problem: Dict,
    completion: str,
    timeout: float,
    completion_id: Optional[int] = None,
    verbose=False,
):
    """Checks a JavaScript completion: the assembled .js file is run with `node`."""

    def run_command(base_path):
        return ["node", f"{base_path}.js"]

    return check_correctness_helper(
        problem,
        completion,
        timeout,
        completion_id=completion_id,
        verbose=verbose,
        language="javascript",
        extension=".js",
        subprocess_command_lambda=run_command,
    )
230 | 
231 | 
def check_correctness_typescript(
    problem: Dict,
    completion: str,
    timeout: float,
    completion_id: Optional[int] = None,
    verbose=False,
):
    """
    Checks a TypeScript completion: the assembled .ts file is compiled with
    `npx tsc` and the resulting .js file is run with `node`.

    The compile command is built as an argument list (not by splitting a
    formatted string) so scratch paths containing whitespace stay intact, and
    the generated .js file is removed afterwards together with the .ts source.
    """
    return check_correctness_helper(
        problem=problem,
        completion=completion,
        timeout=timeout,
        completion_id=completion_id,
        verbose=verbose,
        language="typescript",
        extension=".ts",
        compile_command_lambda=lambda x: [
            "npx",
            "tsc",
            x,
            "--target",
            "es5",
            "--lib",
            "es2016",
        ],
        subprocess_command_lambda=lambda x: ["node", f"{x}.js"],
        # the compiled output would otherwise pile up in the scratch directory
        extra_cleanup=lambda x: f"{x}.js",
    )
250 | 
251 | 
def check_correctness_ruby(
    problem: Dict,
    completion: str,
    timeout: float,
    completion_id: Optional[int] = None,
    verbose=False,
):
    """Checks a Ruby completion: the assembled .rb file is run with `ruby`."""

    def run_command(base_path):
        return ["ruby", f"{base_path}.rb"]

    return check_correctness_helper(
        problem,
        completion,
        timeout,
        completion_id=completion_id,
        verbose=verbose,
        language="ruby",
        extension=".rb",
        subprocess_command_lambda=run_command,
    )
269 | 
270 | 
def check_correctness_kotlin(
    problem: Dict,
    completion: str,
    timeout: float,
    completion_id: Optional[int] = None,
    verbose=False,
):
    """
    Checks a Kotlin completion: the assembled .kt file is compiled with
    `kotlinc` into a jar that includes the runtime, the jar is run with
    `java -jar`, and the jar is removed afterwards.
    """

    def compile_command(base_path):
        return ["kotlinc", f"{base_path}.kt", "-include-runtime", "-d", f"{base_path}.jar"]

    def run_command(base_path):
        return ["java", "-jar", f"{base_path}.jar"]

    def jar_path(base_path):
        return f"{base_path}.jar"

    return check_correctness_helper(
        problem,
        completion,
        timeout,
        completion_id=completion_id,
        verbose=verbose,
        language="kotlin",
        extension=".kt",
        subprocess_command_lambda=run_command,
        compile_timeout=100,  # needs longer than 20 sec
        compile_command_lambda=compile_command,
        extra_cleanup=jar_path,
    )
297 | 
298 | 
def check_correctness_php(
    problem: Dict,
    completion: str,
    timeout: float,
    completion_id: Optional[int] = None,
    verbose=False,
):
    """Checks a PHP completion: the assembled .php file is run with `php`."""

    def run_command(base_path):
        return ["php", f"{base_path}.php"]

    return check_correctness_helper(
        problem,
        completion,
        timeout,
        completion_id=completion_id,
        verbose=verbose,
        language="php",
        extension=".php",
        subprocess_command_lambda=run_command,
    )
316 | 
317 | 
def check_correctness_go(
    problem: Dict,
    completion: str,
    timeout: float,
    completion_id: Optional[int] = None,
    verbose=False,
):
    """Checks a Go completion: the assembled .go file is run with `go run`."""

    def run_command(base_path):
        return ["go", "run", f"{base_path}.go"]

    return check_correctness_helper(
        problem,
        completion,
        timeout,
        completion_id=completion_id,
        verbose=verbose,
        language="go",
        extension=".go",
        subprocess_command_lambda=run_command,
    )
335 | 
336 | 
def check_correctness_csharp(
    problem: Dict,
    completion: str,
    timeout: float,
    completion_id: Optional[int] = None,
    verbose=False,
    compilation_timeout: float = 100,
):
    """
    Checks a C# completion inside a fresh copy of the template project.

    The template zip is extracted into a randomized directory under cs_eval,
    the program (prompt + completion + test) replaces Program.cs, the project
    is built with `dotnet build`, and the resulting binary is executed. The
    directory is removed on every exit path, including a failed extraction
    (which still propagates its exception to the caller).

    All commands are passed as argument lists rather than split strings, so
    paths containing whitespace are handled correctly.

    :return: dict with task_id, passed, result (stderr of the last stage or
        the exception text), completion_id, compiled and time_elapsed (run
        time in milliseconds, None if the binary was not run to completion).
    """
    current_dir = os.path.dirname(os.path.realpath(__file__))
    program = problem["prompt"] + completion + problem["test"]
    # template c# project has all necessary DLLs
    template_cs_proj_zip = os.path.join(current_dir, "../resources/eval_csproj.zip")
    cs_eval_dir = setup_base_path(current_dir, "cs_eval", "")

    passed, message, elapsed = None, None, None
    compiled = False

    try:
        # extract zip into cs_eval_dir
        subprocess.check_call(
            ["unzip", "-q", template_cs_proj_zip, "-d", cs_eval_dir],
            timeout=int(compilation_timeout),
        )

        try:
            cs_project_path = os.path.join(cs_eval_dir, "eval_csproj")
            # entrypoint
            cs_program_path = os.path.join(cs_project_path, "Program.cs")
            with open(cs_program_path, "w") as f1:
                f1.write(program)

            compile_result = subprocess.run(
                ["dotnet", "build", cs_project_path],
                timeout=int(compilation_timeout),
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                text=True,
            )
            compiled = compile_result.returncode == 0
            message = compile_result.stderr

            if compiled:
                compiled_bin = os.path.join(cs_project_path, "bin/Debug/net6.0/eval_csproj")
                start = time.time()
                exec_result = subprocess.run(
                    [compiled_bin],
                    timeout=int(timeout),
                    stdout=subprocess.PIPE,
                    stderr=subprocess.PIPE,
                    text=True,
                )
                passed = exec_result.returncode == 0
                message = exec_result.stderr
                elapsed = 1000.0 * (time.time() - start)
            else:
                passed, elapsed = False, None
        except Exception as e:
            if verbose:
                print(f"error occurred when running test cases: {e}")
            message = str(e)
            passed = False
            elapsed = None
    finally:
        try:
            shutil.rmtree(cs_eval_dir)
        except Exception as e:
            if verbose:
                print(f"Error trying to clean up directory: {e}")

    assert passed is not None, "should be either True or False"

    return dict(
        task_id=problem["task_id"],
        passed=passed,
        result=message,
        completion_id=completion_id,
        compiled=compiled,
        time_elapsed=elapsed,
    )
415 | 
416 | 
def check_correctness_cpp(
    problem: Dict,
    completion: str,
    timeout: float,
    completion_id: Optional[int] = None,
    verbose=False,
):
    """
    Checks a C++ completion: the assembled .cpp file is compiled with `g++`
    and the resulting binary is executed, then removed.

    cwd=True makes the helper run both commands from the scratch directory,
    which is why only base names are passed to g++ and to the binary. The
    scratch directory is cpp_exec_eval (the language label used to be "c#",
    a copy-paste slip that produced a directory named c#_exec_eval).
    """
    return check_correctness_helper(
        problem=problem,
        completion=completion,
        timeout=timeout,
        completion_id=completion_id,
        verbose=verbose,
        language="cpp",
        extension=".cpp",
        compile_command_lambda=lambda x: [
            "g++",
            f"{os.path.basename(x)}.cpp",
            "-o",
            f"{os.path.basename(x)}_cpp",
        ],
        compile_timeout=100,
        subprocess_command_lambda=lambda x: [f"./{os.path.basename(x)}_cpp"],
        extra_cleanup=lambda x: f"{x}_cpp",
        cwd=True,
    )
443 | 
444 | 
def setup_base_path(
    current_dir,
    language_dirname,
    extension
):
    """
    Picks an unused, randomly named path inside current_dir/language_dirname.

    The language directory is created if it is missing. Up to ten random
    10-letter names are tried; a name is accepted when nothing (file or
    directory) exists at name + extension.

    :param current_dir: parent directory of the per-language directory.
    :param language_dirname: name of the per-language directory.
    :param extension: suffix of the file the caller will create, or "" when
        the caller will create a directory at the returned path.
    :return: the accepted path WITHOUT the extension.
    :raises AssertionError: if no free name is found within ten attempts.
    """
    language_dir = os.path.join(current_dir, language_dirname)
    # exist_ok makes concurrent creation from several threads harmless
    os.makedirs(language_dir, exist_ok=True)

    alphabet = string.ascii_lowercase + string.ascii_uppercase
    for _ in range(10):
        basename = "".join(random.choices(alphabet, k=10))
        base_path = os.path.join(language_dir, basename)
        # reject anything already present, whether a file or a directory
        if not os.path.exists(base_path + f"{extension}"):
            return base_path

    raise AssertionError("Unable to avoid filename collision")
475 | 
476 | 
def check_correctness_helper(
    problem: Dict,
    completion: str,
    timeout: float,
    completion_id: Optional[int] = None,
    verbose=False,
    language=None,
    extension=None,
    subprocess_command_lambda=None,
    compile_timeout=100,
    compile_command_lambda=None,
    extra_cleanup=None,
    cwd=None,
):
    """
    Shared driver for the per-language checkers.

    Writes prompt + completion + test to a randomly named file in
    {language}_exec_eval, optionally compiles it, runs it, and deletes the
    file (plus the path returned by extra_cleanup, if given).

    The three callables each receive the file path without its extension:
    compile_command_lambda and subprocess_command_lambda return the argument
    list to execute, extra_cleanup returns one more path to delete. Any
    non-None cwd makes both commands run from the scratch directory.

    :return: dict with task_id, passed, result (stderr or exception text),
        completion_id, time_elapsed (milliseconds, or None) and compiled.
    """
    module_dir = os.path.dirname(os.path.realpath(__file__))
    program_text = problem["prompt"] + completion + problem["test"]

    stem = setup_base_path(module_dir, f"{language}_exec_eval", extension)
    source_file = stem + f"{extension}"

    if cwd is not None:
        cwd = os.path.dirname(stem)

    with open(source_file, "w") as handle:
        handle.write(program_text)

    # options common to the compile step and the run step
    shared_options = dict(
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
        cwd=cwd,
    )

    try:
        compiled = True
        if compile_command_lambda is not None:
            compile_result = subprocess.run(
                compile_command_lambda(stem),
                timeout=int(compile_timeout),
                **shared_options,
            )
            # for typescript, exit code 2 (not 0) is what counts as compiled
            success_code = 2 if language == "typescript" else 0
            compiled = compile_result.returncode == success_code

        if not compiled:
            passed, message, elapsed = False, compile_result.stderr, None
        else:
            started_at = time.time()
            exec_result_run = subprocess.run(
                subprocess_command_lambda(stem),
                timeout=int(timeout),
                **shared_options,
            )
            elapsed = 1000.0 * (time.time() - started_at)
            if verbose:
                print("exec result run", exec_result_run)

            passed = exec_result_run.returncode == 0
            message = exec_result_run.stderr

    except Exception as e:
        if verbose:
            print(f"error occurred when running test cases: {e}")
        message = str(e)
        passed, elapsed, compiled = False, None, False

    # best-effort removal of the source file
    try:
        os.remove(source_file)
    except Exception as e:
        if verbose:
            print(f"Error trying to clean up file: {e}")

    # best-effort removal of the extra artifact, if any
    try:
        if extra_cleanup is not None:
            artifact = extra_cleanup(stem)
            assert isinstance(artifact, str)
            os.remove(artifact)
    except Exception as e:
        if verbose:
            print(f"Error trying to clean up file: {e}")

    return dict(
        task_id=problem["task_id"],
        passed=passed,
        result=message,
        completion_id=completion_id,
        time_elapsed=elapsed,
        compiled=compiled,
    )
566 | 
567 | 
def check_correctness(
    problem: Dict, completion: str, timeout: float, completion_id: Optional[int] = None
) -> Dict:
    """
    Evaluates the functional correctness of a completion by running the test
    suite provided in the problem.

    The check program (prompt + completion + test + a call to ``check`` on the
    entry point) is executed with ``exec`` in a separate process so that a hang
    or crash in the generated code cannot take down the caller.

    :param problem: mapping that provides the "prompt", "test", "entry_point"
        and "task_id" keys read below.
    :param completion: generated code appended to the prompt.
    :param timeout: time limit in seconds for the executed program; the parent
        waits ``timeout + 1`` seconds for the child before killing it.
    :param completion_id: an optional completion ID so we can match
        the results later even if execution finishes asynchronously.
    :return: dict with ``task_id``, ``passed`` (bool), ``result`` ("passed",
        "timed out" or "failed: <error>"), ``completion_id`` and
        ``time_elapsed`` (wall-clock milliseconds measured in the parent).
    """

    def unsafe_execute():

        with create_tempdir():

            # These system calls are needed when cleaning up tempdir.
            import os
            import shutil

            # Keep references to the originals: reliability_guard() replaces
            # them with None and the tempdir cleanup needs them back.
            rmtree = shutil.rmtree
            rmdir = os.rmdir
            chdir = os.chdir

            # Disable functionalities that can make destructive changes to the test.
            reliability_guard()

            # Construct the check program and run it.
            check_program = (
                problem["prompt"]
                + completion
                + "\n"
                + problem["test"]
                + "\n"
                + f"check({problem['entry_point']})"
            )

            try:
                exec_globals = {}
                with swallow_io():
                    with time_limit(timeout):
                        # WARNING
                        # This program exists to execute untrusted model-generated code. Although
                        # it is highly unlikely that model-generated code will do something overtly
                        # malicious in response to this test suite, model-generated code may act
                        # destructively due to a lack of model capability or alignment.
                        # Users are strongly encouraged to sandbox this evaluation suite so that it
                        # does not perform destructive actions on their host or network. For more
                        # information on how OpenAI sandboxes its code, see the accompanying paper.
                        # NOTE: the exec call below is ENABLED in this file; make sure
                        # appropriate precautions are in place before running it.
                        exec(check_program, exec_globals)
                result.append("passed")
            except TimeoutException:
                result.append("timed out")
            except BaseException as e:
                result.append(f"failed: {e}")

            # Needed for cleaning up.
            shutil.rmtree = rmtree
            os.rmdir = rmdir
            os.chdir = chdir

    # Use the manager as a context manager so its server process is shut down
    # deterministically instead of waiting for garbage collection.
    with multiprocessing.Manager() as manager:
        result = manager.list()

        start = time.time()
        p = multiprocessing.Process(target=unsafe_execute)
        p.start()
        p.join(timeout=timeout + 1)
        if p.is_alive():
            p.kill()
        elapsed = 1000.0 * (time.time() - start)
        # Reap the child (returns immediately if it has already exited).
        p.join()

        # Read the outcome while the manager is still alive; an empty list
        # means the child never reported, which is treated as a timeout.
        outcome = result[0] if len(result) > 0 else "timed out"

    return dict(
        task_id=problem["task_id"],
        passed=outcome == "passed",
        result=outcome,
        completion_id=completion_id,
        time_elapsed=elapsed,
    )
650 | 
651 | 
@contextlib.contextmanager
def time_limit(seconds: float):
    """
    Context manager that raises TimeoutException in the body if it runs for
    longer than ``seconds`` of real (wall-clock) time.

    Implemented with SIGALRM / ITIMER_REAL. On exit the timer is cancelled and
    the SIGALRM handler that was installed before entry is restored.

    :param seconds: time limit in seconds (fractions allowed).
    :raises TimeoutException: inside the ``with`` body when the timer fires.
    """

    def signal_handler(signum, frame):
        raise TimeoutException("Timed out!")

    # Install the handler BEFORE arming the timer, so that a very short
    # timeout can never fire while the previous (possibly default,
    # process-terminating) SIGALRM action is still in place.
    previous_handler = signal.signal(signal.SIGALRM, signal_handler)
    signal.setitimer(signal.ITIMER_REAL, seconds)
    try:
        yield
    finally:
        # Cancel the pending timer first, then put the old handler back.
        signal.setitimer(signal.ITIMER_REAL, 0)
        # signal.signal() returns None when the previous handler was not
        # installed from Python; it cannot be re-installed in that case.
        if previous_handler is not None:
            signal.signal(signal.SIGALRM, previous_handler)
663 | 
664 | 
@contextlib.contextmanager
def swallow_io():
    """
    Context manager that redirects stdout, stderr and stdin to a single
    WriteOnlyStringIO for the duration of the ``with`` body.
    """
    sink = WriteOnlyStringIO()
    # Entered left to right and exited in reverse, like nested ``with`` blocks.
    with contextlib.redirect_stdout(sink), contextlib.redirect_stderr(
        sink
    ), redirect_stdin(sink):
        yield
672 | 
673 | 
@contextlib.contextmanager
def create_tempdir():
    """
    Context manager that creates a temporary directory, makes it the current
    working directory for the ``with`` body, and yields its path.
    """
    with tempfile.TemporaryDirectory() as scratch_dir, chdir(scratch_dir):
        yield scratch_dir
679 | 
680 | 
class TimeoutException(Exception):
    """Raised by the SIGALRM handler in time_limit when the time limit expires."""
683 | 
684 | 
class WriteOnlyStringIO(io.StringIO):
    """StringIO that throws an exception when it's read from"""

    def _refuse_read(self, *args, **kwargs):
        # Shared implementation for every read-style method.
        raise IOError

    # All read entry points reject the call in the same way.
    read = _refuse_read
    readline = _refuse_read
    readlines = _refuse_read

    def readable(self, *args, **kwargs):
        """Returns True if the IO object can be read."""
        return False
700 | 
701 | 
class redirect_stdin(contextlib._RedirectStream):  # type: ignore
    """Context manager that temporarily replaces ``sys.stdin`` with another stream."""

    # Name of the ``sys`` attribute that the base class swaps out.
    _stream = "stdin"
704 | 
705 | 
@contextlib.contextmanager
def chdir(root):
    """
    Context manager that switches the working directory to ``root`` for the
    ``with`` body and switches back afterwards, even if the body raises.
    Passing "." leaves the working directory untouched.
    """
    if root != ".":
        previous_dir = os.getcwd()
        os.chdir(root)
        try:
            yield
        finally:
            # Runs on normal exit and on any exception; exceptions propagate.
            os.chdir(previous_dir)
    else:
        yield
719 | 
720 | 
def reliability_guard(maximum_memory_bytes: Optional[int] = None):
    """
    This disables various destructive functions and prevents the generated code
    from interfering with the test (e.g. fork bomb, killing other processes,
    removing filesystem files, etc.)

    The named functions are disabled by rebinding them to None on their
    modules, so the effect is process-wide and is not undone by this function.

    :param maximum_memory_bytes: when given, used as both the soft and hard
        limit for RLIMIT_AS and RLIMIT_DATA (and RLIMIT_STACK except on Darwin).

    WARNING
    This function is NOT a security sandbox. Untrusted code, including, model-
    generated code, should not be blindly executed outside of one. See the
    Codex paper for more information about OpenAI's code sandbox, and proceed
    with caution.
    """

    if maximum_memory_bytes is not None:
        import resource

        limits = (maximum_memory_bytes, maximum_memory_bytes)
        resource.setrlimit(resource.RLIMIT_AS, limits)
        resource.setrlimit(resource.RLIMIT_DATA, limits)
        if not platform.uname().system == "Darwin":
            resource.setrlimit(resource.RLIMIT_STACK, limits)

    faulthandler.disable()

    import builtins

    builtins.exit = None
    builtins.quit = None
    # Set via the builtins module rather than ``__builtins__["help"]``:
    # ``__builtins__`` is a dict only in imported modules and is the module
    # itself when this file runs as __main__, where item assignment fails.
    builtins.help = None

    import os

    os.environ["OMP_NUM_THREADS"] = "1"

    for os_name in (
        "kill",
        "system",
        "putenv",
        "remove",
        "removedirs",
        "rmdir",
        "fchdir",
        "setuid",
        "fork",
        "forkpty",
        "killpg",
        "rename",
        "renames",
        "truncate",
        "replace",
        "unlink",
        "fchmod",
        "fchown",
        "chmod",
        "chown",
        "chroot",
        "lchflags",
        "lchmod",
        "lchown",
        "getcwd",
        "chdir",
    ):
        setattr(os, os_name, None)

    import shutil

    for shutil_name in ("rmtree", "move", "chown"):
        setattr(shutil, shutil_name, None)

    import subprocess

    subprocess.Popen = None  # type: ignore

    import sys

    # A None entry in sys.modules makes a later ``import <name>`` fail.
    for module_name in ("ipdb", "joblib", "resource", "psutil", "tkinter"):
        sys.modules[module_name] = None
805 | 


--------------------------------------------------------------------------------
/mxeval/requirements.txt:
--------------------------------------------------------------------------------
1 | tqdm
2 | fire
3 | numpy
4 | 


--------------------------------------------------------------------------------
/mxeval/resources/eval_csproj.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/floatai/HumanEval-XL/1e9301f6cfc2d3481a7f7e44569982285238ac99/mxeval/resources/eval_csproj.zip


--------------------------------------------------------------------------------
/mxeval/setup.py:
--------------------------------------------------------------------------------
 1 | # Original Copyright 2021 OpenAI under MIT License.
 2 | # Modifications Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
 3 | import os
 4 | 
 5 | import pkg_resources
 6 | from setuptools import setup, find_packages
 7 | 
 8 | 
 9 | setup(
10 |     name="mxeval",
11 |     py_modules=["mxeval"],
12 |     version="1.0",
13 |     description="",
14 |     author="AWS AI Labs",
15 |     packages=find_packages(),
16 |     install_requires=[
17 |         str(r)
18 |         for r in pkg_resources.parse_requirements(
19 |             open(os.path.join(os.path.dirname(__file__), "requirements.txt"))
20 |         )
21 |     ],
22 |     entry_points={
23 |         "console_scripts": [
24 |             "evaluate_functional_correctness = mxeval.evaluate_functional_correctness",
25 |         ]
26 |     }
27 | )
28 | 


--------------------------------------------------------------------------------
/python_chinese_generated_samples.jsonl:
--------------------------------------------------------------------------------
 1 | {"task_id": "python/0", "language": "python", "completion": "    for i in range(len(operations)):\n        if operations[i] < 0:\n            return True\n    return False\n\n\n"}
 2 | {"task_id": "python/1", "language": "python", "completion": "    return sum(numbers), reduce(lambda x, y: x * y, numbers)\n\n\n"}
 3 | {"task_id": "python/2", "language": "python", "completion": "    return ''.join(chr(ord(a_i) ^ ord(b_i)) for a_i, b_i in zip(a, b))\n\n\n"}
 4 | {"task_id": "python/3", "language": "python", "completion": "    if not strings:\n        return None\n    return max(strings, key=len)\n\n\n"}
 5 | {"task_id": "python/4", "language": "python", "completion": "    if a < b:\n        a, b = b, a\n    while b != 0:\n        a, b = b, a % b\n    return a\n\n"}
 6 | {"task_id": "python/5", "language": "python", "completion": "    numbers = numbers.split()\n    numbers.sort()\n    return ' '.join(numbers)\n\n\n"}
 7 | {"task_id": "python/6", "language": "python", "completion": "    min_number = min(numbers)\n    max_number = max(numbers)\n    return [(number - min_number) / (max_number - min_number) for number in numbers]\n\n\n"}
 8 | {"task_id": "python/7", "language": "python", "completion": "    return string.swapcase()\n\n"}
 9 | {"task_id": "python/8", "language": "python", "completion": "    return [x for x in l if x > 0]\n\n\n"}
10 | {"task_id": "python/9", "language": "python", "completion": "    if n < 2:\n        return False\n    if n == 2:\n        return True\n    if n % 2 == 0:\n        return False\n    for i in range(3, int(math.sqrt(n)) + 1, 2):\n        if n % i == 0:\n            return False\n    return True\n\n"}
11 | {"task_id": "python/10", "language": "python", "completion": "    return list(set(l))\n\n"}
12 | {"task_id": "python/11", "language": "python", "completion": "    if n == 1:\n        return 2\n    elif n == 2:\n        return 3\n    elif n == 3:\n        return 5\n    elif n == 4:\n        return 13\n    elif n == 5:\n        return 89\n    else:\n        return prime_fib(n-1) + prime_fib(n-2)\n\n"}
13 | {"task_id": "python/12", "language": "python", "completion": "    for i in range(len(l) - 2):\n        if l[i] + l[i + 1] + l[i + 2] == 0:\n            return True\n    return False\n\n"}
14 | {"task_id": "python/13", "language": "python", "completion": "    for i in range(len(l) - 1):\n        if l[i] + l[i + 1] == 0:\n            return True\n    return False\n\n"}
15 | {"task_id": "python/14", "language": "python", "completion": "    if n == 0:\n        return 0\n    elif n == 1:\n        return 0\n    elif n == 2:\n        return 2\n    elif n == 3:\n        return 0\n    else:\n        return fib4(n-1) + fib4(n-2) + fib4(n-3) + fib4(n-4)\n\n"}
16 | {"task_id": "python/15", "language": "python", "completion": "    l.sort()\n    n = len(l)\n    if n % 2 == 1:\n        return l[n // 2]\n    else:\n        return (l[n // 2] + l[n // 2 - 1]) / 2\n\n"}
17 | {"task_id": "python/16", "language": "python", "completion": "    return text == text[::-1]\n\n"}
18 | {"task_id": "python/17", "language": "python", "completion": "    vowels = 'aeiouAEIOU'\n    return ''.join(c for c in text if c not in vowels)\n\n"}
19 | {"task_id": "python/18", "language": "python", "completion": "    return all(x < t for x in l)\n\n"}
20 | {"task_id": "python/19", "language": "python", "completion": "    return x + y\n\n"}
21 | {"task_id": "python/20", "language": "python", "completion": "    return len(set(s0)) == len(set(s1))\n\n"}
22 | {"task_id": "python/21", "language": "python", "completion": "    if n < 2:\n        return n\n    return fib(n-1) + fib(n-2)\n\n"}
23 | {"task_id": "python/22", "language": "python", "completion": "    l1.sort()\n    l2.sort()\n    i = 0\n    j = 0\n    while i < len(l1) and j < len(l2):\n        if l1[i] == l2[j]:\n            return [l1[i], l2[j]]\n        elif l1[i] < l2[j]:\n            i += 1\n        else:\n            j += 1\n    return []\n\n"}
24 | {"task_id": "python/23", "language": "python", "completion": "    if n < 2:\n        return n\n    for i in range(2, int(n**0.5)+1):\n        if n % i == 0:\n            return largest_prime_factor(n//i)\n    return n\n\n"}
25 | {"task_id": "python/24", "language": "python", "completion": "    return n * (n + 1) // 2\n\n\n"}
26 | {"task_id": "python/25", "language": "python", "completion": "    return [i * j for i, j in zip(xs[1:], xs[:-1])]\n\n"}
27 | {"task_id": "python/26", "language": "python", "completion": "    if n == 0:\n        return 0\n    elif n == 1:\n        return 0\n    elif n == 2:\n        return 1\n    else:\n        return fibfib(n-1) + fibfib(n-2) + fibfib(n-3)\n\n"}
28 | {"task_id": "python/27", "language": "python", "completion": "    vowels = 0\n    for c in s:\n        if c in 'aeiou':\n            vowels += 1\n    return vowels\n\n"}
29 | {"task_id": "python/28", "language": "python", "completion": "    #"}
30 | {"task_id": "python/29", "language": "python", "completion": "    s = (a + b + c) / 2\n    area = (s * (s - a) * (s - b) * (s - c)) ** 0.5\n    return area if area > 0 else -1\n\n"}
31 | {"task_id": "python/30", "language": "python", "completion": "    if q[0] + q[-1] <= w:\n        return True\n    else:\n        return False\n\n"}
32 | {"task_id": "python/31", "language": "python", "completion": "    if a < 2:\n        return False\n    for i in range(2, int(math.sqrt(a)) + 1):\n        if a % i == 0:\n            return False\n    return True\n\n"}
33 | {"task_id": "python/32", "language": "python", "completion": "    return bin(decimal)[2:]\n\n\n"}
34 | {"task_id": "python/33", "language": "python", "completion": "    if len(s) < 3:\n        return False\n    if s[0] == s[1] == s[2]:\n        return False\n    if s[0] == s[1] and s[1] == s[2]:\n        return False\n    if s[0] == s[1] and s[2] == s[3]:\n        return False\n    if s[0] == s[2] and s[1] == s[3]:\n        return False\n    if s[0] == s[2] and s[2] == s[3]:\n        return False\n    if s[0] == s[1] and s[2] == s[3]:\n        return False\n    if s[0] == s[1] and s[1] == s[2] and s[2] == s[3]:\n        return False\n    return True\n\n\n"}
35 | {"task_id": "python/34", "language": "python", "completion": "    pass\n\n\n"}
36 | {"task_id": "python/35", "language": "python", "completion": "    if len(string) == 1:\n        return False\n    for i in range(2, int(math.sqrt(len(string))) + 1):\n        if len(string) % i == 0:\n            return False\n    return True\n\n"}
37 | {"task_id": "python/36", "language": "python", "completion": "    #"}
38 | {"task_id": "python/37", "language": "python", "completion": "    for i in range(len(lst)):\n        for j in range(len(lst[i])):\n            if lst[i][j] == x:\n                return [(i, j)]\n    return []\n\n\n"}
39 | {"task_id": "python/38", "language": "python", "completion": "    if len(lst) == 0:\n        return None\n    if len(lst) == 1:\n        return lst[0]\n    if lst[0] < lst[1]:\n        return lst[1]\n    else:\n        return next_smallest(lst[1:])\n\n"}
40 | {"task_id": "python/39", "language": "python", "completion": "    return sum(c == 'I' for c in S)\n\n"}
41 | {"task_id": "python/40", "language": "python", "completion": "    #"}
42 | {"task_id": "python/41", "language": "python", "completion": "    if dict == {}:\n        return False\n    for key in dict:\n        if type(key) != str:\n            return False\n        if key.islower():\n            if type(dict[key]) != str:\n                return False\n        elif key.isupper():\n            if type(dict[key]) != str:\n                return False\n        else:\n            return False\n    return True\n\n"}
43 | {"task_id": "python/42", "language": "python", "completion": "    return int(round(value))\n\n"}
44 | {"task_id": "python/43", "language": "python", "completion": "    if n == 1:\n        return [1]\n    else:\n        return [n] + make_a_pile(n-1) + make_a_pile(n-2)\n\n"}
45 | {"task_id": "python/44", "language": "python", "completion": "    return s.split(',')\n\n"}
46 | {"task_id": "python/45", "language": "python", "completion": "    if x > y:\n        x, y = y, x\n    if x % 2 == 0 and y % 2 == 0:\n        for i in range(x, y + 1, 2):\n            if i % 2 == 0:\n                return i\n    return -1\n\n\n"}
47 | {"task_id": "python/46", "language": "python", "completion": "    if n > m:\n        return -1\n    else:\n        return bin(int((n + m) / 2))[2:]\n"}
48 | {"task_id": "python/47", "language": "python", "completion": "    return [i * j for i in range(1, n + 1) for j in range(1, i + 1)]\n\n"}
49 | {"task_id": "python/48", "language": "python", "completion": "    if n < 2:\n        return 0, 0\n    count = 0\n    for i in range(1, n + 1):\n        if i % 2 == 0:\n            count += 1\n            if i == n:\n                break\n            if i == n - 1:\n                continue\n            if i == n - 2:\n                continue\n            if i == n - 3:\n                continue\n            if i == n - 4:\n                continue\n            if i == n - 5:\n                continue\n            if i == n - 6:\n                continue\n            if i == n - 7:\n                continue\n            if i == n - 8:\n                continue\n            if i == n - 9:\n                continue\n            if i == n - 10:\n                continue\n            if i == n - 11:\n                continue\n            if i == n - 12:\n                continue\n            if i == n - 13:\n                continue\n            if i == n - 14:\n                continue\n            if i == n - 15:\n                continue\n            if i == n - 16:\n                continue\n            if i == n - 17:\n                continue\n            if i == n - 18:\n                continue\n            if i == n - 19:\n                continue\n            if i == n - 20:\n                continue\n            if i == n"}
50 | {"task_id": "python/49", "language": "python", "completion": "    if not arr:\n        return True\n    for i in range(len(arr) - 1):\n        if arr[i] > arr[i + 1]:\n            return False\n    return True\n\n\n"}
51 | {"task_id": "python/50", "language": "python", "completion": "    if len(lst1) != len(lst2):\n        return \"NO\"\n    else:\n        for i in range(len(lst1)):\n            if lst1[i] % 2 != 0 and lst2[i] % 2 != 0:\n                return \"NO\"\n        return \"YES\"\n\n"}
52 | {"task_id": "python/51", "language": "python", "completion": "    return s[::-1] == s[::-1].replace(c, '')\n\n"}
53 | {"task_id": "python/52", "language": "python", "completion": "    #"}
54 | {"task_id": "python/53", "language": "python", "completion": "    return [word for word in s.split() if len(word) == n]\n\n\n"}
55 | {"task_id": "python/54", "language": "python", "completion": "    #"}
56 | {"task_id": "python/55", "language": "python", "completion": "    #"}
57 | {"task_id": "python/56", "language": "python", "completion": "    #"}
58 | {"task_id": "python/57", "language": "python", "completion": "    if n == 1:\n        return [1]\n    if n == 2:\n        return [1, 1]\n    if n == 3:\n        return [1, 1, 2]\n    return [1] + tri(n - 1) + [1] + tri(n - 2) + [1] + tri(n - 3)\n\n"}
59 | {"task_id": "python/58", "language": "python", "completion": "    if n < 0:\n        return 0\n    if n == 0:\n        return 1\n    if n == 1:\n        return 1\n    if n == 2:\n        return 2\n    if n == 3:\n        return 6\n    if n == 4:\n        return 4\n    if n == 5:\n        return 10\n    if n == 6:\n        return 9\n    if n == 7:\n        return 40\n    if n == 8:\n        return 36\n    if n == 9:\n        return 45\n    if n == 10:\n        return 55\n    if n == 11:\n        return 20\n    if n == 12:\n        return 72\n    if n == 13:\n        return 216\n    if n == 14:\n        return 81\n    if n == 15:\n        return 90\n    if n == 16:\n        return 96\n    if n == 17:\n        return 120\n    if n == 18:\n        return 81\n    if n == 19:\n        return 90\n    if n == 20:\n        return 96\n    if n == 21:\n        return 120\n    if n == 22:\n        return 81\n    if n == 23:\n        return 90\n    if n == 24:\n        return 96\n    if n == 25:\n        return 120\n    if n"}
60 | {"task_id": "python/59", "language": "python", "completion": "    stack = []\n    for i in string:\n        if i == '[':\n            stack.append(i)\n        elif i == ']':\n            if stack:\n                stack.pop()\n            else:\n                return False\n    if stack:\n        return False\n    else:\n        return True\n\n"}
61 | {"task_id": "python/60", "language": "python", "completion": "    return sum([x**2 for x in lst])\n\n"}
62 | {"task_id": "python/61", "language": "python", "completion": "    return txt[-1].isalpha() and not txt.endswith(' ')\n\n"}
63 | {"task_id": "python/62", "language": "python", "completion": "    if len(arr) == 0:\n        return -1\n    if len(arr) == 1:\n        return 0\n    if len(arr) == 2:\n        return 1\n    if len(arr) == 3:\n        return 2\n    if len(arr) == 4:\n        return 3\n    if len(arr) == 5:\n        return 4\n    if len(arr) == 6:\n        return 5\n    if len(arr) == 7:\n        return 6\n    if len(arr) == 8:\n        return 7\n    if len(arr) == 9:\n        return 8\n    if len(arr) == 10:\n        return 9\n    if len(arr) == 11:\n        return 10\n    if len(arr) == 12:\n        return 11\n    if len(arr) == 13:\n        return 12\n    if len(arr) == 14:\n        return 13\n    if len(arr) == 15:\n        return 14\n    if len(arr) == 16:\n        return 15\n    if len(arr) == 17:\n        return 16\n    if len(arr) == 18:\n        return 17\n    if len(arr) == 19:\n        return 18\n    if len(arr) == 20:\n        return 19\n    if len(arr"}
64 | {"task_id": "python/63", "language": "python", "completion": "    if len(lst) == 0:\n        return (None, None)\n    if lst.count(0) == len(lst):\n        return (None, None)\n    if lst.count(0) == len(lst):\n        return (None, None)\n    if lst.count(0) == len(lst):\n        return (None, None)\n    if lst.count(0) == len(lst):\n        return (None, None)\n    if lst.count(0) == len(lst):\n        return (None, None)\n    if lst.count(0) == len(lst):\n        return (None, None)\n    if lst.count(0) == len(lst):\n        return (None, None)\n    if lst.count(0) == len(lst):\n        return (None, None)\n    if lst.count(0) == len(lst):\n        return (None, None)\n    if lst.count(0) == len(lst):\n        return (None, None)\n    if lst.count(0) == len(lst):\n        return (None, None)\n    if lst.count(0) == len(lst):\n        "}
65 | {"task_id": "python/64", "language": "python", "completion": "    if n == 0:\n        return 1\n    else:\n        return n * special_factorial(n - 1)\n\n\n"}
66 | {"task_id": "python/65", "language": "python", "completion": "    #"}
67 | {"task_id": "python/66", "language": "python", "completion": "    #"}
68 | {"task_id": "python/67", "language": "python", "completion": "    return sorted(nums, key=lambda x: sum(map(int, str(x))))\n\n\n"}
69 | {"task_id": "python/68", "language": "python", "completion": "    return len([x for x in nums if x > 10 and x % 2 == 1])\n\n"}
70 | {"task_id": "python/69", "language": "python", "completion": "    a = [i * i - i + 1 for i in range(1, n + 1)]\n    count = 0\n    for i in range(1, n + 1):\n        for j in range(i + 1, n + 1):\n            for k in range(j + 1, n + 1):\n                if a[i] + a[j] + a[k] == 3 * i * j * k:\n                    count += 1\n    return count\n\n\n"}
71 | {"task_id": "python/70", "language": "python", "completion": "    if planet1 not in planets or planet2 not in planets:\n        return ()\n    else:\n        return sorted(list(set(planets[planet1].neighbors + planets[planet2].neighbors)))\n\n"}
72 | {"task_id": "python/71", "language": "python", "completion": "    if n % 2 == 0:\n        return x\n    else:\n        return y\n\n"}
73 | {"task_id": "python/72", "language": "python", "completion": "    if not lst:\n        return 0\n    return sum([x * x for x in lst if x % 2 != 0])\n\n"}
74 | {"task_id": "python/73", "language": "python", "completion": "    return class_name + '.' + max(extensions, key=lambda x: (len(x), x))\n\n"}
75 | {"task_id": "python/74", "language": "python", "completion": "    if a == b:\n        return True\n    if a in b:\n        return True\n    if b in a:\n        return True\n    if len(a) != len(b):\n        return False\n    if len(a) == 1:\n        return False\n    if a[0] == b[0]:\n        return cycpattern_check(a[1:],b[1:])\n    if a[-1] == b[-1]:\n        return cycpattern_check(a[:-1],b[:-1])\n    return False\n\n"}
76 | {"task_id": "python/75", "language": "python", "completion": "    if number < 1 or number > 1000:\n        return None\n    roman_dict = {\n        1: 'i',\n        5: 'v',\n        10: 'x',\n        50: 'l',\n        100: 'c',\n        500: 'd',\n        1000: 'm'\n    }\n    result = ''\n    for key in roman_dict:\n        while number >= key:\n            result += roman_dict[key]\n            number -= key\n    return result\n\n\n"}
77 | {"task_id": "python/76", "language": "python", "completion": "    if a + b > c and a + c > b and b + c > a:\n        return True\n    else:\n        return False\n\n"}
78 | {"task_id": "python/77", "language": "python", "completion": "    return ''.join(s[::-1] if s.isalpha() else s for s in s)\n\n\n"}
79 | {"task_id": "python/78", "language": "python", "completion": "    if text == '':\n        return None\n    return hashlib.md5(text.encode('utf-8')).hexdigest()\n\n\n"}
80 | {"task_id": "python/79", "language": "python", "completion": "    if a > b:\n        return []\n    if a == b:\n        return [a]\n    if a % 2 == 0:\n        return generate_integers(a, b - 1) + [a]\n    else:\n        return generate_integers(a + 1, b)\n\n"}


--------------------------------------------------------------------------------
/src/data_process.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/floatai/HumanEval-XL/1e9301f6cfc2d3481a7f7e44569982285238ac99/src/data_process.png


--------------------------------------------------------------------------------