├── .gitignore ├── LICENSE ├── README.md ├── data ├── .DS_Store ├── csharp │ ├── Afrikaans.jsonl │ ├── Arabic.jsonl │ ├── Bulgarian.jsonl │ ├── Chinese.jsonl │ ├── Dutch.jsonl │ ├── English.jsonl │ ├── Estonian.jsonl │ ├── Finnish.jsonl │ ├── French.jsonl │ ├── German.jsonl │ ├── Greek.jsonl │ ├── Hebrew.jsonl │ ├── Hungarian.jsonl │ ├── Indonesian.jsonl │ ├── Italian.jsonl │ ├── Malay.jsonl │ ├── Persian.jsonl │ ├── Portuguese.jsonl │ ├── Russian.jsonl │ ├── Spanish.jsonl │ ├── Tagalog.jsonl │ ├── Turkish.jsonl │ └── Vietnamese.jsonl ├── go │ ├── Afrikaans.jsonl │ ├── Arabic.jsonl │ ├── Bulgarian.jsonl │ ├── Chinese.jsonl │ ├── Dutch.jsonl │ ├── English.jsonl │ ├── Estonian.jsonl │ ├── Finnish.jsonl │ ├── French.jsonl │ ├── German.jsonl │ ├── Greek.jsonl │ ├── Hebrew.jsonl │ ├── Hungarian.jsonl │ ├── Indonesian.jsonl │ ├── Italian.jsonl │ ├── Malay.jsonl │ ├── Persian.jsonl │ ├── Portuguese.jsonl │ ├── Russian.jsonl │ ├── Spanish.jsonl │ ├── Tagalog.jsonl │ ├── Turkish.jsonl │ └── Vietnamese.jsonl ├── java │ ├── Afrikaans.jsonl │ ├── Arabic.jsonl │ ├── Bulgarian.jsonl │ ├── Chinese.jsonl │ ├── Dutch.jsonl │ ├── English.jsonl │ ├── Estonian.jsonl │ ├── Finnish.jsonl │ ├── French.jsonl │ ├── German.jsonl │ ├── Greek.jsonl │ ├── Hebrew.jsonl │ ├── Hungarian.jsonl │ ├── Indonesian.jsonl │ ├── Italian.jsonl │ ├── Malay.jsonl │ ├── Persian.jsonl │ ├── Portuguese.jsonl │ ├── Russian.jsonl │ ├── Spanish.jsonl │ ├── Tagalog.jsonl │ ├── Turkish.jsonl │ └── Vietnamese.jsonl ├── javascript │ ├── Afrikaans.jsonl │ ├── Arabic.jsonl │ ├── Bulgarian.jsonl │ ├── Chinese.jsonl │ ├── Dutch.jsonl │ ├── English.jsonl │ ├── Estonian.jsonl │ ├── Finnish.jsonl │ ├── French.jsonl │ ├── German.jsonl │ ├── Greek.jsonl │ ├── Hebrew.jsonl │ ├── Hungarian.jsonl │ ├── Indonesian.jsonl │ ├── Italian.jsonl │ ├── Malay.jsonl │ ├── Persian.jsonl │ ├── Portuguese.jsonl │ ├── Russian.jsonl │ ├── Spanish.jsonl │ ├── Tagalog.jsonl │ ├── Turkish.jsonl │ └── Vietnamese.jsonl ├── kotlin │ ├── 
Afrikaans.jsonl │ ├── Arabic.jsonl │ ├── Bulgarian.jsonl │ ├── Chinese.jsonl │ ├── Dutch.jsonl │ ├── English.jsonl │ ├── Estonian.jsonl │ ├── Finnish.jsonl │ ├── French.jsonl │ ├── German.jsonl │ ├── Greek.jsonl │ ├── Hebrew.jsonl │ ├── Hungarian.jsonl │ ├── Indonesian.jsonl │ ├── Italian.jsonl │ ├── Malay.jsonl │ ├── Persian.jsonl │ ├── Portuguese.jsonl │ ├── Russian.jsonl │ ├── Spanish.jsonl │ ├── Tagalog.jsonl │ ├── Turkish.jsonl │ └── Vietnamese.jsonl ├── perl │ ├── Afrikaans.jsonl │ ├── Arabic.jsonl │ ├── Bulgarian.jsonl │ ├── Chinese.jsonl │ ├── Dutch.jsonl │ ├── English.jsonl │ ├── Estonian.jsonl │ ├── Finnish.jsonl │ ├── French.jsonl │ ├── German.jsonl │ ├── Greek.jsonl │ ├── Hebrew.jsonl │ ├── Hungarian.jsonl │ ├── Indonesian.jsonl │ ├── Italian.jsonl │ ├── Malay.jsonl │ ├── Persian.jsonl │ ├── Portuguese.jsonl │ ├── Russian.jsonl │ ├── Spanish.jsonl │ ├── Tagalog.jsonl │ ├── Turkish.jsonl │ └── Vietnamese.jsonl ├── php │ ├── Afrikaans.jsonl │ ├── Arabic.jsonl │ ├── Bulgarian.jsonl │ ├── Chinese.jsonl │ ├── Dutch.jsonl │ ├── English.jsonl │ ├── Estonian.jsonl │ ├── Finnish.jsonl │ ├── French.jsonl │ ├── German.jsonl │ ├── Greek.jsonl │ ├── Hebrew.jsonl │ ├── Hungarian.jsonl │ ├── Indonesian.jsonl │ ├── Italian.jsonl │ ├── Malay.jsonl │ ├── Persian.jsonl │ ├── Portuguese.jsonl │ ├── Russian.jsonl │ ├── Spanish.jsonl │ ├── Tagalog.jsonl │ ├── Turkish.jsonl │ └── Vietnamese.jsonl ├── python │ ├── Afrikaans.jsonl │ ├── Arabic.jsonl │ ├── Bulgarian.jsonl │ ├── Chinese.jsonl │ ├── Dutch.jsonl │ ├── English.jsonl │ ├── Estonian.jsonl │ ├── Finnish.jsonl │ ├── French.jsonl │ ├── German.jsonl │ ├── Greek.jsonl │ ├── Hebrew.jsonl │ ├── Hungarian.jsonl │ ├── Indonesian.jsonl │ ├── Italian.jsonl │ ├── Malay.jsonl │ ├── Persian.jsonl │ ├── Portuguese.jsonl │ ├── Russian.jsonl │ ├── Spanish.jsonl │ ├── Tagalog.jsonl │ ├── Turkish.jsonl │ └── Vietnamese.jsonl ├── ruby │ ├── Afrikaans.jsonl │ ├── Arabic.jsonl │ ├── Bulgarian.jsonl │ ├── Chinese.jsonl │ ├── Dutch.jsonl │ 
├── English.jsonl │ ├── Estonian.jsonl │ ├── Finnish.jsonl │ ├── French.jsonl │ ├── German.jsonl │ ├── Greek.jsonl │ ├── Hebrew.jsonl │ ├── Hungarian.jsonl │ ├── Indonesian.jsonl │ ├── Italian.jsonl │ ├── Malay.jsonl │ ├── Persian.jsonl │ ├── Portuguese.jsonl │ ├── Russian.jsonl │ ├── Spanish.jsonl │ ├── Tagalog.jsonl │ ├── Turkish.jsonl │ └── Vietnamese.jsonl ├── scala │ ├── Afrikaans.jsonl │ ├── Arabic.jsonl │ ├── Bulgarian.jsonl │ ├── Chinese.jsonl │ ├── Dutch.jsonl │ ├── English.jsonl │ ├── Estonian.jsonl │ ├── Finnish.jsonl │ ├── French.jsonl │ ├── German.jsonl │ ├── Greek.jsonl │ ├── Hebrew.jsonl │ ├── Hungarian.jsonl │ ├── Indonesian.jsonl │ ├── Italian.jsonl │ ├── Malay.jsonl │ ├── Persian.jsonl │ ├── Portuguese.jsonl │ ├── Russian.jsonl │ ├── Spanish.jsonl │ ├── Tagalog.jsonl │ ├── Turkish.jsonl │ └── Vietnamese.jsonl ├── swift │ ├── Afrikaans.jsonl │ ├── Arabic.jsonl │ ├── Bulgarian.jsonl │ ├── Chinese.jsonl │ ├── Dutch.jsonl │ ├── English.jsonl │ ├── Estonian.jsonl │ ├── Finnish.jsonl │ ├── French.jsonl │ ├── German.jsonl │ ├── Greek.jsonl │ ├── Hebrew.jsonl │ ├── Hungarian.jsonl │ ├── Indonesian.jsonl │ ├── Italian.jsonl │ ├── Malay.jsonl │ ├── Persian.jsonl │ ├── Portuguese.jsonl │ ├── Russian.jsonl │ ├── Spanish.jsonl │ ├── Tagalog.jsonl │ ├── Turkish.jsonl │ └── Vietnamese.jsonl └── typescript │ ├── Afrikaans.jsonl │ ├── Arabic.jsonl │ ├── Bulgarian.jsonl │ ├── Chinese.jsonl │ ├── Dutch.jsonl │ ├── English.jsonl │ ├── Estonian.jsonl │ ├── Finnish.jsonl │ ├── French.jsonl │ ├── German.jsonl │ ├── Greek.jsonl │ ├── Hebrew.jsonl │ ├── Hungarian.jsonl │ ├── Indonesian.jsonl │ ├── Italian.jsonl │ ├── Malay.jsonl │ ├── Persian.jsonl │ ├── Portuguese.jsonl │ ├── Russian.jsonl │ ├── Spanish.jsonl │ ├── Tagalog.jsonl │ ├── Turkish.jsonl │ └── Vietnamese.jsonl ├── mxeval ├── .gitignore ├── CHANGELOG.md ├── LICENSE ├── NOTICE ├── THIRD_PARTY_LICENSES ├── graphics │ ├── mbxp_java_conversion.png │ └── paper_summary.png ├── language_setup │ ├── amazon_linux_ami.sh 
│ └── ubuntu.sh ├── mxeval │ ├── __init__.py │ ├── data.py │ ├── evaluate_functional_correctness.py │ ├── evaluation.py │ └── execution.py ├── requirements.txt ├── resources │ └── eval_csproj.zip └── setup.py ├── python_chinese_generated_samples.jsonl └── src └── data_process.png /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 
91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 160 | #.idea/ 161 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 FloatAI 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # [LREC-COLING 2024 | HumanEval-XL: An Execution-based Multilingual Code Generation Benchmark Across 23 Natural Languages and 12 Programming Languages](https://aclanthology.org/2024.lrec-main.735/) 2 | 3 | 4 | Datasets 5 | 6 | 7 | Paper 8 | 9 | 10 | LREC-COLING 2024 11 | 12 | 13 | 14 | This repository contains data and evaluation code for the paper "[HumanEval-XL: A Multilingual Code Generation Benchmark for Cross-lingual Natural Language Generalization](https://aclanthology.org/2024.lrec-main.735.pdf)". 15 | 16 | 17 | ## 🔥 News 18 | * **26 February, 2024:** 🎉 We release the official codebase and data! [[GitHub](https://github.com/floatai/HumanEval-XL/tree/main?tab=readme-ov-file#dataset),[ 19 | 🤗dataset](https://huggingface.co/datasets/floatai/HumanEval-XL)] 🔥 20 | * **19 February, 2024:** 🎉 Our work has been accepted to [LREC-COLING 2024](https://lrec-coling-2024.org/)! ✨ 21 | 22 | ## 🌟 Overview 23 | 24 |
25 | 26 |
27 | 28 | Large language models (LLMs) have made significant progress in generating codes from textual prompts. However, existing benchmarks have mainly concentrated on translating English prompts to multilingual codes or have been constrained to very limited natural languages (NLs). These benchmarks have overlooked the vast landscape of massively multilingual NL to multilingual code, leaving a critical gap in the evaluation of multilingual LLMs. In response, we introduce HumanEval-XL, a massively multilingual code generation benchmark specifically crafted to address this deficiency. HumanEval-XL establishes connections between 23 NLs and 12 programming languages (PLs), and comprises of a collection of 22,080 prompts with an average of 8.33 test cases. By ensuring *parallel* data across multiple NLs and PLs, HumanEval-XL offers a comprehensive evaluation platform for multilingual LLMs, allowing the assessment of the understanding of different NLs. Our work serves as a pioneering step towards filling the void in evaluating NL generalization in the area of multilingual code generation. We make our evaluation code and data publicly available at [https://github.com/floatai/HumanEval-XL](https://github.com/floatai/HumanEval-XL). 29 | 30 | image 31 | 32 | 33 | ## Dataset 34 | The data is stored in `data/<programming_language>/<natural_language>.jsonl` (for example, `data/python/Chinese.jsonl`). We have 80 parallel problems in 23 different natural languages and 12 programming languages. 
35 | 36 | **23 NLs** are: 37 | "English", "Russian", "Chinese", "German", "Spanish", "French", "Italian", "Portuguese", "Greek", "Hungarian", "Dutch", "Finnish", "Indonesian", "Turkish", "Arabic", "Vietnamese", "Bulgarian", "Persian", "Malay", "Hebrew", "Estonian", "Tagalog", "Afrikaans" 38 | 39 | **12 PLs** are: 40 | "python", "java", "javascript", "csharp", "go", "kotlin", "perl", "php", "ruby", "scala", "swift", "typescript" 41 | 42 | 43 | image 44 | 45 | 46 | ### Usage with HuggingFace datasets🤗 47 | You can also use [🤗**HuggingFace datasets**](https://huggingface.co/datasets/floatai/HumanEval-XL) to load the data for a specific programming language; each natural language is exposed as a separate split: 48 | ```python 49 | from datasets import load_dataset 50 | dataset = load_dataset("floatai/HumanEval-XL", "python") 51 | DatasetDict({ 52 | English: Dataset({ 53 | features: ['task_id', 'language', 'prompt', 'description', 'test', 'entry_point', 'canonical_solution', 'natural_language'], 54 | num_rows: 80 55 | }) 56 | Russian: Dataset({ 57 | features: ['task_id', 'language', 'prompt', 'description', 'test', 'entry_point', 'canonical_solution', 'natural_language'], 58 | num_rows: 80 59 | }) 60 | Chinese: Dataset({ 61 | features: ['task_id', 'language', 'prompt', 'description', 'test', 'entry_point', 'canonical_solution', 'natural_language'], 62 | num_rows: 80 63 | }) 64 | 65 | ⋮ 66 | 67 | Afrikaans: Dataset({ 68 | features: ['task_id', 'language', 'prompt', 'description', 'test', 'entry_point', 'canonical_solution', 'natural_language'], 69 | num_rows: 80 70 | }) 71 | }) 72 | 73 | ``` 74 | 75 | If you encounter an error while loading the data, try forcing a re-download: 76 | ```python 77 | dataset = load_dataset("floatai/HumanEval-XL", "python", download_mode="force_redownload") 78 | ``` 79 | 80 | ### Data Instances 81 | 82 | An example of a dataset instance (from the `python` configuration with Chinese prompts, i.e. `dataset["Chinese"][0]`): 83 | 84 | ```python 85 | { 86 | 'task_id': 'python/0', 87 | 'language': 'python', 88 | 'prompt': 'from 
typing import List\n\n\ndef below_zero(operations: List[int]) -> bool:\n """ 你会得到一个银行账户的存款和取款操作列表,该账户从零余额开始。你的任务是检测账户余额是否在任何时候降至零以下,并在该点返回True。否则应返回False。\n \n >>> below_zero([1, 2, 3])\n False\n >>> below_zero([1, 2, -4, 5])\n True\n """\n', 89 | 'description': '你会得到一个银行账户的存款和取款操作列表,该账户从零余额开始。你的任务是检测账户余额是否在任何时候降至零以下,并在该点返回True。否则应返回False。\n ', 90 | 'test': "\n\nMETADATA = {\n 'author': 'jt',\n 'dataset': 'test'\n}\n\n\ndef check(candidate):\n assert candidate([]) == False\n assert candidate([1, 2, -3, 1, 2, -3]) == False\n assert candidate([1, 2, -4, 5, 6]) == True\n assert candidate([1, -1, 2, -2, 5, -5, 4, -4]) == False\n assert candidate([1, -1, 2, -2, 5, -5, 4, -5]) == True\n assert candidate([1, -2, 2, -2, 5, -5, 4, -4]) == True\n", 91 | 'entry_point': 'below_zero', 92 | 'canonical_solution': ' balance = 0\n\n for op in operations:\n balance += op\n if balance < 0:\n return True\n\n return False\n', 93 | 'natural_language': 'Chinese' 94 | } 95 | ``` 96 | 97 | ### Data Fields 98 | 99 | - `task_id`: identifier for the data sample 100 | - `prompt`: input for the model containing function header and docstrings 101 | - `canonical_solution`: solution for the problem in the `prompt` 102 | - `description`: task description 103 | - `test`: contains function to test generated code for correctness 104 | - `entry_point`: entry point for test 105 | - `language`: programming language identifier to call the appropriate subprocess call for program execution 106 | - `natural_language`: natural language identifier to show the language the prompt is in 107 | 108 | 109 | ### Data Splits 110 | Programming languages are used to specify splits: 111 | - python 112 | - java 113 | - javascript 114 | - csharp 115 | - go 116 | - kotlin 117 | - php 118 | - perl 119 | - ruby 120 | - swift 121 | - scala 122 | - typescript 123 | 124 | ## Evaluation 125 | ### Installation 126 | 127 | Check out and install this repository: 128 | ``` 129 | git clone git@github.com:floatai/HumanEval-XL.git 130 
| cd HumanEval-XL 131 | pip install -e mxeval 132 | ``` 133 | 134 | ### Dependencies 135 | We provide scripts to help set up the programming language dependencies that are needed to execute and evaluate generated code with this dataset. 136 | (We use the same scripts from https://github.com/amazon-science/mxeval for code generation evaluation) 137 | 138 | #### Amazon Linux AMI 139 | ``` 140 | bash mxeval/language_setup/amazon_linux_ami.sh 141 | ``` 142 | #### Ubuntu 143 | ``` 144 | bash mxeval/language_setup/ubuntu.sh 145 | ``` 146 | 147 | ## Evaluation Usage 148 | 149 | **This program exists to run untrusted model-generated code. Users are strongly 150 | encouraged not to do so outside of a robust security sandbox. See the comment in 151 | `execution.py` for more information and instructions.** 152 | (We use the same scripts from https://github.com/amazon-science/mxeval for code generation evaluation) 153 | 154 | Each sample is formatted into a single line: 155 | ``` 156 | {"task_id": "Corresponding task ID", "completion": "Completion only without the prompt", 157 | "language": "programming language name"} 158 | ``` 159 | We provide `python_chinese_generated_samples.jsonl` to illustrate the format. 160 | 161 | Here is nearly functional example code (you just have to provide 162 | `generate_one_completion` to make it work) that saves generated completions to 163 | `samples.jsonl`. 
164 | ``` 165 | from mxeval.data import write_jsonl, read_problems 166 | 167 | problems = read_problems() 168 | 169 | num_samples_per_task = 200 170 | samples = [ 171 | dict(task_id=task_id, language=problems[task_id]["language"], completion=generate_one_completion(problems[task_id]["prompt"])) 172 | for task_id in problems 173 | for _ in range(num_samples_per_task) 174 | ] 175 | write_jsonl("samples.jsonl", samples) 176 | ``` 177 | 178 | To evaluate the samples, e.g., for Python with Chinese prompts, run 179 | ``` 180 | evaluate_functional_correctness python_chinese_generated_samples.jsonl --problem_file data/python/Chinese.jsonl 181 | ``` 182 | 183 | Note: Because there is no unbiased way of estimating pass@k when there are fewer 184 | samples than k, the script does not evaluate pass@k for these cases. To 185 | evaluate with other k values, pass `--k <comma-separated-values-here>`. For 186 | other options, see 187 | ``` 188 | $ evaluate_functional_correctness --help 189 | ``` 190 | However, we recommend that you use the default values for the rest. 191 | 192 | ## Credits 193 | We adapted Amazon-science's mxeval package (https://github.com/amazon-science/mxeval) for the evaluation. We thank Amazon for their pioneering effort in this field including the release of the dataset and evaluation code. 
194 | 195 | We also appreciate the open-source contributions to the [`floatai/HumanEval-XL`](https://huggingface.co/datasets/floatai/HumanEval-XL) dataset: 196 | - [iNeil77/HumanEval-XL](https://huggingface.co/datasets/iNeil77/HumanEval-XL) 197 | - vllm code evaluation: [iNeil77/vllm-code-harness](https://github.com/iNeil77/vllm-code-harness) 198 | 199 | ## Citation 200 | 201 | ``` 202 | @inproceedings{peng-etal-2024-humaneval, 203 | title = "{H}uman{E}val-{XL}: A Multilingual Code Generation Benchmark for Cross-lingual Natural Language Generalization", 204 | author = "Peng, Qiwei and 205 | Chai, Yekun and 206 | Li, Xuhong", 207 | editor = "Calzolari, Nicoletta and 208 | Kan, Min-Yen and 209 | Hoste, Veronique and 210 | Lenci, Alessandro and 211 | Sakti, Sakriani and 212 | Xue, Nianwen", 213 | booktitle = "Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)", 214 | month = may, 215 | year = "2024", 216 | address = "Torino, Italia", 217 | publisher = "ELRA and ICCL", 218 | url = "https://aclanthology.org/2024.lrec-main.735/", 219 | pages = "8383--8394", 220 | abstract = "Large language models (LLMs) have made significant progress in generating codes from textual prompts. However, existing benchmarks have mainly concentrated on translating English prompts to multilingual codes or have been constrained to very limited natural languages (NLs). These benchmarks have overlooked the vast landscape of massively multilingual NL to multilingual code, leaving a critical gap in the evaluation of multilingual LLMs. In response, we introduce HumanEval-XL, a massively multilingual code generation benchmark specifically crafted to address this deficiency. HumanEval-XL establishes connections between 23 NLs and 12 programming languages (PLs), and comprises of a collection of 22,080 prompts with an average of 8.33 test cases. 
By ensuring parallel data across multiple NLs and PLs, HumanEval-XL offers a comprehensive evaluation platform for multilingual LLMs, allowing the assessment of the understanding of different NLs. Our work serves as a pioneering step towards filling the void in evaluating NL generalization in the area of multilingual code generation. We make our evaluation code and data publicly available at https://github.com/floatai/HumanEval-XL." 221 | } 222 | ``` 223 | -------------------------------------------------------------------------------- /data/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/floatai/HumanEval-XL/1e9301f6cfc2d3481a7f7e44569982285238ac99/data/.DS_Store -------------------------------------------------------------------------------- /data/python/Chinese.jsonl: -------------------------------------------------------------------------------- 1 | {"task_id": "python/0", "prompt": "from typing import List\n\n\ndef below_zero(operations: List[int]) -> bool:\n \"\"\" 你会得到一个银行账户的存款和取款操作列表,该账户从零余额开始。你的任务是检测账户余额是否在任何时候降至零以下,并在该点返回True。否则应返回False。\n \n >>> below_zero([1, 2, 3])\n False\n >>> below_zero([1, 2, -4, 5])\n True\n \"\"\"\n", "entry_point": "below_zero", "test": "\n\nMETADATA = {\n 'author': 'jt',\n 'dataset': 'test'\n}\n\n\ndef check(candidate):\n assert candidate([]) == False\n assert candidate([1, 2, -3, 1, 2, -3]) == False\n assert candidate([1, 2, -4, 5, 6]) == True\n assert candidate([1, -1, 2, -2, 5, -5, 4, -4]) == False\n assert candidate([1, -1, 2, -2, 5, -5, 4, -5]) == True\n assert candidate([1, -2, 2, -2, 5, -5, 4, -4]) == True\n", "language": "python", "canonical_solution": " balance = 0\n\n for op in operations:\n balance += op\n if balance < 0:\n return True\n\n return False\n", "description": "你会得到一个银行账户的存款和取款操作列表,该账户从零余额开始。你的任务是检测账户余额是否在任何时候降至零以下,并在该点返回True。否则应返回False。\n ", "natural_language": "Chinese"} 2 | {"task_id": "python/1", "prompt": "from typing 
import List, Tuple\n\n\ndef sum_product(numbers: List[int]) -> Tuple[int, int]:\n \"\"\" 对于给定的整数列表,返回一个元组,其中包含列表中所有整数的和与积。\n空列表的和应该等于0,空列表的积应该等于1。\n \n >>> sum_product([])\n (0, 1)\n >>> sum_product([1, 2, 3, 4])\n (10, 24)\n \"\"\"\n", "entry_point": "sum_product", "test": "\n\nMETADATA = {\n 'author': 'jt',\n 'dataset': 'test'\n}\n\n\ndef check(candidate):\n assert candidate([]) == (0, 1)\n assert candidate([1, 1, 1]) == (3, 1)\n assert candidate([100, 0]) == (100, 0)\n assert candidate([3, 5, 7]) == (3 + 5 + 7, 3 * 5 * 7)\n assert candidate([10]) == (10, 10)\n", "language": "python", "canonical_solution": " sum_value = 0\n prod_value = 1\n\n for n in numbers:\n sum_value += n\n prod_value *= n\n return sum_value, prod_value\n", "description": "对于给定的整数列表,返回一个元组,其中包含列表中所有整数的和与积。\n空列表的和应该等于0,空列表的积应该等于1。\n ", "natural_language": "Chinese"} 3 | {"task_id": "python/2", "prompt": "from typing import List\n\n\ndef string_xor(a: str, b: str) -> str:\n \"\"\" 输入两个仅由1和0组成的字符串a和b。\n对这些输入执行二进制异或,并将结果作为字符串返回。\n \n >>> string_xor('010', '110')\n '100'\n \"\"\"\n", "entry_point": "string_xor", "test": "\n\nMETADATA = {\n 'author': 'jt',\n 'dataset': 'test'\n}\n\n\ndef check(candidate):\n assert candidate('111000', '101010') == '010010'\n assert candidate('1', '1') == '0'\n assert candidate('0101', '0000') == '0101'\n", "language": "python", "canonical_solution": " def xor(i, j):\n if i == j:\n return '0'\n else:\n return '1'\n\n return ''.join(xor(x, y) for x, y in zip(a, b))\n", "description": "输入两个仅由1和0组成的字符串a和b。\n对这些输入执行二进制异或,并将结果作为字符串返回。\n ", "natural_language": "Chinese"} 4 | {"task_id": "python/3", "prompt": "from typing import List, Optional\n\n\ndef longest(strings: List[str]) -> Optional[str]:\n \"\"\" 从字符串列表中返回最长的字符串。如果有多个长度相同的字符串,则返回第一个字符串。如果输入列表为空,则返回null。\n \n >>> longest([])\n\n >>> longest(['a', 'b', 'c'])\n 'a'\n >>> longest(['a', 'bb', 'ccc'])\n 'ccc'\n \"\"\"\n", "entry_point": "longest", "test": "\n\nMETADATA = {\n 'author': 'jt',\n 'dataset': 
'test'\n}\n\n\ndef check(candidate):\n assert candidate([]) == None\n assert candidate(['x', 'y', 'z']) == 'x'\n assert candidate(['x', 'yyy', 'zzzz', 'www', 'kkkk', 'abc']) == 'zzzz'\n", "language": "python", "canonical_solution": " if not strings:\n return None\n\n maxlen = max(len(x) for x in strings)\n for s in strings:\n if len(s) == maxlen:\n return s\n", "description": "从字符串列表中返回最长的字符串。如果有多个长度相同的字符串,则返回第一个字符串。如果输入列表为空,则返回null。\n ", "natural_language": "Chinese"} 5 | {"task_id": "python/4", "prompt": "\n\ndef greatest_common_divisor(a: int, b: int) -> int:\n \"\"\" 返回两个整数a和b的最大公约数\n \n >>> greatest_common_divisor(3, 5)\n 1\n >>> greatest_common_divisor(25, 15)\n 5\n \"\"\"\n", "entry_point": "greatest_common_divisor", "test": "\n\nMETADATA = {\n 'author': 'jt',\n 'dataset': 'test'\n}\n\n\ndef check(candidate):\n assert candidate(3, 7) == 1\n assert candidate(10, 15) == 5\n assert candidate(49, 14) == 7\n assert candidate(144, 60) == 12\n", "language": "python", "canonical_solution": " while b:\n a, b = b, a % b\n return a\n", "description": "返回两个整数a和b的最大公约数\n ", "natural_language": "Chinese"} 6 | {"task_id": "python/5", "prompt": "from typing import List\n\n\ndef sort_numbers(numbers: str) -> str:\n \"\"\" 输入一个由“零”到“九”数字组成的以空格分隔的字符串。\n有效选择为“零”、“一”、“二”、“三”、“四”、“五”、“六”、“七”、“八”和“九”。\n返回按从小到大排序的数字字符串。\n \n >>> sort_numbers('three one five')\n 'one three five'\n \"\"\"\n", "entry_point": "sort_numbers", "test": "\n\nMETADATA = {\n 'author': 'jt',\n 'dataset': 'test'\n}\n\n\ndef check(candidate):\n assert candidate('') == ''\n assert candidate('three') == 'three'\n assert candidate('three five nine') == 'three five nine'\n assert candidate('five zero four seven nine eight') == 'zero four five seven eight nine'\n assert candidate('six five four three two one zero') == 'zero one two three four five six'\n", "language": "python", "canonical_solution": " value_map = {\n 'zero': 0,\n 'one': 1,\n 'two': 2,\n 'three': 3,\n 'four': 4,\n 'five': 5,\n 'six': 6,\n 'seven': 
7,\n 'eight': 8,\n 'nine': 9\n }\n return ' '.join(sorted([x for x in numbers.split(' ') if x], key=lambda x: value_map[x]))\n", "description": "输入一个由“零”到“九”数字组成的以空格分隔的字符串。\n有效选择为“零”、“一”、“二”、“三”、“四”、“五”、“六”、“七”、“八”和“九”。\n返回按从小到大排序的数字字符串。\n ", "natural_language": "Chinese"} 7 | {"task_id": "python/6", "prompt": "from typing import List\n\n\ndef rescale_to_unit(numbers: List[float]) -> List[float]:\n \"\"\" 给定一个数字列表(至少有两个元素),对该列表应用线性变换,使最小的数字变为0,最大的数字变为1。\n \n >>> rescale_to_unit([1.0, 2.0, 3.0, 4.0, 5.0])\n [0.0, 0.25, 0.5, 0.75, 1.0]\n \"\"\"\n", "entry_point": "rescale_to_unit", "test": "\n\nMETADATA = {\n 'author': 'jt',\n 'dataset': 'test'\n}\n\n\ndef check(candidate):\n assert candidate([2.0, 49.9]) == [0.0, 1.0]\n assert candidate([100.0, 49.9]) == [1.0, 0.0]\n assert candidate([1.0, 2.0, 3.0, 4.0, 5.0]) == [0.0, 0.25, 0.5, 0.75, 1.0]\n assert candidate([2.0, 1.0, 5.0, 3.0, 4.0]) == [0.25, 0.0, 1.0, 0.5, 0.75]\n assert candidate([12.0, 11.0, 15.0, 13.0, 14.0]) == [0.25, 0.0, 1.0, 0.5, 0.75]\n", "language": "python", "canonical_solution": " min_number = min(numbers)\n max_number = max(numbers)\n return [(x - min_number) / (max_number - min_number) for x in numbers]\n", "description": "给定一个数字列表(至少有两个元素),对该列表应用线性变换,使最小的数字变为0,最大的数字变为1。\n ", "natural_language": "Chinese"} 8 | {"task_id": "python/7", "prompt": "\n\ndef flip_case(string: str) -> str:\n \"\"\" 对于给定的字符串,将小写字符翻转为大写,将大写字符翻转为小写。\n \n >>> flip_case('Hello')\n 'hELLO'\n \"\"\"\n", "entry_point": "flip_case", "test": "\n\nMETADATA = {\n 'author': 'jt',\n 'dataset': 'test'\n}\n\n\ndef check(candidate):\n assert candidate('') == ''\n assert candidate('Hello!') == 'hELLO!'\n assert candidate('These violent delights have violent ends') == 'tHESE VIOLENT DELIGHTS HAVE VIOLENT ENDS'\n", "language": "python", "canonical_solution": " return string.swapcase()\n", "description": "对于给定的字符串,将小写字符翻转为大写,将大写字符翻转为小写。\n ", "natural_language": "Chinese"} 9 | {"task_id": "python/8", "prompt": "\n\ndef get_positive(l: list):\n 
\"\"\"返回列表中仅为正数的数字。\n \n >>> get_positive([-1, 2, -4, 5, 6])\n [2, 5, 6]\n >>> get_positive([5, 3, -5, 2, -3, 3, 9, 0, 123, 1, -10])\n [5, 3, 2, 3, 9, 123, 1]\n \"\"\"\n", "entry_point": "get_positive", "test": "\n\nMETADATA = {}\n\n\ndef check(candidate):\n assert candidate([-1, -2, 4, 5, 6]) == [4, 5, 6]\n assert candidate([5, 3, -5, 2, 3, 3, 9, 0, 123, 1, -10]) == [5, 3, 2, 3, 3, 9, 123, 1]\n assert candidate([-1, -2]) == []\n assert candidate([]) == []\n\n", "language": "python", "canonical_solution": " return [e for e in l if e > 0]\n", "description": "返回列表中仅为正数的数字。\n ", "natural_language": "Chinese"} 10 | {"task_id": "python/9", "prompt": "\n\ndef is_prime(n):\n \"\"\"如果给定的数字是质数,则返回true,否则返回false。\n \n >>> is_prime(6)\n False\n >>> is_prime(101)\n True\n >>> is_prime(11)\n True\n >>> is_prime(13441)\n True\n >>> is_prime(61)\n True\n >>> is_prime(4)\n False\n >>> is_prime(1)\n False\n \"\"\"\n", "entry_point": "is_prime", "test": "\n\nMETADATA = {}\n\n\ndef check(candidate):\n assert candidate(6) == False\n assert candidate(101) == True\n assert candidate(11) == True\n assert candidate(13441) == True\n assert candidate(61) == True\n assert candidate(4) == False\n assert candidate(1) == False\n assert candidate(5) == True\n assert candidate(11) == True\n assert candidate(17) == True\n assert candidate(5 * 17) == False\n assert candidate(11 * 7) == False\n assert candidate(13441 * 19) == False\n\n", "language": "python", "canonical_solution": " if n < 2:\n return False\n for k in range(2, n - 1):\n if n % k == 0:\n return False\n return True\n", "description": "如果给定的数字是质数,则返回true,否则返回false。\n ", "natural_language": "Chinese"} 11 | {"task_id": "python/10", "prompt": "\n\ndef unique(l: list):\n \"\"\"返回列表中排序后的唯一元素\n \n >>> unique([5, 3, 5, 2, 3, 3, 9, 0, 123])\n [0, 2, 3, 5, 9, 123]\n \"\"\"\n", "entry_point": "unique", "test": "\n\nMETADATA = {}\n\n\ndef check(candidate):\n assert candidate([5, 3, 5, 2, 3, 3, 9, 0, 123]) == [0, 2, 3, 5, 9, 123]\n\n", "language": 
"python", "canonical_solution": " return sorted(list(set(l)))\n", "description": "返回列表中排序后的唯一元素\n ", "natural_language": "Chinese"} 12 | {"task_id": "python/11", "prompt": "\n\ndef prime_fib(n: int):\n \"\"\"\n prime_fib 返回第 n 个既是斐波那契数又是质数的数。\n \n >>> prime_fib(1)\n 2\n >>> prime_fib(2)\n 3\n >>> prime_fib(3)\n 5\n >>> prime_fib(4)\n 13\n >>> prime_fib(5)\n 89\n \"\"\"\n", "entry_point": "prime_fib", "test": "\n\nMETADATA = {}\n\n\ndef check(candidate):\n assert candidate(1) == 2\n assert candidate(2) == 3\n assert candidate(3) == 5\n assert candidate(4) == 13\n assert candidate(5) == 89\n assert candidate(6) == 233\n assert candidate(7) == 1597\n assert candidate(8) == 28657\n assert candidate(9) == 514229\n assert candidate(10) == 433494437\n\n", "language": "python", "canonical_solution": " import math\n\n def is_prime(p):\n if p < 2:\n return False\n for k in range(2, min(int(math.sqrt(p)) + 1, p - 1)):\n if p % k == 0:\n return False\n return True\n f = [0, 1]\n while True:\n f.append(f[-1] + f[-2])\n if is_prime(f[-1]):\n n -= 1\n if n == 0:\n return f[-1]\n", "description": "prime_fib 返回第 n 个既是斐波那契数又是质数的数。\n ", "natural_language": "Chinese"} 13 | {"task_id": "python/12", "prompt": "\n\ndef triples_sum_to_zero(l: list):\n \"\"\"\n triples_sum_to_zero 接受一个整数列表作为输入。\n如果列表中存在三个不同的元素相加等于零,则返回True,否则返回False。\n \n\n >>> triples_sum_to_zero([1, 3, 5, 0])\n False\n >>> triples_sum_to_zero([1, 3, -2, 1])\n True\n >>> triples_sum_to_zero([1, 2, 3, 7])\n False\n >>> triples_sum_to_zero([2, 4, -5, 3, 9, 7])\n True\n >>> triples_sum_to_zero([1])\n False\n \"\"\"\n", "entry_point": "triples_sum_to_zero", "test": "\n\nMETADATA = {}\n\n\ndef check(candidate):\n assert candidate([1, 3, 5, 0]) == False\n assert candidate([1, 3, 5, -1]) == False\n assert candidate([1, 3, -2, 1]) == True\n assert candidate([1, 2, 3, 7]) == False\n assert candidate([1, 2, 5, 7]) == False\n assert candidate([2, 4, -5, 3, 9, 7]) == True\n assert candidate([1]) == False\n assert candidate([1, 3, 5, 
-100]) == False\n assert candidate([100, 3, 5, -100]) == False\n\n", "language": "python", "canonical_solution": " for i in range(len(l)):\n for j in range(i + 1, len(l)):\n for k in range(j + 1, len(l)):\n if l[i] + l[j] + l[k] == 0:\n return True\n return False\n", "description": "triples_sum_to_zero 接受一个整数列表作为输入。\n如果列表中存在三个不同的元素相加等于零,则返回True,否则返回False。\n ", "natural_language": "Chinese"} 14 | {"task_id": "python/13", "prompt": "\n\ndef pairs_sum_to_zero(l):\n \"\"\"\n pairs_sum_to_zero 接受一个整数列表作为输入。\n如果列表中存在两个不同的元素相加等于零,则返回True,否则返回False。\n \n >>> pairs_sum_to_zero([1, 3, 5, 0])\n False\n >>> pairs_sum_to_zero([1, 3, -2, 1])\n False\n >>> pairs_sum_to_zero([1, 2, 3, 7])\n False\n >>> pairs_sum_to_zero([2, 4, -5, 3, 5, 7])\n True\n >>> pairs_sum_to_zero([1])\n False\n \"\"\"\n", "entry_point": "pairs_sum_to_zero", "test": "\n\nMETADATA = {}\n\n\ndef check(candidate):\n assert candidate([1, 3, 5, 0]) == False\n assert candidate([1, 3, -2, 1]) == False\n assert candidate([1, 2, 3, 7]) == False\n assert candidate([2, 4, -5, 3, 5, 7]) == True\n assert candidate([1]) == False\n\n assert candidate([-3, 9, -1, 3, 2, 30]) == True\n assert candidate([-3, 9, -1, 3, 2, 31]) == True\n assert candidate([-3, 9, -1, 4, 2, 30]) == False\n assert candidate([-3, 9, -1, 4, 2, 31]) == False\n\n", "language": "python", "canonical_solution": " for i, l1 in enumerate(l):\n for j in range(i + 1, len(l)):\n if l1 + l[j] == 0:\n return True\n return False\n", "description": "pairs_sum_to_zero 接受一个整数列表作为输入。\n如果列表中存在两个不同的元素相加等于零,则返回True,否则返回False。\n ", "natural_language": "Chinese"} 15 | {"task_id": "python/14", "prompt": "\n\ndef fib4(n: int):\n \"\"\"Fib4数列是一种类似于斐波那契数列的数列,定义如下:\n fib4(0) -> 0\n fib4(1) -> 0\n fib4(2) -> 2\n fib4(3) -> 0\n fib4(n) -> fib4(n-1) + fib4(n-2) + fib4(n-3) + fib4(n-4)。\n 请编写一个函数高效地计算Fib4数列的第n个元素。不要使用递归。\n \n >>> fib4(5)\n 4\n >>> fib4(6)\n 8\n >>> fib4(7)\n 14\n \"\"\"\n", "entry_point": "fib4", "test": "\n\nMETADATA = {}\n\n\ndef check(candidate):\n assert 
candidate(5) == 4\n assert candidate(8) == 28\n assert candidate(10) == 104\n assert candidate(12) == 386\n\n", "language": "python", "canonical_solution": " results = [0, 0, 2, 0]\n if n < 4:\n return results[n]\n\n for _ in range(4, n + 1):\n results.append(results[-1] + results[-2] + results[-3] + results[-4])\n results.pop(0)\n\n return results[-1]\n", "description": "Fib4数列是一种类似于斐波那契数列的数列,定义如下:\n fib4(0) -> 0\n fib4(1) -> 0\n fib4(2) -> 2\n fib4(3) -> 0\n fib4(n) -> fib4(n-1) + fib4(n-2) + fib4(n-3) + fib4(n-4)。\n 请编写一个函数高效地计算Fib4数列的第n个元素。不要使用递归。\n ", "natural_language": "Chinese"} 16 | {"task_id": "python/15", "prompt": "\n\ndef median(l: list):\n \"\"\"返回列表l中元素的中位数。\n \n >>> median([3, 1, 2, 4, 5])\n 3\n >>> median([-10, 4, 6, 1000, 10, 20])\n 15.0\n \"\"\"\n", "entry_point": "median", "test": "\n\nMETADATA = {}\n\n\ndef check(candidate):\n assert candidate([3, 1, 2, 4, 5]) == 3\n assert candidate([-10, 4, 6, 1000, 10, 20]) == 8.0\n assert candidate([5]) == 5\n assert candidate([6, 5]) == 5.5\n assert candidate([8, 1, 3, 9, 9, 2, 7]) == 7 \n\n", "language": "python", "canonical_solution": " l = sorted(l)\n if len(l) % 2 == 1:\n return l[len(l) // 2]\n else:\n return (l[len(l) // 2 - 1] + l[len(l) // 2]) / 2.0\n", "description": "返回列表l中元素的中位数。\n ", "natural_language": "Chinese"} 17 | {"task_id": "python/16", "prompt": "\n\ndef is_palindrome(text: str):\n \"\"\"\n 检查给定的字符串是否为回文。\n \n >>> is_palindrome('')\n True\n >>> is_palindrome('aba')\n True\n >>> is_palindrome('aaaaa')\n True\n >>> is_palindrome('zbcd')\n False\n \"\"\"\n", "entry_point": "is_palindrome", "test": "\n\nMETADATA = {}\n\n\ndef check(candidate):\n assert candidate('') == True\n assert candidate('aba') == True\n assert candidate('aaaaa') == True\n assert candidate('zbcd') == False\n assert candidate('xywyx') == True\n assert candidate('xywyz') == False\n assert candidate('xywzx') == False\n\n", "language": "python", "canonical_solution": " for i in range(len(text)):\n if text[i] != 
text[len(text) - 1 - i]:\n return False\n return True\n", "description": "检查给定的字符串是否为回文。\n ", "natural_language": "Chinese"} 18 | {"task_id": "python/17", "prompt": "\n\ndef remove_vowels(text):\n \"\"\"\n remove_vowels是一个函数,它接受一个字符串并返回没有元音字母的字符串。\n \n >>> remove_vowels('')\n ''\n >>> remove_vowels(\"abcdef\\nghijklm\")\n 'bcdf\\nghjklm'\n >>> remove_vowels('abcdef')\n 'bcdf'\n >>> remove_vowels('aaaaa')\n ''\n >>> remove_vowels('aaBAA')\n 'B'\n >>> remove_vowels('zbcd')\n 'zbcd'\n \"\"\"\n", "entry_point": "remove_vowels", "test": "\n\nMETADATA = {}\n\n\ndef check(candidate):\n assert candidate('') == ''\n assert candidate(\"abcdef\\nghijklm\") == 'bcdf\\nghjklm'\n assert candidate('fedcba') == 'fdcb'\n assert candidate('eeeee') == ''\n assert candidate('acBAA') == 'cB'\n assert candidate('EcBOO') == 'cB'\n assert candidate('ybcd') == 'ybcd'\n\n", "language": "python", "canonical_solution": " return \"\".join([s for s in text if s.lower() not in [\"a\", \"e\", \"i\", \"o\", \"u\"]])\n", "description": "remove_vowels是一个函数,它接受一个字符串并返回没有元音字母的字符串。\n ", "natural_language": "Chinese"} 19 | {"task_id": "python/18", "prompt": "\n\ndef below_threshold(l: list, t: int):\n \"\"\"如果列表l中的所有数字都低于阈值t,则返回True。\n \n >>> below_threshold([1, 2, 4, 10], 100)\n True\n >>> below_threshold([1, 20, 4, 10], 5)\n False\n \"\"\"\n", "entry_point": "below_threshold", "test": "\n\nMETADATA = {}\n\n\ndef check(candidate):\n assert candidate([1, 2, 4, 10], 100)\n assert not candidate([1, 20, 4, 10], 5)\n assert candidate([1, 20, 4, 10], 21)\n assert candidate([1, 20, 4, 10], 22)\n assert candidate([1, 8, 4, 10], 11)\n assert not candidate([1, 8, 4, 10], 10)\n\n", "language": "python", "canonical_solution": " for e in l:\n if e >= t:\n return False\n return True\n", "description": "如果列表l中的所有数字都低于阈值t,则返回True。\n ", "natural_language": "Chinese"} 20 | {"task_id": "python/19", "prompt": "\n\ndef add(x: int, y: int):\n \"\"\"添加两个数字 x 和 y\n \n >>> add(2, 3)\n 5\n >>> add(5, 7)\n 12\n \"\"\"\n", 
"entry_point": "add", "test": "\n\nMETADATA = {}\n\n\ndef check(candidate):\n import random\n\n assert candidate(0, 1) == 1\n assert candidate(1, 0) == 1\n assert candidate(2, 3) == 5\n assert candidate(5, 7) == 12\n assert candidate(7, 5) == 12\n\n for i in range(100):\n x, y = random.randint(0, 1000), random.randint(0, 1000)\n assert candidate(x, y) == x + y\n\n", "language": "python", "canonical_solution": " return x + y\n", "description": "添加两个数字 x 和 y\n ", "natural_language": "Chinese"} 21 | {"task_id": "python/20", "prompt": "\n\ndef same_chars(s0: str, s1: str):\n \"\"\"\n 检查两个单词是否具有相同的字符。\n \n >>> same_chars('eabcdzzzz', 'dddzzzzzzzddeddabc')\n True\n >>> same_chars('abcd', 'dddddddabc')\n True\n >>> same_chars('dddddddabc', 'abcd')\n True\n >>> same_chars('eabcd', 'dddddddabc')\n False\n >>> same_chars('abcd', 'dddddddabce')\n False\n >>> same_chars('eabcdzzzz', 'dddzzzzzzzddddabc')\n False\n \"\"\"\n", "entry_point": "same_chars", "test": "\n\nMETADATA = {}\n\n\ndef check(candidate):\n assert candidate('eabcdzzzz', 'dddzzzzzzzddeddabc') == True\n assert candidate('abcd', 'dddddddabc') == True\n assert candidate('dddddddabc', 'abcd') == True\n assert candidate('eabcd', 'dddddddabc') == False\n assert candidate('abcd', 'dddddddabcf') == False\n assert candidate('eabcdzzzz', 'dddzzzzzzzddddabc') == False\n assert candidate('aabb', 'aaccc') == False\n\n", "language": "python", "canonical_solution": " return set(s0) == set(s1)\n", "description": "检查两个单词是否具有相同的字符。\n ", "natural_language": "Chinese"} 22 | {"task_id": "python/21", "prompt": "\n\ndef fib(n: int):\n \"\"\"返回第n个斐波那契数。\n \n >>> fib(10)\n 55\n >>> fib(1)\n 1\n >>> fib(8)\n 21\n \"\"\"\n", "entry_point": "fib", "test": "\n\nMETADATA = {}\n\n\ndef check(candidate):\n assert candidate(10) == 55\n assert candidate(1) == 1\n assert candidate(8) == 21\n assert candidate(11) == 89\n assert candidate(12) == 144\n\n", "language": "python", "canonical_solution": " if n == 0:\n return 0\n if n == 1:\n return 1\n 
return fib(n - 1) + fib(n - 2)\n", "description": "返回第n个斐波那契数。\n ", "natural_language": "Chinese"} 23 | {"task_id": "python/22", "prompt": "\n\ndef common(l1: list, l2: list):\n \"\"\"返回两个列表中排序后的唯一公共元素。\n \n >>> common([1, 4, 3, 34, 653, 2, 5], [5, 7, 1, 5, 9, 653, 121])\n [1, 5, 653]\n >>> common([5, 3, 2, 8], [3, 2])\n [2, 3]\n\n \"\"\"\n", "entry_point": "common", "test": "\n\nMETADATA = {}\n\n\ndef check(candidate):\n assert candidate([1, 4, 3, 34, 653, 2, 5], [5, 7, 1, 5, 9, 653, 121]) == [1, 5, 653]\n assert candidate([5, 3, 2, 8], [3, 2]) == [2, 3]\n assert candidate([4, 3, 2, 8], [3, 2, 4]) == [2, 3, 4]\n assert candidate([4, 3, 2, 8], []) == []\n\n", "language": "python", "canonical_solution": " ret = set()\n for e1 in l1:\n for e2 in l2:\n if e1 == e2:\n ret.add(e1)\n return sorted(list(ret))\n", "description": "返回两个列表中排序后的唯一公共元素。\n ", "natural_language": "Chinese"} 24 | {"task_id": "python/23", "prompt": "\n\ndef largest_prime_factor(n: int):\n \"\"\"返回n的最大质因数。假设n>1且不是质数。\n \n >>> largest_prime_factor(13195)\n 29\n >>> largest_prime_factor(2048)\n 2\n \"\"\"\n", "entry_point": "largest_prime_factor", "test": "\n\nMETADATA = {}\n\n\ndef check(candidate):\n assert candidate(15) == 5\n assert candidate(27) == 3\n assert candidate(63) == 7\n assert candidate(330) == 11\n assert candidate(13195) == 29\n\n", "language": "python", "canonical_solution": " def is_prime(k):\n if k < 2:\n return False\n for i in range(2, k - 1):\n if k % i == 0:\n return False\n return True\n largest = 1\n for j in range(2, n + 1):\n if n % j == 0 and is_prime(j):\n largest = max(largest, j)\n return largest\n", "description": "返回n的最大质因数。假设n>1且不是质数。\n ", "natural_language": "Chinese"} 25 | {"task_id": "python/24", "prompt": "\n\ndef sum_to_n(n: int):\n \"\"\"sum_to_n是一个函数,它将从1加到n的数字相加。\n \n >>> sum_to_n(30)\n 465\n >>> sum_to_n(100)\n 5050\n >>> sum_to_n(5)\n 15\n >>> sum_to_n(10)\n 55\n >>> sum_to_n(1)\n 1\n \"\"\"\n", "entry_point": "sum_to_n", "test": "\n\nMETADATA = {}\n\n\ndef 
check(candidate):\n assert candidate(1) == 1\n assert candidate(6) == 21\n assert candidate(11) == 66\n assert candidate(30) == 465\n assert candidate(100) == 5050\n\n", "language": "python", "canonical_solution": " return sum(range(n + 1))\n", "description": "sum_to_n是一个函数,它将从1加到n的数字相加。\n ", "natural_language": "Chinese"} 26 | {"task_id": "python/25", "prompt": "\n\ndef derivative(xs: list):\n \"\"\" xs表示一个多项式的系数。\n xs[0] + xs[1] * x + xs[2] * x^2 + ....\n 返回该多项式的导数,形式不变。\n \n >>> derivative([3, 1, 2, 4, 5])\n [1, 4, 12, 20]\n >>> derivative([1, 2, 3])\n [2, 6]\n \"\"\"\n", "entry_point": "derivative", "test": "\n\nMETADATA = {}\n\n\ndef check(candidate):\n assert candidate([3, 1, 2, 4, 5]) == [1, 4, 12, 20]\n assert candidate([1, 2, 3]) == [2, 6]\n assert candidate([3, 2, 1]) == [2, 2]\n assert candidate([3, 2, 1, 0, 4]) == [2, 2, 0, 16]\n assert candidate([1]) == []\n\n", "language": "python", "canonical_solution": " return [(i * x) for i, x in enumerate(xs)][1:]\n", "description": "xs表示一个多项式的系数。\n xs[0] + xs[1] * x + xs[2] * x^2 + ....\n 返回该多项式的导数,形式不变。\n ", "natural_language": "Chinese"} 27 | {"task_id": "python/26", "prompt": "\n\ndef fibfib(n: int):\n \"\"\"FibFib数列是一种类似于斐波那契数列的数列,定义如下:\n fibfib(0) == 0\n fibfib(1) == 0\n fibfib(2) == 1\n fibfib(n) == fibfib(n-1) + fibfib(n-2) + fibfib(n-3)。\n 请编写一个函数,高效地计算FibFib数列的第n个元素。\n \n >>> fibfib(1)\n 0\n >>> fibfib(5)\n 4\n >>> fibfib(8)\n 24\n \"\"\"\n", "entry_point": "fibfib", "test": "\n\nMETADATA = {}\n\n\ndef check(candidate):\n assert candidate(2) == 1\n assert candidate(1) == 0\n assert candidate(5) == 4\n assert candidate(8) == 24\n assert candidate(10) == 81\n assert candidate(12) == 274\n assert candidate(14) == 927\n\n", "language": "python", "canonical_solution": " if n == 0:\n return 0\n if n == 1:\n return 0\n if n == 2:\n return 1\n return fibfib(n - 1) + fibfib(n - 2) + fibfib(n - 3)\n", "description": "FibFib数列是一种类似于斐波那契数列的数列,定义如下:\n fibfib(0) == 0\n fibfib(1) == 0\n fibfib(2) == 1\n fibfib(n) == 
fibfib(n-1) + fibfib(n-2) + fibfib(n-3)。\n 请编写一个函数,高效地计算FibFib数列的第n个元素。\n ", "natural_language": "Chinese"} 28 | {"task_id": "python/27", "prompt": "\nFIX = \"\"\"\nAdd more test cases.\n\"\"\"\n\ndef vowels_count(s):\n \"\"\"编写一个函数vowels_count,它接受表示单词的字符串作为输入,并返回字符串中元音字母的数量。在这种情况下,元音字母是'a','e','i','o','u'。在这里,'y'也是元音字母,但仅当它在给定单词的末尾时。\n\n 例:\n \n >>> vowels_count(\"abcde\")\n 2\n >>> vowels_count(\"ACEDY\")\n 3\n \"\"\"\n", "entry_point": "vowels_count", "test": "def check(candidate):\n\n # Check some simple cases\n assert candidate(\"abcde\") == 2, \"Test 1\"\n assert candidate(\"Alone\") == 3, \"Test 2\"\n assert candidate(\"key\") == 2, \"Test 3\"\n assert candidate(\"bye\") == 1, \"Test 4\"\n assert candidate(\"keY\") == 2, \"Test 5\"\n assert candidate(\"bYe\") == 1, \"Test 6\"\n assert candidate(\"ACEDY\") == 3, \"Test 7\"\n\n # Check some edge cases that are easy to work out by hand.\n assert True, \"This prints if this assert fails 2 (also good for debugging!)\"\n\n", "language": "python", "canonical_solution": " vowels = \"aeiouAEIOU\"\n n_vowels = sum(c in vowels for c in s)\n if s[-1] == 'y' or s[-1] == 'Y':\n n_vowels += 1\n return n_vowels\n", "description": "编写一个函数vowels_count,它接受表示单词的字符串作为输入,并返回字符串中元音字母的数量。在这种情况下,元音字母是'a','e','i','o','u'。在这里,'y'也是元音字母,但仅当它在给定单词的末尾时。\n\n 例:\n ", "natural_language": "Chinese"} 29 | {"task_id": "python/28", "prompt": "\ndef search(lst):\n '''\n 给定一个非空的正整数列表。返回大于零且频率大于或等于该整数本身值的最大整数。整数的频率是它在列表中出现的次数。如果不存在这样的值,则返回-1。示例:\n \n search([4, 1, 2, 2, 3, 1]) == 2\n search([1, 2, 2, 3, 3, 3, 4, 4, 4]) == 3\n search([5, 5, 4, 4, 4]) == -1\n '''\n", "entry_point": "search", "test": "def check(candidate):\n\n # manually generated tests\n assert candidate([5, 5, 5, 5, 1]) == 1\n assert candidate([4, 1, 4, 1, 4, 4]) == 4\n assert candidate([3, 3]) == -1\n assert candidate([8, 8, 8, 8, 8, 8, 8, 8]) == 8\n assert candidate([2, 3, 3, 2, 2]) == 2\n\n # automatically generated tests\n assert candidate([2, 7, 8, 8, 4, 8, 7, 3, 9, 6, 5, 10, 
4, 3, 6, 7, 1, 7, 4, 10, 8, 1]) == 1\n assert candidate([3, 2, 8, 2]) == 2\n assert candidate([6, 7, 1, 8, 8, 10, 5, 8, 5, 3, 10]) == 1\n assert candidate([8, 8, 3, 6, 5, 6, 4]) == -1\n assert candidate([6, 9, 6, 7, 1, 4, 7, 1, 8, 8, 9, 8, 10, 10, 8, 4, 10, 4, 10, 1, 2, 9, 5, 7, 9]) == 1\n assert candidate([1, 9, 10, 1, 3]) == 1\n assert candidate([6, 9, 7, 5, 8, 7, 5, 3, 7, 5, 10, 10, 3, 6, 10, 2, 8, 6, 5, 4, 9, 5, 3, 10]) == 5\n assert candidate([1]) == 1\n assert candidate([8, 8, 10, 6, 4, 3, 5, 8, 2, 4, 2, 8, 4, 6, 10, 4, 2, 1, 10, 2, 1, 1, 5]) == 4\n assert candidate([2, 10, 4, 8, 2, 10, 5, 1, 2, 9, 5, 5, 6, 3, 8, 6, 4, 10]) == 2\n assert candidate([1, 6, 10, 1, 6, 9, 10, 8, 6, 8, 7, 3]) == 1\n assert candidate([9, 2, 4, 1, 5, 1, 5, 2, 5, 7, 7, 7, 3, 10, 1, 5, 4, 2, 8, 4, 1, 9, 10, 7, 10, 2, 8, 10, 9, 4]) == 4\n assert candidate([2, 6, 4, 2, 8, 7, 5, 6, 4, 10, 4, 6, 3, 7, 8, 8, 3, 1, 4, 2, 2, 10, 7]) == 4\n assert candidate([9, 8, 6, 10, 2, 6, 10, 2, 7, 8, 10, 3, 8, 2, 6, 2, 3, 1]) == 2\n assert candidate([5, 5, 3, 9, 5, 6, 3, 2, 8, 5, 6, 10, 10, 6, 8, 4, 10, 7, 7, 10, 8]) == -1\n assert candidate([10]) == -1\n assert candidate([9, 7, 7, 2, 4, 7, 2, 10, 9, 7, 5, 7, 2]) == 2\n assert candidate([5, 4, 10, 2, 1, 1, 10, 3, 6, 1, 8]) == 1\n assert candidate([7, 9, 9, 9, 3, 4, 1, 5, 9, 1, 2, 1, 1, 10, 7, 5, 6, 7, 6, 7, 7, 6]) == 1\n assert candidate([3, 10, 10, 9, 2]) == -1\n\n", "language": "python", "canonical_solution": " frq = [0] * (max(lst) + 1)\n for i in lst:\n frq[i] += 1;\n\n ans = -1\n for i in range(1, len(frq)):\n if frq[i] >= i:\n ans = i\n \n return ans\n", "description": "给定一个非空的正整数列表。返回大于零且频率大于或等于该整数本身值的最大整数。整数的频率是它在列表中出现的次数。如果不存在这样的值,则返回-1。示例:\n ", "natural_language": "Chinese"} 30 | {"task_id": "python/29", "prompt": "\ndef triangle_area(a, b, c):\n '''\n 给定三角形的三条边长。如果这三条边可以组成一个有效的三角形,则返回保留两位小数的三角形面积。否则返回-1。当任意两条边的和大于第三条边时,三条边才能组成一个有效的三角形。例如:\n \n triangle_area(3, 4, 5) == 6.00\n triangle_area(1, 2, 10) == -1\n '''\n", "entry_point": 
"triangle_area", "test": "def check(candidate):\n\n # Check some simple cases\n assert candidate(3, 4, 5) == 6.00, \"This prints if this assert fails 1 (good for debugging!)\"\n assert candidate(1, 2, 10) == -1\n assert candidate(4, 8, 5) == 8.18\n assert candidate(2, 2, 2) == 1.73\n assert candidate(1, 2, 3) == -1\n assert candidate(10, 5, 7) == 16.25\n assert candidate(2, 6, 3) == -1\n\n # Check some edge cases that are easy to work out by hand.\n assert candidate(1, 1, 1) == 0.43, \"This prints if this assert fails 2 (also good for debugging!)\"\n assert candidate(2, 2, 10) == -1\n\n", "language": "python", "canonical_solution": " if a + b <= c or a + c <= b or b + c <= a:\n return -1 \n s = (a + b + c)/2 \n area = (s * (s - a) * (s - b) * (s - c)) ** 0.5\n area = round(area, 2)\n return area\n", "description": "给定三角形的三条边长。如果这三条边可以组成一个有效的三角形,则返回保留两位小数的三角形面积。否则返回-1。当任意两条边的和大于第三条边时,三条边才能组成一个有效的三角形。例如:\n ", "natural_language": "Chinese"} 31 | {"task_id": "python/30", "prompt": "\ndef will_it_fly(q,w):\n '''\n 编写一个函数,如果对象q能够飞行则返回True,否则返回False。\n 如果对象q平衡(它是一个回文列表)且其元素的总和小于或等于最大可能重量w,则对象q将飞行。\n\n 示例:\n will_it_fly([1, 2], 5) ➞ False \n # 1+2小于最大可能重量,但不平衡。\n\n will_it_fly([3, 2, 3], 1) ➞ False\n # 它是平衡的,但3+2+3大于最大可能重量。\n\n will_it_fly([3, 2, 3], 9) ➞ True\n # 3+2+3小于最大可能重量,且平衡。\n\n will_it_fly([3], 5) ➞ True\n # 3小于最大可能重量,且平衡。\n \n '''\n", "entry_point": "will_it_fly", "test": "def check(candidate):\n\n # Check some simple cases\n assert candidate([3, 2, 3], 9) is True\n assert candidate([1, 2], 5) is False\n assert candidate([3], 5) is True\n assert candidate([3, 2, 3], 1) is False\n\n\n # Check some edge cases that are easy to work out by hand.\n assert candidate([1, 2, 3], 6) is False\n assert candidate([5], 5) is True\n\n", "language": "python", "canonical_solution": " if sum(q) > w:\n return False\n\n i, j = 0, len(q)-1\n while i False\n is_happy(aa) => False\n is_happy(abcd) => True\n is_happy(aabb) => False\n is_happy(adb) => True\n is_happy(xyy) => False\n 
\"\"\"\n", "entry_point": "is_happy", "test": "def check(candidate):\n\n # Check some simple cases\n assert candidate(\"a\") == False , \"a\"\n assert candidate(\"aa\") == False , \"aa\"\n assert candidate(\"abcd\") == True , \"abcd\"\n assert candidate(\"aabb\") == False , \"aabb\"\n assert candidate(\"adb\") == True , \"adb\"\n assert candidate(\"xyy\") == False , \"xyy\"\n assert candidate(\"iopaxpoi\") == True , \"iopaxpoi\"\n assert candidate(\"iopaxioi\") == False , \"iopaxioi\"\n", "language": "python", "canonical_solution": " if len(s) < 3:\n return False\n\n for i in range(len(s) - 2):\n \n if s[i] == s[i+1] or s[i+1] == s[i+2] or s[i] == s[i+2]:\n return False\n return True\n", "description": "给定一个字符串s。\n你的任务是检查该字符串是否快乐。\n如果字符串的长度至少为3且每3个连续的字母都不同,则该字符串是快乐的。\n例如:\n ", "natural_language": "Chinese"} 35 | {"task_id": "python/34", "prompt": "\ndef numerical_letter_grade(grades):\n \"\"\"这是学期的最后一周,老师需要给学生们打分。老师一直在制定自己的评分算法。唯一的问题是,她丢失了用于评分的代码。她给了你一份学生的GPA清单,你需要编写一个函数,根据以下表格输出字母等级的清单:\n\n GPA | Letter grade\n 4.0 A+\n > 3.7 A \n > 3.3 A- \n > 3.0 B+\n > 2.7 B \n > 2.3 B-\n > 2.0 C+\n > 1.7 C\n > 1.3 C-\n > 1.0 D+ \n > 0.7 D \n > 0.0 D-\n 0.0 E\n \n\n 例如:\n \n grade_equation([4.0, 3, 1.7, 2, 3.5]) ==> ['A+', 'B', 'C-', 'C', 'A-']\n \"\"\"\n", "entry_point": "numerical_letter_grade", "test": "def check(candidate):\n\n # Check some simple cases\n assert candidate([4.0, 3, 1.7, 2, 3.5]) == ['A+', 'B', 'C-', 'C', 'A-']\n assert candidate([1.2]) == ['D+']\n assert candidate([0.5]) == ['D-']\n assert candidate([0.0]) == ['E']\n assert candidate([1, 0.3, 1.5, 2.8, 3.3]) == ['D', 'D-', 'C-', 'B', 'B+']\n assert candidate([0, 0.7]) == ['E', 'D-']\n\n # Check some edge cases that are easy to work out by hand.\n assert True\n\n", "language": "python", "canonical_solution": "\n \n letter_grade = []\n for gpa in grades:\n if gpa == 4.0:\n letter_grade.append(\"A+\")\n elif gpa > 3.7:\n letter_grade.append(\"A\")\n elif gpa > 3.3:\n letter_grade.append(\"A-\")\n elif gpa > 
3.0:\n letter_grade.append(\"B+\")\n elif gpa > 2.7:\n letter_grade.append(\"B\")\n elif gpa > 2.3:\n letter_grade.append(\"B-\")\n elif gpa > 2.0:\n letter_grade.append(\"C+\")\n elif gpa > 1.7:\n letter_grade.append(\"C\")\n elif gpa > 1.3:\n letter_grade.append(\"C-\")\n elif gpa > 1.0:\n letter_grade.append(\"D+\")\n elif gpa > 0.7:\n letter_grade.append(\"D\")\n elif gpa > 0.0:\n letter_grade.append(\"D-\")\n else:\n letter_grade.append(\"E\")\n return letter_grade\n", "description": "这是学期的最后一周,老师需要给学生们打分。老师一直在制定自己的评分算法。唯一的问题是,她丢失了用于评分的代码。她给了你一份学生的GPA清单,你需要编写一个函数,根据以下表格输出字母等级的清单:\n\n GPA | Letter grade\n 4.0 A+\n > 3.7 A \n > 3.3 A- \n > 3.0 B+\n > 2.7 B \n > 2.3 B-\n > 2.0 C+\n > 1.7 C\n > 1.3 C-\n > 1.0 D+ \n > 0.7 D \n > 0.0 D-\n 0.0 E\n \n\n 例如:\n ", "natural_language": "Chinese"} 36 | {"task_id": "python/35", "prompt": "\ndef prime_length(string):\n \"\"\"编写一个函数,它接受一个字符串并返回True,如果字符串长度是一个质数,否则返回False。\n示例:\n \n prime_length('Hello') == True\n prime_length('abcdcba') == True\n prime_length('kittens') == True\n prime_length('orange') == False\n \"\"\"\n", "entry_point": "prime_length", "test": "def check(candidate):\n\n # Check some simple cases\n assert candidate('Hello') == True\n assert candidate('abcdcba') == True\n assert candidate('kittens') == True\n assert candidate('orange') == False\n assert candidate('wow') == True\n assert candidate('world') == True\n assert candidate('MadaM') == True\n assert candidate('Wow') == True\n assert candidate('') == False\n assert candidate('HI') == True\n assert candidate('go') == True\n assert candidate('gogo') == False\n assert candidate('aaaaaaaaaaaaaaa') == False\n\n # Check some edge cases that are easy to work out by hand.\n assert candidate('Madam') == True\n assert candidate('M') == False\n assert candidate('0') == False\n\n", "language": "python", "canonical_solution": " l = len(string)\n if l == 0 or l == 1:\n return False\n for i in range(2, l):\n if l % i == 0:\n return False\n return True\n", 
"description": "编写一个函数,它接受一个字符串并返回True,如果字符串长度是一个质数,否则返回False。\n示例:\n ", "natural_language": "Chinese"} 37 | {"task_id": "python/36", "prompt": "\ndef solve(N):\n \"\"\"给定一个正整数N,返回其二进制表示下所有数字的总和。\n\n 示例\n 对于N = 1000,数字总和为1,输出应为“1”。\n 对于N = 150,数字总和为6,输出应为“110”。\n 对于N = 147,数字总和为12,输出应为“1100”。\n\n 变量:\n @N 整数\n 约束条件:0 ≤ N ≤ 10000。\n 输出:\n 一个二进制数字的字符串。\n \n \"\"\"\n", "entry_point": "solve", "test": "def check(candidate):\n\n # Check some simple cases\n assert True, \"This prints if this assert fails 1 (good for debugging!)\"\n assert candidate(1000) == \"1\", \"Error\"\n assert candidate(150) == \"110\", \"Error\"\n assert candidate(147) == \"1100\", \"Error\"\n\n # Check some edge cases that are easy to work out by hand.\n assert True, \"This prints if this assert fails 2 (also good for debugging!)\"\n assert candidate(333) == \"1001\", \"Error\"\n assert candidate(963) == \"10010\", \"Error\"\n\n", "language": "python", "canonical_solution": " return bin(sum(int(i) for i in str(N)))[2:]\n", "description": "给定一个正整数N,返回其二进制表示下所有数字的总和。\n\n 示例\n 对于N = 1000,数字总和为1,输出应为“1”。\n 对于N = 150,数字总和为6,输出应为“110”。\n 对于N = 147,数字总和为12,输出应为“1100”。\n\n 变量:\n @N 整数\n 约束条件:0 ≤ N ≤ 10000。\n 输出:\n 一个二进制数字的字符串。\n ", "natural_language": "Chinese"} 38 | {"task_id": "python/37", "prompt": "\ndef get_row(lst, x):\n \"\"\"\n 给定一个二维数据,作为嵌套列表,类似于矩阵,但与矩阵不同的是,每行可能包含不同数量的列。给定lst和整数x,在列表中查找整数x,并返回元组列表[(x1,y1),(x2,y2)...],使得每个元组都是一个坐标-(行,列),从0开始。最初按行按升序排序坐标。此外,按列按降序排序行的坐标。\n\n 例子:\n \n get_row([\n [1,2,3,4,5,6],\n [1,2,3,4,1,6],\n [1,2,3,4,5,1]\n ], 1) == [(0, 0), (1, 4), (1, 0), (2, 5), (2, 0)]\n get_row([], 1) == []\n get_row([[], [1], [1, 2, 3]], 3) == [(2, 2)]\n \"\"\"\n", "entry_point": "get_row", "test": "def check(candidate):\n\n # Check some simple cases\n assert candidate([\n [1,2,3,4,5,6],\n [1,2,3,4,1,6],\n [1,2,3,4,5,1]\n ], 1) == [(0, 0), (1, 4), (1, 0), (2, 5), (2, 0)]\n assert candidate([\n [1,2,3,4,5,6],\n [1,2,3,4,5,6],\n [1,2,3,4,5,6],\n [1,2,3,4,5,6],\n [1,2,3,4,5,6],\n 
[1,2,3,4,5,6]\n ], 2) == [(0, 1), (1, 1), (2, 1), (3, 1), (4, 1), (5, 1)]\n assert candidate([\n [1,2,3,4,5,6],\n [1,2,3,4,5,6],\n [1,1,3,4,5,6],\n [1,2,1,4,5,6],\n [1,2,3,1,5,6],\n [1,2,3,4,1,6],\n [1,2,3,4,5,1]\n ], 1) == [(0, 0), (1, 0), (2, 1), (2, 0), (3, 2), (3, 0), (4, 3), (4, 0), (5, 4), (5, 0), (6, 5), (6, 0)]\n assert candidate([], 1) == []\n assert candidate([[1]], 2) == []\n assert candidate([[], [1], [1, 2, 3]], 3) == [(2, 2)]\n\n # Check some edge cases that are easy to work out by hand.\n assert True\n\n", "language": "python", "canonical_solution": " coords = [(i, j) for i in range(len(lst)) for j in range(len(lst[i])) if lst[i][j] == x]\n return sorted(sorted(coords, key=lambda x: x[1], reverse=True), key=lambda x: x[0])\n", "description": "给定一个二维数据,作为嵌套列表,类似于矩阵,但与矩阵不同的是,每行可能包含不同数量的列。给定lst和整数x,在列表中查找整数x,并返回元组列表[(x1,y1),(x2,y2)...],使得每个元组都是一个坐标-(行,列),从0开始。最初按行按升序排序坐标。此外,按列按降序排序行的坐标。\n\n 例子:\n ", "natural_language": "Chinese"} 39 | {"task_id": "python/38", "prompt": "\ndef next_smallest(lst):\n \"\"\"\n 你有一个整数列表。\n编写一个函数next_smallest(),返回列表中第二小的元素。\n如果没有这样的元素,则返回null。\n \n \n next_smallest([1, 2, 3, 4, 5]) == 2\n next_smallest([5, 1, 4, 3, 2]) == 2\n next_smallest([]) == None\n next_smallest([1, 1]) == None\n \"\"\"\n", "entry_point": "next_smallest", "test": "def check(candidate):\n\n # Check some simple cases\n assert candidate([1, 2, 3, 4, 5]) == 2\n assert candidate([5, 1, 4, 3, 2]) == 2\n assert candidate([]) == None\n assert candidate([1, 1]) == None\n assert candidate([1,1,1,1,0]) == 1\n assert candidate([1, 0**0]) == None\n assert candidate([-35, 34, 12, -45]) == -35\n\n # Check some edge cases that are easy to work out by hand.\n assert True\n\n", "language": "python", "canonical_solution": " lst = sorted(set(lst))\n return None if len(lst) < 2 else lst[1]\n", "description": "你有一个整数列表。\n编写一个函数next_smallest(),返回列表中第二小的元素。\n如果没有这样的元素,则返回null。\n ", "natural_language": "Chinese"} 40 | {"task_id": "python/39", "prompt": "\ndef is_bored(S):\n 
\"\"\"\n 你将会得到一个由单词组成的字符串,你的任务是计算无聊的数量。无聊的句子是以单词\"I\"开头的句子。句子以'.'、'?'或'!'为分隔符。\n\n 例如:\n \n >>> is_bored(\"Hello world\")\n 0\n >>> is_bored(\"The sky is blue. The sun is shining. I love this weather\")\n 1\n \"\"\"\n", "entry_point": "is_bored", "test": "def check(candidate):\n\n # Check some simple cases\n assert candidate(\"Hello world\") == 0, \"Test 1\"\n assert candidate(\"Is the sky blue?\") == 0, \"Test 2\"\n assert candidate(\"I love It !\") == 1, \"Test 3\"\n assert candidate(\"bIt\") == 0, \"Test 4\"\n assert candidate(\"I feel good today. I will be productive. will kill It\") == 2, \"Test 5\"\n assert candidate(\"You and I are going for a walk\") == 0, \"Test 6\"\n\n # Check some edge cases that are easy to work out by hand.\n assert True, \"This prints if this assert fails 2 (also good for debugging!)\"\n\n", "language": "python", "canonical_solution": " import re\n sentences = re.split(r'[.?!]\\s*', S)\n return sum(sentence[0:2] == 'I ' for sentence in sentences)\n", "description": "你将会得到一个由单词组成的字符串,你的任务是计算无聊的数量。无聊的句子是以单词\"I\"开头的句子。句子以'.'、'?'或'!'为分隔符。\n\n 例如:\n ", "natural_language": "Chinese"} 41 | {"task_id": "python/40", "prompt": "\n\ndef skjkasdkd(lst):\n \"\"\"给定一个整数列表。\n你需要找到最大的质数值并返回其数字之和。\n\n 例子:\n \n For lst = [0,3,2,1,3,5,7,4,5,5,5,2,181,32,4,32,3,2,32,324,4,3] the output should be 10\n For lst = [1,0,1,8,2,4597,2,1,3,40,1,2,1,2,4,2,5,1] the output should be 25\n For lst = [1,3,1,32,5107,34,83278,109,163,23,2323,32,30,1,9,3] the output should be 13\n For lst = [0,724,32,71,99,32,6,0,5,91,83,0,5,6] the output should be 11\n For lst = [0,81,12,3,1,21] the output should be 3\n For lst = [0,8,1,2,1,7] the output should be 7\n \"\"\"\n", "entry_point": "skjkasdkd", "test": "def check(candidate):\n\n # Check some simple cases\n assert candidate([0,3,2,1,3,5,7,4,5,5,5,2,181,32,4,32,3,2,32,324,4,3]) == 10, \"This prints if this assert fails 1 (good for debugging!)\"\n\n # Check some edge cases that are easy to work out by hand.\n assert 
candidate([1,0,1,8,2,4597,2,1,3,40,1,2,1,2,4,2,5,1]) == 25, \"This prints if this assert fails 2 (also good for debugging!)\"\n\n # Check some edge cases that are easy to work out by hand.\n assert candidate([1,3,1,32,5107,34,83278,109,163,23,2323,32,30,1,9,3]) == 13, \"This prints if this assert fails 3 (also good for debugging!)\"\n\n # Check some edge cases that are easy to work out by hand.\n assert candidate([0,724,32,71,99,32,6,0,5,91,83,0,5,6]) == 11, \"This prints if this assert fails 4 (also good for debugging!)\"\n \n # Check some edge cases that are easy to work out by hand.\n assert candidate([0,81,12,3,1,21]) == 3, \"This prints if this assert fails 5 (also good for debugging!)\"\n\n # Check some edge cases that are easy to work out by hand.\n assert candidate([0,8,1,2,1,7]) == 7, \"This prints if this assert fails 6 (also good for debugging!)\"\n\n assert candidate([8191]) == 19, \"This prints if this assert fails 7 (also good for debugging!)\"\n assert candidate([8191, 123456, 127, 7]) == 19, \"This prints if this assert fails 8 (also good for debugging!)\"\n assert candidate([127, 97, 8192]) == 10, \"This prints if this assert fails 9 (also good for debugging!)\"\n", "language": "python", "canonical_solution": " def isPrime(n):\n for i in range(2,int(n**0.5)+1):\n if n%i==0:\n return False\n\n return True\n maxx = 0\n i = 0\n while i < len(lst):\n if(lst[i] > maxx and isPrime(lst[i])):\n maxx = lst[i]\n i+=1\n result = sum(int(digit) for digit in str(maxx))\n return result\n\n", "description": "给定一个整数列表。\n你需要找到最大的质数值并返回其数字之和。\n\n 例子:\n ", "natural_language": "Chinese"} 42 | {"task_id": "python/41", "prompt": "\ndef check_dict_case(dict):\n \"\"\"\n 给定一个字典,如果所有键都是小写字符串或所有键都是大写字符串,则返回True,否则返回False。如果给定的字典为空,则函数应返回False。示例:\n \n check_dict_case({\"a\":\"apple\", \"b\":\"banana\"}) should return True.\n check_dict_case({\"a\":\"apple\", \"A\":\"banana\", \"B\":\"banana\"}) should return False.\n check_dict_case({\"a\":\"apple\", 8:\"banana\", 
\"a\":\"apple\"}) should return False.\n check_dict_case({\"Name\":\"John\", \"Age\":\"36\", \"City\":\"Houston\"}) should return False.\n check_dict_case({\"STATE\":\"NC\", \"ZIP\":\"12345\" }) should return True.\n \"\"\"\n", "entry_point": "check_dict_case", "test": "def check(candidate):\n\n # Check some simple cases\n assert candidate({\"p\":\"pineapple\", \"b\":\"banana\"}) == True, \"First test error: \" + str(candidate({\"p\":\"pineapple\", \"b\":\"banana\"}))\n assert candidate({\"p\":\"pineapple\", \"A\":\"banana\", \"B\":\"banana\"}) == False, \"Second test error: \" + str(candidate({\"p\":\"pineapple\", \"A\":\"banana\", \"B\":\"banana\"}))\n assert candidate({\"p\":\"pineapple\", 5:\"banana\", \"a\":\"apple\"}) == False, \"Third test error: \" + str(candidate({\"p\":\"pineapple\", 5:\"banana\", \"a\":\"apple\"}))\n assert candidate({\"Name\":\"John\", \"Age\":\"36\", \"City\":\"Houston\"}) == False, \"Fourth test error: \" + str(candidate({\"Name\":\"John\", \"Age\":\"36\", \"City\":\"Houston\"}))\n assert candidate({\"STATE\":\"NC\", \"ZIP\":\"12345\" }) == True, \"Fifth test error: \" + str(candidate({\"STATE\":\"NC\", \"ZIP\":\"12345\" })) \n assert candidate({\"fruit\":\"Orange\", \"taste\":\"Sweet\" }) == True, \"Fourth test error: \" + str(candidate({\"fruit\":\"Orange\", \"taste\":\"Sweet\" })) \n\n\n # Check some edge cases that are easy to work out by hand.\n assert candidate({}) == False, \"1st edge test error: \" + str(candidate({}))\n\n", "language": "python", "canonical_solution": " if len(dict.keys()) == 0:\n return False\n else:\n state = \"start\"\n for key in dict.keys():\n\n if isinstance(key, str) == False:\n state = \"mixed\"\n break\n if state == \"start\":\n if key.isupper():\n state = \"upper\"\n elif key.islower():\n state = \"lower\"\n else:\n break\n elif (state == \"upper\" and not key.isupper()) or (state == \"lower\" and not key.islower()):\n state = \"mixed\"\n break\n else:\n break\n return state == \"upper\" or state == 
\"lower\" \n", "description": "给定一个字典,如果所有键都是小写字符串或所有键都是大写字符串,则返回True,否则返回False。如果给定的字典为空,则函数应返回False。示例:\n ", "natural_language": "Chinese"} 43 | {"task_id": "python/42", "prompt": "\ndef closest_integer(value):\n '''\n 创建一个函数,它接受一个表示数字的值(字符串),并返回最接近它的整数。如果该数字距离两个整数相等,则将其四舍五入到远离零的方向。\n\n 例子:\n \n >>> closest_integer(\"10\")\n 10\n >>> closest_integer(\"15.3\")\n 15\n\n Note:\n Rounding away from zero means that if the given number is equidistant\n from two integers, the one you should return is the one that is the\n farthest from zero. For example closest_integer(\"14.5\") should\n return 15 and closest_integer(\"-14.5\") should return -15.\n '''\n", "entry_point": "closest_integer", "test": "def check(candidate):\n\n # Check some simple cases\n assert candidate(\"10\") == 10, \"Test 1\"\n assert candidate(\"14.5\") == 15, \"Test 2\"\n assert candidate(\"-15.5\") == -16, \"Test 3\"\n assert candidate(\"15.3\") == 15, \"Test 3\"\n\n # Check some edge cases that are easy to work out by hand.\n assert candidate(\"0\") == 0, \"Test 0\"\n\n", "language": "python", "canonical_solution": " from math import floor, ceil\n\n if value.count('.') == 1:\n # remove trailing zeros\n while (value[-1] == '0'):\n value = value[:-1]\n\n num = float(value)\n if value[-2:] == '.5':\n if num > 0:\n res = ceil(num)\n else:\n res = floor(num)\n elif len(value) > 0:\n res = int(round(num))\n else:\n res = 0\n\n return res\n\n", "description": "创建一个函数,它接受一个表示数字的值(字符串),并返回最接近它的整数。如果该数字距离两个整数相等,则将其四舍五入到远离零的方向。\n\n 例子:\n ", "natural_language": "Chinese"} 44 | {"task_id": "python/43", "prompt": "\ndef make_a_pile(n):\n \"\"\"\n 给定一个正整数n,你需要建立一个n级的石头堆。\n第一层有n个石头。\n下一层的石头数量为:\n- 如果n是奇数,则为下一个奇数。\n- 如果n是偶数,则为下一个偶数。\n返回一个列表,其中索引为i的元素表示第(i+1)层的石头数量。\n\n 例子:\n \n >>> make_a_pile(3)\n [3, 5, 7]\n \"\"\"\n", "entry_point": "make_a_pile", "test": "def check(candidate):\n\n # Check some simple cases\n assert candidate(3) == [3, 5, 7], \"Test 3\"\n assert candidate(4) == [4,6,8,10], \"Test 4\"\n assert 
candidate(5) == [5, 7, 9, 11, 13]\n assert candidate(6) == [6, 8, 10, 12, 14, 16]\n assert candidate(8) == [8, 10, 12, 14, 16, 18, 20, 22]\n\n # Check some edge cases that are easy to work out by hand.\n assert True, \"This prints if this assert fails 2 (also good for debugging!)\"\n\n", "language": "python", "canonical_solution": " return [n + 2*i for i in range(n)]\n", "description": "给定一个正整数n,你需要建立一个n级的石头堆。\n第一层有n个石头。\n下一层的石头数量为:\n- 如果n是奇数,则为下一个奇数。\n- 如果n是偶数,则为下一个偶数。\n返回一个列表,其中索引为i的元素表示第(i+1)层的石头数量。\n\n 例子:\n ", "natural_language": "Chinese"} 45 | {"task_id": "python/44", "prompt": "\ndef words_string(s):\n \"\"\"\n 你将会得到一个由逗号或空格分隔的单词字符串。你的任务是将字符串分割成单词并返回一个单词数组。\n\n 例如:\n \n words_string(\"Hi, my name is John\") == [\"Hi\", \"my\", \"name\", \"is\", \"John\"]\n words_string(\"One, two, three, four, five, six\") == [\"One\", \"two\", \"three\", \"four\", \"five\", \"six\"]\n \"\"\"\n", "entry_point": "words_string", "test": "def check(candidate):\n\n # Check some simple cases\n assert True, \"This prints if this assert fails 1 (good for debugging!)\"\n assert candidate(\"Hi, my name is John\") == [\"Hi\", \"my\", \"name\", \"is\", \"John\"]\n assert candidate(\"One, two, three, four, five, six\") == [\"One\", \"two\", \"three\", \"four\", \"five\", \"six\"]\n assert candidate(\"Hi, my name\") == [\"Hi\", \"my\", \"name\"]\n assert candidate(\"One,, two, three, four, five, six,\") == [\"One\", \"two\", \"three\", \"four\", \"five\", \"six\"]\n\n # Check some edge cases that are easy to work out by hand.\n assert True, \"This prints if this assert fails 2 (also good for debugging!)\"\n assert candidate(\"\") == []\n assert candidate(\"ahmed , gamal\") == [\"ahmed\", \"gamal\"]\n\n", "language": "python", "canonical_solution": " if not s:\n return []\n\n s_list = []\n\n for letter in s:\n if letter == ',':\n s_list.append(' ')\n else:\n s_list.append(letter)\n\n s_list = \"\".join(s_list)\n return s_list.split()\n", "description": 
"你将会得到一个由逗号或空格分隔的单词字符串。你的任务是将字符串分割成单词并返回一个单词数组。\n\n 例如:\n ", "natural_language": "Chinese"} 46 | {"task_id": "python/45", "prompt": "\ndef choose_num(x, y):\n \"\"\"这个函数接受两个正整数x和y,并返回在[x,y]范围内的最大偶数整数。如果没有这样的数字,则函数应返回-1。\n\n 例如:\n \n choose_num(12, 15) = 14\n choose_num(13, 12) = -1\n \"\"\"\n", "entry_point": "choose_num", "test": "def check(candidate):\n\n # Check some simple cases\n assert candidate(12, 15) == 14\n assert candidate(13, 12) == -1\n assert candidate(33, 12354) == 12354\n assert candidate(5234, 5233) == -1\n assert candidate(6, 29) == 28\n assert candidate(27, 10) == -1\n\n # Check some edge cases that are easy to work out by hand.\n assert candidate(7, 7) == -1\n assert candidate(546, 546) == 546\n\n", "language": "python", "canonical_solution": " if x > y:\n return -1\n if y % 2 == 0:\n return y\n if x == y:\n return -1\n return y - 1\n", "description": "这个函数接受两个正整数x和y,并返回在[x,y]范围内的最大偶数整数。如果没有这样的数字,则函数应返回-1。\n\n 例如:\n ", "natural_language": "Chinese"} 47 | {"task_id": "python/46", "prompt": "\ndef rounded_avg(n, m):\n \"\"\"给定两个正整数n和m,你的任务是计算从n到m(包括n和m)的整数的平均值。\n将答案四舍五入为最接近的整数,并将其转换为二进制。\n如果n大于m,则返回-1。\n例子:\n \n rounded_avg(1, 5) => \"0b11\"\n rounded_avg(7, 5) => -1\n rounded_avg(10, 20) => \"0b1111\"\n rounded_avg(20, 33) => \"0b11010\"\n \"\"\"\n", "entry_point": "rounded_avg", "test": "def check(candidate):\n\n # Check some simple cases\n assert candidate(1, 5) == \"0b11\"\n assert candidate(7, 13) == \"0b1010\"\n assert candidate(964,977) == \"0b1111001010\"\n assert candidate(996,997) == \"0b1111100100\"\n assert candidate(560,851) == \"0b1011000010\"\n assert candidate(185,546) == \"0b101101110\"\n assert candidate(362,496) == \"0b110101101\"\n assert candidate(350,902) == \"0b1001110010\"\n assert candidate(197,233) == \"0b11010111\"\n\n\n # Check some edge cases that are easy to work out by hand.\n assert candidate(7, 5) == -1\n assert candidate(5, 1) == -1\n assert candidate(5, 5) == \"0b101\"\n\n", "language": "python", 
"canonical_solution": " if m < n:\n return -1\n summation = 0\n for i in range(n, m+1):\n summation += i\n return bin(round(summation/(m - n + 1)))\n", "description": "给定两个正整数n和m,你的任务是计算从n到m(包括n和m)的整数的平均值。\n将答案四舍五入为最接近的整数,并将其转换为二进制。\n如果n大于m,则返回-1。\n例子:\n ", "natural_language": "Chinese"} 48 | {"task_id": "python/47", "prompt": "\ndef f(n):\n \"\"\" 实现函数f,它以n为参数,返回一个大小为n的列表,其中索引i处的元素值为i的阶乘(如果i为偶数)或1到i的数字之和(如果i为奇数)。i从1开始。i的阶乘是从1到i的数字的乘积(1 * 2 * ... * i)。例如:\n \n f(5) == [1, 2, 6, 24, 15]\n \"\"\"\n", "entry_point": "f", "test": "def check(candidate):\n\n assert candidate(5) == [1, 2, 6, 24, 15]\n assert candidate(7) == [1, 2, 6, 24, 15, 720, 28]\n assert candidate(1) == [1]\n assert candidate(3) == [1, 2, 6]\n", "language": "python", "canonical_solution": " ret = []\n for i in range(1,n+1):\n if i%2 == 0:\n x = 1\n for j in range(1,i+1): x *= j\n ret += [x]\n else:\n x = 0\n for j in range(1,i+1): x += j\n ret += [x]\n return ret\n", "description": "实现函数f,它以n为参数,返回一个大小为n的列表,其中索引i处的元素值为i的阶乘(如果i为偶数)或1到i的数字之和(如果i为奇数)。i从1开始。i的阶乘是从1到i的数字的乘积(1 * 2 * ... * i)。例如:\n ", "natural_language": "Chinese"} 49 | {"task_id": "python/48", "prompt": "\ndef even_odd_palindrome(n):\n \"\"\"\n 给定一个正整数n,返回一个元组,其中包含在范围(1,n)内的偶数和奇数整数回文数的数量。\n\n 示例1:\n\n 输入:3\n 输出:(1,2)\n 说明:\n 整数回文是1、2、3。其中一个是偶数,两个是奇数。\n\n 示例2:\n\n 输入:12\n 输出:(4,6)\n 说明:\n 整数回文是1、2、3、4、5、6、7、8、9、11。其中4个是偶数,6个是奇数。\n\n 注意:\n 1. 1 <= n <= 10^3\n 2. 
返回的元组分别是偶数和奇数整数回文数的数量。\n \n \"\"\"\n", "entry_point": "even_odd_palindrome", "test": "def check(candidate):\n\n # Check some simple cases\n assert candidate(123) == (8, 13)\n assert candidate(12) == (4, 6)\n assert candidate(3) == (1, 2)\n assert candidate(63) == (6, 8)\n assert candidate(25) == (5, 6)\n assert candidate(19) == (4, 6)\n assert candidate(9) == (4, 5), \"This prints if this assert fails 1 (good for debugging!)\"\n\n # Check some edge cases that are easy to work out by hand.\n assert candidate(1) == (0, 1), \"This prints if this assert fails 2 (also good for debugging!)\"\n\n", "language": "python", "canonical_solution": " def is_palindrome(n):\n return str(n) == str(n)[::-1]\n\n even_palindrome_count = 0\n odd_palindrome_count = 0\n\n for i in range(1, n+1):\n if i%2 == 1 and is_palindrome(i):\n odd_palindrome_count += 1\n elif i%2 == 0 and is_palindrome(i):\n even_palindrome_count += 1\n return (even_palindrome_count, odd_palindrome_count)\n", "description": "给定一个正整数n,返回一个元组,其中包含在范围(1,n)内的偶数和奇数整数回文数的数量。\n\n 示例1:\n\n 输入:3\n 输出:(1,2)\n 说明:\n 整数回文是1、2、3。其中一个是偶数,两个是奇数。\n\n 示例2:\n\n 输入:12\n 输出:(4,6)\n 说明:\n 整数回文是1、2、3、4、5、6、7、8、9、11。其中4个是偶数,6个是奇数。\n\n 注意:\n 1. 1 <= n <= 10^3\n 2. 
返回的元组分别是偶数和奇数整数回文数的数量。\n ", "natural_language": "Chinese"} 50 | {"task_id": "python/49", "prompt": "\ndef move_one_ball(arr):\n \"\"\"我们有一个由N个整数组成的数组'arr',数组中的数字将是随机排序的。你的任务是确定是否可以通过对给定数组执行以下操作来获得按非递减顺序排序的数组:\n\n 您可以执行任意次数的右移操作。\n \n 一个右移操作意味着将数组的所有元素向右移动一个位置。数组的最后一个元素将移动到数组的起始位置,即0号索引。\n\n 如果可以通过执行上述操作获得排序后的数组,则返回True,否则返回False。\n 如果给定的数组为空,则返回True。\n\n 注意:给定的列表保证具有唯一元素。\n\n 例如:\n \n move_one_ball([3, 4, 5, 1, 2])==>True\n 解释:通过执行2次右移操作,可以为给定数组实现非递减顺序。\n move_one_ball([3, 5, 4, 1, 2])==>False\n 解释:通过执行任意数量的右移操作,无法为给定数组获得非递减顺序。\n \n \n \"\"\"\n", "entry_point": "move_one_ball", "test": "def check(candidate):\n\n # Check some simple cases\n assert candidate([3, 4, 5, 1, 2])==True, \"This prints if this assert fails 1 (good for debugging!)\"\n assert candidate([3, 5, 10, 1, 2])==True\n assert candidate([4, 3, 1, 2])==False\n # Check some edge cases that are easy to work out by hand.\n assert candidate([3, 5, 4, 1, 2])==False, \"This prints if this assert fails 2 (also good for debugging!)\"\n assert candidate([])==True\n", "language": "python", "canonical_solution": " if len(arr)==0:\n return True\n sorted_array=sorted(arr)\n my_arr=[]\n \n min_value=min(arr)\n min_index=arr.index(min_value)\n my_arr=arr[min_index:]+arr[0:min_index]\n for i in range(len(arr)):\n if my_arr[i]!=sorted_array[i]:\n return False\n return True\n", "description": "我们有一个由N个整数组成的数组'arr',数组中的数字将是随机排序的。你的任务是确定是否可以通过对给定数组执行以下操作来获得按非递减顺序排序的数组:\n\n 您可以执行任意次数的右移操作。\n \n 一个右移操作意味着将数组的所有元素向右移动一个位置。数组的最后一个元素将移动到数组的起始位置,即0号索引。\n\n 如果可以通过执行上述操作获得排序后的数组,则返回True,否则返回False。\n 如果给定的数组为空,则返回True。\n\n 注意:给定的列表保证具有唯一元素。\n\n 例如:\n \n move_one_ball([3, 4, 5, 1, 2])==>True\n 解释:通过执行2次右移操作,可以为给定数组实现非递减顺序。\n move_one_ball([3, 5, 4, 1, 2])==>False\n 解释:通过执行任意数量的右移操作,无法为给定数组获得非递减顺序。\n ", "natural_language": "Chinese"} 51 | {"task_id": "python/50", "prompt": "\ndef exchange(lst1, lst2):\n 
\"\"\"在这个问题中,您将实现一个函数,该函数接受两个数字列表,并确定是否可以执行元素交换,使lst1成为仅包含偶数的列表。在lst1和lst2之间交换元素的数量没有限制。如果可以在lst1和lst2之间交换元素以使lst1的所有元素都是偶数,则返回“YES”。否则,返回“NO”。例如:exchange([1,2,3,4],[1,2,3,4])=>“YES”exchange([1,2,3,4],[1,5,3,4])=>“NO”假定输入列表将不为空。\n \n \"\"\"\n", "entry_point": "exchange", "test": "def check(candidate):\n\n # Check some simple cases\n assert candidate([1, 2, 3, 4], [1, 2, 3, 4]) == \"YES\"\n assert candidate([1, 2, 3, 4], [1, 5, 3, 4]) == \"NO\"\n assert candidate([1, 2, 3, 4], [2, 1, 4, 3]) == \"YES\" \n assert candidate([5, 7, 3], [2, 6, 4]) == \"YES\"\n assert candidate([5, 7, 3], [2, 6, 3]) == \"NO\" \n assert candidate([3, 2, 6, 1, 8, 9], [3, 5, 5, 1, 1, 1]) == \"NO\"\n\n # Check some edge cases that are easy to work out by hand.\n assert candidate([100, 200], [200, 200]) == \"YES\"\n\n", "language": "python", "canonical_solution": " odd = 0\n even = 0\n for i in lst1:\n if i%2 == 1:\n odd += 1\n for i in lst2:\n if i%2 == 0:\n even += 1\n if even >= odd:\n return \"YES\"\n return \"NO\"\n \n", "description": "在这个问题中,您将实现一个函数,该函数接受两个数字列表,并确定是否可以执行元素交换,使lst1成为仅包含偶数的列表。在lst1和lst2之间交换元素的数量没有限制。如果可以在lst1和lst2之间交换元素以使lst1的所有元素都是偶数,则返回“YES”。否则,返回“NO”。例如:exchange([1,2,3,4],[1,2,3,4])=>“YES”exchange([1,2,3,4],[1,5,3,4])=>“NO”假定输入列表将不为空。\n ", "natural_language": "Chinese"} 52 | {"task_id": "python/51", "prompt": "\ndef reverse_delete(s,c):\n \"\"\"任务\n给定两个字符串s和c,您必须删除s中所有与c中任何字符相等的字符,然后检查结果字符串是否为回文。\n如果一个字符串从前往后读和从后往前读是一样的,那么这个字符串就是回文的。\n您应该返回一个包含结果字符串和True/False检查的元组。\n例子\n对于s =“abcde”,c =“ae”,结果应该是('bcd',False)\n对于s =“abcdef”,c =“b”,结果应该是('acdef',False)\n对于s =“abcdedcba”,c =“ab”,结果应该是('cdedc',True)\n \n \"\"\"\n", "entry_point": "reverse_delete", "test": "def check(candidate):\n\n assert candidate(\"abcde\",\"ae\") == ('bcd',False)\n assert candidate(\"abcdef\", \"b\") == ('acdef',False)\n assert candidate(\"abcdedcba\",\"ab\") == ('cdedc',True)\n assert candidate(\"dwik\",\"w\") == ('dik',False)\n assert candidate(\"a\",\"a\") == ('',True)\n assert 
candidate(\"abcdedcba\",\"\") == ('abcdedcba',True)\n assert candidate(\"abcdedcba\",\"v\") == ('abcdedcba',True)\n assert candidate(\"vabba\",\"v\") == ('abba',True)\n assert candidate(\"mamma\", \"mia\") == (\"\", True)\n", "language": "python", "canonical_solution": " s = ''.join([char for char in s if char not in c])\n return (s,s[::-1] == s)\n", "description": "任务\n给定两个字符串s和c,您必须删除s中所有与c中任何字符相等的字符,然后检查结果字符串是否为回文。\n如果一个字符串从前往后读和从后往前读是一样的,那么这个字符串就是回文的。\n您应该返回一个包含结果字符串和True/False检查的元组。\n例子\n对于s =“abcde”,c =“ae”,结果应该是('bcd',False)\n对于s =“abcdef”,c =“b”,结果应该是('acdef',False)\n对于s =“abcdedcba”,c =“ab”,结果应该是('cdedc',True)\n ", "natural_language": "Chinese"} 53 | {"task_id": "python/52", "prompt": "\ndef max_fill(grid, capacity):\n import math\n \"\"\"\n 你有一个井的矩形网格。每一行代表一个井,每一行中的1代表一个单位的水。每个井都有一个对应的桶,可以用来从中提取水,所有桶的容量相同。你的任务是使用桶来排空井。输出你需要降低桶的次数。\n\n 例1:\n输入:\n grid:[[0,0,1,0],[0,1,0,0],[1,1,1,1]]\n bucket_capacity:1\n输出:6\n\n 例2:\n输入:\n grid:[[0,0,1,1],[0,0,0,0],[1,1,1,1],[0,1,1,1]]\n bucket_capacity:2\n输出:5\n\n 例3:\n输入:\n grid:[[0,0,0],[0,0,0]]\n bucket_capacity:5\n输出:0\n\n 约束:\n*所有井的长度相同\n*1 <= grid.length <= 10^2\n*1 <= grid [:,1].length <= 10^2\n*grid [i] [j] -> 0 | 1\n*1 <= capacity <= 10\n \n \"\"\"\n", "entry_point": "max_fill", "test": "def check(candidate):\n\n\n # Check some simple cases\n assert True, \"This prints if this assert fails 1 (good for debugging!)\"\n assert candidate([[0,0,1,0], [0,1,0,0], [1,1,1,1]], 1) == 6, \"Error\"\n assert candidate([[0,0,1,1], [0,0,0,0], [1,1,1,1], [0,1,1,1]], 2) == 5, \"Error\"\n assert candidate([[0,0,0], [0,0,0]], 5) == 0, \"Error\"\n\n # Check some edge cases that are easy to work out by hand.\n assert True, \"This prints if this assert fails 2 (also good for debugging!)\"\n assert candidate([[1,1,1,1], [1,1,1,1]], 2) == 4, \"Error\"\n assert candidate([[1,1,1,1], [1,1,1,1]], 9) == 2, \"Error\"\n\n", "language": "python", "canonical_solution": " return sum([math.ceil(sum(arr)/capacity) for arr in grid])\n", 
"description": "你有一个井的矩形网格。每一行代表一个井,每一行中的1代表一个单位的水。每个井都有一个对应的桶,可以用来从中提取水,所有桶的容量相同。你的任务是使用桶来排空井。输出你需要降低桶的次数。\n\n 例1:\n输入:\n grid:[[0,0,1,0],[0,1,0,0],[1,1,1,1]]\n bucket_capacity:1\n输出:6\n\n 例2:\n输入:\n grid:[[0,0,1,1],[0,0,0,0],[1,1,1,1],[0,1,1,1]]\n bucket_capacity:2\n输出:5\n\n 例3:\n输入:\n grid:[[0,0,0],[0,0,0]]\n bucket_capacity:5\n输出:0\n\n 约束:\n*所有井的长度相同\n*1 <= grid.length <= 10^2\n*1 <= grid [:,1].length <= 10^2\n*grid [i] [j] -> 0 | 1\n*1 <= capacity <= 10\n ", "natural_language": "Chinese"} 54 | {"task_id": "python/53", "prompt": "\ndef select_words(s, n):\n \"\"\"给定一个字符串s和一个自然数n,你被要求实现一个函数,该函数返回字符串s中包含恰好n个辅音字母的所有单词的列表,按照它们在字符串s中出现的顺序。\n如果字符串s为空,则函数应返回一个空列表。\n注意:您可以假设输入字符串仅包含字母和空格。\n示例:\n \n select_words(\"Mary had a little lamb\", 4) ==> [\"little\"]\n select_words(\"Mary had a little lamb\", 3) ==> [\"Mary\", \"lamb\"]\n select_words(\"simple white space\", 2) ==> []\n select_words(\"Hello world\", 4) ==> [\"world\"]\n select_words(\"Uncle sam\", 3) ==> [\"Uncle\"]\n \"\"\"\n", "entry_point": "select_words", "test": "def check(candidate):\n\n # Check some simple cases\n assert candidate(\"Mary had a little lamb\", 4) == [\"little\"], \"First test error: \" + str(candidate(\"Mary had a little lamb\", 4)) \n assert candidate(\"Mary had a little lamb\", 3) == [\"Mary\", \"lamb\"], \"Second test error: \" + str(candidate(\"Mary had a little lamb\", 3)) \n assert candidate(\"simple white space\", 2) == [], \"Third test error: \" + str(candidate(\"simple white space\", 2)) \n assert candidate(\"Hello world\", 4) == [\"world\"], \"Fourth test error: \" + str(candidate(\"Hello world\", 4)) \n assert candidate(\"Uncle sam\", 3) == [\"Uncle\"], \"Fifth test error: \" + str(candidate(\"Uncle sam\", 3))\n\n\n # Check some edge cases that are easy to work out by hand.\n assert candidate(\"\", 4) == [], \"1st edge test error: \" + str(candidate(\"\", 4))\n assert candidate(\"a b c d e f\", 1) == [\"b\", \"c\", \"d\", \"f\"], \"2nd edge test error: \" + str(candidate(\"a b c 
d e f\", 1))\n\n", "language": "python", "canonical_solution": " result = []\n for word in s.split():\n n_consonants = 0\n for i in range(0, len(word)):\n if word[i].lower() not in [\"a\",\"e\",\"i\",\"o\",\"u\"]:\n n_consonants += 1 \n if n_consonants == n:\n result.append(word)\n return result\n\n", "description": "给定一个字符串s和一个自然数n,你被要求实现一个函数,该函数返回字符串s中包含恰好n个辅音字母的所有单词的列表,按照它们在字符串s中出现的顺序。\n如果字符串s为空,则函数应返回一个空列表。\n注意:您可以假设输入字符串仅包含字母和空格。\n示例:\n ", "natural_language": "Chinese"} 55 | {"task_id": "python/54", "prompt": "\ndef maximum(arr, k):\n \"\"\"\n 给定一个整数数组arr和一个正整数k,返回一个长度为k的排序列表,其中包含arr中最大的k个数字。\n\n 示例1:\n\n 输入:arr = [-3,-4,5],k = 3\n 输出:[-4,-3,5]\n\n 示例2:\n\n 输入:arr = [4,-4,4],k = 2\n 输出:[4,4]\n\n 示例3:\n\n 输入:arr = [-3,2,1,2,-1,-2,1],k = 1\n 输出:[2]\n\n 注意:\n\n 1.数组的长度将在[1,1000]范围内。\n 2.数组中的元素将在[-1000,1000]范围内。\n 3.0 <= k <= len(arr)\n \n \"\"\"\n", "entry_point": "maximum", "test": "def check(candidate):\n\n # Check some simple cases\n assert candidate([-3, -4, 5], 3) == [-4, -3, 5]\n assert candidate([4, -4, 4], 2) == [4, 4]\n assert candidate([-3, 2, 1, 2, -1, -2, 1], 1) == [2]\n assert candidate([123, -123, 20, 0 , 1, 2, -3], 3) == [2, 20, 123]\n assert candidate([-123, 20, 0 , 1, 2, -3], 4) == [0, 1, 2, 20]\n assert candidate([5, 15, 0, 3, -13, -8, 0], 7) == [-13, -8, 0, 0, 3, 5, 15]\n assert candidate([-1, 0, 2, 5, 3, -10], 2) == [3, 5]\n assert candidate([1, 0, 5, -7], 1) == [5]\n assert candidate([4, -4], 2) == [-4, 4]\n assert candidate([-10, 10], 2) == [-10, 10]\n\n # Check some edge cases that are easy to work out by hand.\n assert candidate([1, 2, 3, -23, 243, -400, 0], 0) == []\n\n", "language": "python", "canonical_solution": " if k == 0:\n return []\n arr.sort()\n ans = arr[-k:]\n return ans\n", "description": "给定一个整数数组arr和一个正整数k,返回一个长度为k的排序列表,其中包含arr中最大的k个数字。\n\n 示例1:\n\n 输入:arr = [-3,-4,5],k = 3\n 输出:[-4,-3,5]\n\n 示例2:\n\n 输入:arr = [4,-4,4],k = 2\n 输出:[4,4]\n\n 示例3:\n\n 输入:arr = [-3,2,1,2,-1,-2,1],k = 1\n 输出:[2]\n\n 注意:\n\n 
1.数组的长度将在[1,1000]范围内。\n 2.数组中的元素将在[-1000,1000]范围内。\n 3.0 <= k <= len(arr)\n ", "natural_language": "Chinese"} 56 | {"task_id": "python/55", "prompt": "\ndef add_elements(arr, k):\n \"\"\"\n 给定一个非空整数数组 arr 和一个整数 k,返回 arr 的前 k 个元素中最多有两位数的元素的和。\n\n 示例:\n\n 输入:arr = [111,21,3,4000,5,6,7,8,9],k = 4\n 输出:24 # 21 + 3 的和\n\n 限制条件:\n 1. 1 <= len(arr) <= 100\n 2. 1 <= k <= len(arr)\n \n \"\"\"\n", "entry_point": "add_elements", "test": "def check(candidate):\n\n # Check some simple cases\n assert candidate([1,-2,-3,41,57,76,87,88,99], 3) == -4\n assert candidate([111,121,3,4000,5,6], 2) == 0\n assert candidate([11,21,3,90,5,6,7,8,9], 4) == 125\n assert candidate([111,21,3,4000,5,6,7,8,9], 4) == 24, \"This prints if this assert fails 1 (good for debugging!)\"\n\n # Check some edge cases that are easy to work out by hand.\n assert candidate([1], 1) == 1, \"This prints if this assert fails 2 (also good for debugging!)\"\n\n", "language": "python", "canonical_solution": " return sum(elem for elem in arr[:k] if len(str(elem)) <= 2)\n", "description": "给定一个非空整数数组 arr 和一个整数 k,返回 arr 的前 k 个元素中最多有两位数的元素的和。\n\n 示例:\n\n 输入:arr = [111,21,3,4000,5,6,7,8,9],k = 4\n 输出:24 # 21 + 3 的和\n\n 限制条件:\n 1. 1 <= len(arr) <= 100\n 2. 
1 <= k <= len(arr)\n ", "natural_language": "Chinese"} 57 | {"task_id": "python/56", "prompt": "\ndef intersection(interval1, interval2):\n \"\"\"给定两个区间,\n 每个区间都是一对整数。例如,区间 = (起始,结束) = (1,2)。\n 给定的区间是闭合的,这意味着区间(起始,结束)\n 包括起始和结束。\n 对于每个给定的区间,假定其起始小于或等于其结束。\n 您的任务是确定这两个区间的交集长度是否为质数。\n 例如,区间(1,3),(2,4)的交集是(2,3)\n 其长度为1,不是质数。\n 如果交集的长度是质数,则返回“YES”,\n 否则返回“NO”。\n 如果两个区间不相交,则返回“NO”。\n\n [输入/输出]示例:\n \n intersection((1, 2), (2, 3)) ==> \"NO\"\n intersection((-1, 1), (0, 4)) ==> \"NO\"\n intersection((-3, -1), (-5, 5)) ==> \"YES\"\n \"\"\"\n", "entry_point": "intersection", "test": "def check(candidate):\n\n # Check some simple cases\n assert candidate((1, 2), (2, 3)) == \"NO\"\n assert candidate((-1, 1), (0, 4)) == \"NO\"\n assert candidate((-3, -1), (-5, 5)) == \"YES\"\n assert candidate((-2, 2), (-4, 0)) == \"YES\"\n\n # Check some edge cases that are easy to work out by hand.\n assert candidate((-11, 2), (-1, -1)) == \"NO\"\n assert candidate((1, 2), (3, 5)) == \"NO\"\n assert candidate((1, 2), (1, 2)) == \"NO\"\n assert candidate((-2, -2), (-3, -2)) == \"NO\"\n\n", "language": "python", "canonical_solution": " def is_prime(num):\n if num == 1 or num == 0:\n return False\n if num == 2:\n return True\n for i in range(2, num):\n if num%i == 0:\n return False\n return True\n\n l = max(interval1[0], interval2[0])\n r = min(interval1[1], interval2[1])\n length = r - l\n if length > 0 and is_prime(length):\n return \"YES\"\n return \"NO\"\n", "description": "给定两个区间,\n 每个区间都是一对整数。例如,区间 = (起始,结束) = (1,2)。\n 给定的区间是闭合的,这意味着区间(起始,结束)\n 包括起始和结束。\n 对于每个给定的区间,假定其起始小于或等于其结束。\n 您的任务是确定这两个区间的交集长度是否为质数。\n 例如,区间(1,3),(2,4)的交集是(2,3)\n 其长度为1,不是质数。\n 如果交集的长度是质数,则返回“YES”,\n 否则返回“NO”。\n 如果两个区间不相交,则返回“NO”。\n\n [输入/输出]示例:\n ", "natural_language": "Chinese"} 58 | {"task_id": "python/57", "prompt": "\ndef tri(n):\n \"\"\"每个人都知道斐波那契数列,在过去的几个世纪里,数学家们对其进行了深入研究。然而,人们不知道的是特里波那契数列。特里波那契数列由递归定义:\ntri(1) = 3\ntri(n) = 1 + n / 2,如果n是偶数。\ntri(n) = tri(n - 1) + tri(n - 2) + tri(n + 1),如果n是奇数。\n例如:\ntri(2) = 
1 + (2 / 2) = 2\ntri(4) = 3\ntri(3) = tri(2) + tri(1) + tri(4)\n= 2 + 3 + 3 = 8\n给定一个非负整数n,您必须返回特里波那契数列的前n + 1个数字的列表。\n例子:\ntri(3) = [1, 3, 2, 8]\n \n \"\"\"\n", "entry_point": "tri", "test": "def check(candidate):\n\n # Check some simple cases\n \n assert candidate(3) == [1, 3, 2.0, 8.0]\n assert candidate(4) == [1, 3, 2.0, 8.0, 3.0]\n assert candidate(5) == [1, 3, 2.0, 8.0, 3.0, 15.0]\n assert candidate(6) == [1, 3, 2.0, 8.0, 3.0, 15.0, 4.0]\n assert candidate(7) == [1, 3, 2.0, 8.0, 3.0, 15.0, 4.0, 24.0]\n assert candidate(8) == [1, 3, 2.0, 8.0, 3.0, 15.0, 4.0, 24.0, 5.0]\n assert candidate(9) == [1, 3, 2.0, 8.0, 3.0, 15.0, 4.0, 24.0, 5.0, 35.0]\n assert candidate(20) == [1, 3, 2.0, 8.0, 3.0, 15.0, 4.0, 24.0, 5.0, 35.0, 6.0, 48.0, 7.0, 63.0, 8.0, 80.0, 9.0, 99.0, 10.0, 120.0, 11.0]\n\n # Check some edge cases that are easy to work out by hand.\n assert candidate(0) == [1]\n assert candidate(1) == [1, 3]\n", "language": "python", "canonical_solution": " if n == 0:\n return [1]\n my_tri = [1, 3]\n for i in range(2, n + 1):\n if i % 2 == 0:\n my_tri.append(i / 2 + 1)\n else:\n my_tri.append(my_tri[i - 1] + my_tri[i - 2] + (i + 3) / 2)\n return my_tri\n", "description": "每个人都知道斐波那契数列,在过去的几个世纪里,数学家们对其进行了深入研究。然而,人们不知道的是特里波那契数列。特里波那契数列由递归定义:\ntri(1) = 3\ntri(n) = 1 + n / 2,如果n是偶数。\ntri(n) = tri(n - 1) + tri(n - 2) + tri(n + 1),如果n是奇数。\n例如:\ntri(2) = 1 + (2 / 2) = 2\ntri(4) = 3\ntri(3) = tri(2) + tri(1) + tri(4)\n= 2 + 3 + 3 = 8\n给定一个非负整数n,您必须返回特里波那契数列的前n + 1个数字的列表。\n例子:\ntri(3) = [1, 3, 2, 8]\n ", "natural_language": "Chinese"} 59 | {"task_id": "python/58", "prompt": "\ndef digits(n):\n \"\"\"给定一个正整数n,返回所有奇数位数字的乘积。\n如果所有数字都是偶数,则返回0。\n例如:\n \n digits(1) == 1\n digits(4) == 0\n digits(235) == 15\n \"\"\"\n", "entry_point": "digits", "test": "def check(candidate):\n\n # Check some simple cases\n assert candidate(5) == 5\n assert candidate(54) == 5\n assert candidate(120) ==1\n assert candidate(5014) == 5\n assert candidate(98765) == 315\n assert candidate(5576543) == 
2625\n\n # Check some edge cases that are easy to work out by hand.\n assert candidate(2468) == 0\n\n", "language": "python", "canonical_solution": " product = 1\n odd_count = 0\n for digit in str(n):\n int_digit = int(digit)\n if int_digit%2 == 1:\n product= product*int_digit\n odd_count+=1\n if odd_count ==0:\n return 0\n else:\n return product\n", "description": "给定一个正整数n,返回所有奇数位数字的乘积。\n如果所有数字都是偶数,则返回0。\n例如:\n ", "natural_language": "Chinese"} 60 | {"task_id": "python/59", "prompt": "\ndef is_nested(string):\n '''\n 创建一个函数,该函数以仅包含方括号的字符串作为输入。\n 如果存在有效的括号子序列且至少有一个括号嵌套,则函数应返回True。\n \n\n is_nested('[[]]') ➞ True\n is_nested('[]]]]]]][[[[[]') ➞ False\n is_nested('[][]') ➞ False\n is_nested('[]') ➞ False\n is_nested('[[][]]') ➞ True\n is_nested('[[]][[') ➞ True\n '''\n", "entry_point": "is_nested", "test": "def check(candidate):\n\n # Check some simple cases\n assert candidate('[[]]') == True, \"This prints if this assert fails 1 (good for debugging!)\"\n assert candidate('[]]]]]]][[[[[]') == False\n assert candidate('[][]') == False\n assert candidate(('[]')) == False\n assert candidate('[[[[]]]]') == True\n assert candidate('[]]]]]]]]]]') == False\n assert candidate('[][][[]]') == True\n assert candidate('[[]') == False\n assert candidate('[]]') == False\n assert candidate('[[]][[') == True\n assert candidate('[[][]]') == True\n\n # Check some edge cases that are easy to work out by hand.\n assert candidate('') == False, \"This prints if this assert fails 2 (also good for debugging!)\"\n assert candidate('[[[[[[[[') == False\n assert candidate(']]]]]]]]') == False\n\n", "language": "python", "canonical_solution": " opening_bracket_index = []\n closing_bracket_index = []\n for i in range(len(string)):\n if string[i] == '[':\n opening_bracket_index.append(i)\n else:\n closing_bracket_index.append(i)\n closing_bracket_index.reverse()\n cnt = 0\n i = 0\n l = len(closing_bracket_index)\n for idx in opening_bracket_index:\n if i < l and idx < closing_bracket_index[i]:\n 
cnt += 1\n i += 1\n return cnt >= 2\n\n \n", "description": "创建一个函数,该函数以仅包含方括号的字符串作为输入。\n 如果存在有效的括号子序列且至少有一个括号嵌套,则函数应返回True。\n ", "natural_language": "Chinese"} 61 | {"task_id": "python/60", "prompt": "\n\ndef sum_squares(lst):\n \"\"\"给定一个数字列表。\n你需要返回给定列表中平方数的总和,\n首先将列表中的每个元素四舍五入到上限整数(Ceiling)。\n例子:\n对于lst = [1,2,3],输出应该是14\n对于lst = [1,4,9],输出应该是98\n对于lst = [1,3,5,7],输出应该是84\n对于lst = [1.4,4.2,0],输出应该是29\n对于lst = [-2.4,1,1],输出应该是6\n \n \n\n \"\"\"\n", "entry_point": "sum_squares", "test": "def check(candidate):\n\n # Check some simple cases\n assert candidate([1,2,3])==14, \"This prints if this assert fails 1 (good for debugging!)\"\n assert candidate([1.0,2,3])==14, \"This prints if this assert fails 1 (good for debugging!)\"\n assert candidate([1,3,5,7])==84, \"This prints if this assert fails 1 (good for debugging!)\"\n assert candidate([1.4,4.2,0])==29, \"This prints if this assert fails 1 (good for debugging!)\"\n assert candidate([-2.4,1,1])==6, \"This prints if this assert fails 1 (good for debugging!)\"\n\n assert candidate([100,1,15,2])==10230, \"This prints if this assert fails 1 (good for debugging!)\"\n assert candidate([10000,10000])==200000000, \"This prints if this assert fails 1 (good for debugging!)\"\n assert candidate([-1.4,4.6,6.3])==75, \"This prints if this assert fails 1 (good for debugging!)\"\n assert candidate([-1.4,17.9,18.9,19.9])==1086, \"This prints if this assert fails 1 (good for debugging!)\"\n\n\n # Check some edge cases that are easy to work out by hand.\n assert candidate([0])==0, \"This prints if this assert fails 2 (also good for debugging!)\"\n assert candidate([-1])==1, \"This prints if this assert fails 2 (also good for debugging!)\"\n assert candidate([-1,1,0])==2, \"This prints if this assert fails 2 (also good for debugging!)\"\n\n", "language": "python", "canonical_solution": " import math\n squared = 0\n for i in lst:\n squared += math.ceil(i)**2\n return squared\n", "description": 
"给定一个数字列表。\n你需要返回给定列表中平方数的总和,\n首先将列表中的每个元素四舍五入到上限整数(Ceiling)。\n例子:\n对于lst = [1,2,3],输出应该是14\n对于lst = [1,4,9],输出应该是98\n对于lst = [1,3,5,7],输出应该是84\n对于lst = [1.4,4.2,0],输出应该是29\n对于lst = [-2.4,1,1],输出应该是6\n ", "natural_language": "Chinese"} 62 | {"task_id": "python/61", "prompt": "\ndef check_if_last_char_is_a_letter(txt):\n '''\n 创建一个函数,如果给定字符串的最后一个字符是字母字符且不是单词的一部分,则返回True,否则返回False。\n注意:“单词”是由空格分隔的一组字符。\n\n 例子:\n \n check_if_last_char_is_a_letter(\"apple pie\") ➞ False\n check_if_last_char_is_a_letter(\"apple pi e\") ➞ True\n check_if_last_char_is_a_letter(\"apple pi e \") ➞ False\n check_if_last_char_is_a_letter(\"\") ➞ False \n '''\n", "entry_point": "check_if_last_char_is_a_letter", "test": "def check(candidate):\n\n # Check some simple cases\n assert candidate(\"apple\") == False\n assert candidate(\"apple pi e\") == True\n assert candidate(\"eeeee\") == False\n assert candidate(\"A\") == True\n assert candidate(\"Pumpkin pie \") == False\n assert candidate(\"Pumpkin pie 1\") == False\n assert candidate(\"\") == False\n assert candidate(\"eeeee e \") == False\n assert candidate(\"apple pie\") == False\n assert candidate(\"apple pi e \") == False\n\n # Check some edge cases that are easy to work out by hand.\n assert True\n\n", "language": "python", "canonical_solution": " \n check = txt.split(' ')[-1]\n return True if len(check) == 1 and (97 <= ord(check.lower()) <= 122) else False\n", "description": "创建一个函数,如果给定字符串的最后一个字符是字母字符且不是单词的一部分,则返回True,否则返回False。\n注意:“单词”是由空格分隔的一组字符。\n\n 例子:\n ", "natural_language": "Chinese"} 63 | {"task_id": "python/62", "prompt": "\ndef can_arrange(arr):\n \"\"\"创建一个函数,该函数返回一个元素的最大索引,该元素不大于或等于其前面的元素。如果不存在这样的元素,则返回-1。给定的数组不包含重复值。\n\n 例子:\n \n can_arrange([1,2,4,3,5]) = 3\n can_arrange([1,2,3]) = -1\n \"\"\"\n", "entry_point": "can_arrange", "test": "def check(candidate):\n\n # Check some simple cases\n assert candidate([1,2,4,3,5])==3\n assert candidate([1,2,4,5])==-1\n assert candidate([1,4,2,5,6,7,8,9,10])==2\n assert 
candidate([4,8,5,7,3])==4\n\n # Check some edge cases that are easy to work out by hand.\n assert candidate([])==-1\n\n", "language": "python", "canonical_solution": " ind=-1\n i=1\n while i 0, lst))\n return (max(smallest) if smallest else None, min(largest) if largest else None)\n", "description": "创建一个函数,返回一个元组(a,b),其中'a'是负整数中最大的,'b'是正整数中最小的。如果列表中没有负数或正数,则将它们作为None返回。\n\n 例子:\n ", "natural_language": "Chinese"} 65 | {"task_id": "python/64", "prompt": "\ndef special_factorial(n):\n \"\"\"巴西阶乘被定义为:\n 巴西阶乘(n) = n! * (n-1)! * (n-2)! * ... * 1!\n 其中 n > 0\n\n 例如:\n \n >>> special_factorial(4)\n 288\n\n The function will receive an integer as input and should return the special\n factorial of this integer.\n \"\"\"\n", "entry_point": "special_factorial", "test": "def check(candidate):\n\n # Check some simple cases\n assert candidate(4) == 288, \"Test 4\"\n assert candidate(5) == 34560, \"Test 5\"\n assert candidate(7) == 125411328000, \"Test 7\"\n\n # Check some edge cases that are easy to work out by hand.\n assert candidate(1) == 1, \"Test 1\"\n\n", "language": "python", "canonical_solution": " fact_i = 1\n special_fact = 1\n for i in range(1, n+1):\n fact_i *= i\n special_fact *= fact_i\n return special_fact\n", "description": "巴西阶乘被定义为:\n 巴西阶乘(n) = n! * (n-1)! * (n-2)! * ... 
* 1!\n 其中 n > 0\n\n 例如:\n ", "natural_language": "Chinese"} 66 | {"task_id": "python/65", "prompt": "\ndef words_in_sentence(sentence):\n \"\"\"\n 给定一个表示句子的字符串,\n句子包含一些由空格分隔的单词,\n您必须返回一个字符串,其中包含原始句子中长度为质数的单词,\n新字符串中单词的顺序应与原始字符串相同。\n\n 示例1:\n 输入:sentence = \"This is a test\"\n 输出:\"is\"\n\n 示例2:\n 输入:sentence = \"lets go for swimming\"\n 输出:\"go for\"\n\n 限制:\n * 1 <= len(sentence) <= 100\n * 句子只包含字母\n \n \"\"\"\n", "entry_point": "words_in_sentence", "test": "def check(candidate):\n\n # Check some simple cases\n assert candidate(\"This is a test\") == \"is\"\n assert candidate(\"lets go for swimming\") == \"go for\"\n assert candidate(\"there is no place available here\") == \"there is no place\"\n assert candidate(\"Hi I am Hussein\") == \"Hi am Hussein\"\n assert candidate(\"go for it\") == \"go for it\"\n\n # Check some edge cases that are easy to work out by hand.\n assert candidate(\"here\") == \"\"\n assert candidate(\"here is\") == \"is\"\n\n", "language": "python", "canonical_solution": " new_lst = []\n for word in sentence.split():\n flg = 0\n if len(word) == 1:\n flg = 1\n for i in range(2, len(word)):\n if len(word)%i == 0:\n flg = 1\n if flg == 0 or len(word) == 2:\n new_lst.append(word)\n return \" \".join(new_lst)\n", "description": "给定一个表示句子的字符串,\n句子包含一些由空格分隔的单词,\n您必须返回一个字符串,其中包含原始句子中长度为质数的单词,\n新字符串中单词的顺序应与原始字符串相同。\n\n 示例1:\n 输入:sentence = \"This is a test\"\n 输出:\"is\"\n\n 示例2:\n 输入:sentence = \"lets go for swimming\"\n 输出:\"go for\"\n\n 限制:\n * 1 <= len(sentence) <= 100\n * 句子只包含字母\n ", "natural_language": "Chinese"} 67 | {"task_id": "python/66", "prompt": "\ndef simplify(x, n):\n \"\"\"你的任务是实现一个函数,简化表达式 x * n。如果 x * n 可以计算为整数,则函数返回 True,否则返回 False。x 和 n 都是分数的字符串表示形式,格式为 <分子>/<分母>,其中分子和分母都是正整数。\n\n 你可以假设 x 和 n 都是有效的分数,并且分母不为零。\n \n\n simplify(\"1/5\", \"5/1\") = True\n simplify(\"1/6\", \"2/1\") = False\n simplify(\"7/10\", \"10/2\") = False\n \"\"\"\n", "entry_point": "simplify", "test": "def check(candidate):\n\n # Check some simple cases\n 
assert candidate(\"1/5\", \"5/1\") == True, 'test1'\n assert candidate(\"1/6\", \"2/1\") == False, 'test2'\n assert candidate(\"5/1\", \"3/1\") == True, 'test3'\n assert candidate(\"7/10\", \"10/2\") == False, 'test4'\n assert candidate(\"2/10\", \"50/10\") == True, 'test5'\n assert candidate(\"7/2\", \"4/2\") == True, 'test6'\n assert candidate(\"11/6\", \"6/1\") == True, 'test7'\n assert candidate(\"2/3\", \"5/2\") == False, 'test8'\n assert candidate(\"5/2\", \"3/5\") == False, 'test9'\n assert candidate(\"2/4\", \"8/4\") == True, 'test10'\n\n\n # Check some edge cases that are easy to work out by hand.\n assert candidate(\"2/4\", \"4/2\") == True, 'test11'\n assert candidate(\"1/5\", \"5/1\") == True, 'test12'\n assert candidate(\"1/5\", \"1/5\") == False, 'test13'\n\n", "language": "python", "canonical_solution": " a, b = x.split(\"/\")\n c, d = n.split(\"/\")\n numerator = int(a) * int(c)\n denom = int(b) * int(d)\n if (numerator/denom == int(numerator/denom)):\n return True\n return False\n", "description": "你的任务是实现一个函数,简化表达式 x * n。如果 x * n 可以计算为整数,则函数返回 True,否则返回 False。x 和 n 都是分数的字符串表示形式,格式为 <分子>/<分母>,其中分子和分母都是正整数。\n\n 你可以假设 x 和 n 都是有效的分数,并且分母不为零。\n ", "natural_language": "Chinese"} 68 | {"task_id": "python/67", "prompt": "\ndef order_by_points(nums):\n \"\"\"\n 编写一个函数,根据数字的各位数字之和,将给定的整数列表按升序排序。\n注意:如果有多个数字的各位数字之和相似,则按照它们在原始列表中的索引排序。\n\n 例如:\n \n >>> order_by_points([1, 11, -1, -11, -12]) == [-1, -11, 1, -12, 11]\n >>> order_by_points([]) == []\n \"\"\"\n", "entry_point": "order_by_points", "test": "def check(candidate):\n\n # Check some simple cases\n assert candidate([1, 11, -1, -11, -12]) == [-1, -11, 1, -12, 11]\n assert candidate([1234,423,463,145,2,423,423,53,6,37,3457,3,56,0,46]) == [0, 2, 3, 6, 53, 423, 423, 423, 1234, 145, 37, 46, 56, 463, 3457]\n assert candidate([]) == []\n assert candidate([1, -11, -32, 43, 54, -98, 2, -3]) == [-3, -32, -98, -11, 1, 2, 43, 54]\n assert candidate([1,2,3,4,5,6,7,8,9,10,11]) == [1, 10, 2, 11, 3, 4, 5, 6, 7, 8, 9]\n 
assert candidate([0,6,6,-76,-21,23,4]) == [-76, -21, 0, 4, 23, 6, 6]\n\n # Check some edge cases that are easy to work out by hand.\n assert True, \"This prints if this assert fails 2 (also good for debugging!)\"\n\n", "language": "python", "canonical_solution": " def digits_sum(n):\n neg = 1\n if n < 0: n, neg = -1 * n, -1 \n n = [int(i) for i in str(n)]\n n[0] = n[0] * neg\n return sum(n)\n return sorted(nums, key=digits_sum)\n", "description": "编写一个函数,根据数字的各位数字之和,将给定的整数列表按升序排序。\n注意:如果有多个数字的各位数字之和相似,则按照它们在原始列表中的索引排序。\n\n 例如:\n ", "natural_language": "Chinese"} 69 | {"task_id": "python/68", "prompt": "\ndef specialFilter(nums):\n \"\"\"编写一个函数,它以数字数组作为输入,并返回数组中大于10且数字的第一个和最后一个数字都是奇数(1、3、5、7、9)的元素数量。例如:\n \n specialFilter([15, -73, 14, -15]) => 1 \n specialFilter([33, -2, -3, 45, 21, 109]) => 2\n \"\"\"\n", "entry_point": "specialFilter", "test": "def check(candidate):\n\n # Check some simple cases\n assert candidate([5, -2, 1, -5]) == 0 \n assert candidate([15, -73, 14, -15]) == 1\n assert candidate([33, -2, -3, 45, 21, 109]) == 2\n assert candidate([43, -12, 93, 125, 121, 109]) == 4\n assert candidate([71, -2, -33, 75, 21, 19]) == 3\n\n\n # Check some edge cases that are easy to work out by hand.\n assert candidate([1]) == 0 \n assert candidate([]) == 0 \n\n", "language": "python", "canonical_solution": " \n count = 0\n for num in nums:\n if num > 10:\n odd_digits = (1, 3, 5, 7, 9)\n number_as_string = str(num)\n if int(number_as_string[0]) in odd_digits and int(number_as_string[-1]) in odd_digits:\n count += 1\n \n return count \n", "description": "编写一个函数,它以数字数组作为输入,并返回数组中大于10且数字的第一个和最后一个数字都是奇数(1、3、5、7、9)的元素数量。例如:\n ", "natural_language": "Chinese"} 70 | {"task_id": "python/69", "prompt": "\ndef get_max_triples(n):\n \"\"\"\n 给定一个正整数n。你需要创建一个长度为n的整数数组a。\n对于每个i(1≤i≤n),a[i]的值=i * i-i + 1。\n返回a中i (\"Saturn\", \"Uranus\")\n bf(\"Earth\", \"Mercury\") ==> (\"Venus\")\n bf(\"Mercury\", \"Uranus\") ==> (\"Venus\", \"Earth\", \"Mars\", \"Jupiter\", \"Saturn\")\n '''\n", 
"entry_point": "bf", "test": "def check(candidate):\n\n # Check some simple cases\n assert candidate(\"Jupiter\", \"Neptune\") == (\"Saturn\", \"Uranus\"), \"First test error: \" + str(len(candidate(\"Jupiter\", \"Neptune\"))) \n assert candidate(\"Earth\", \"Mercury\") == (\"Venus\",), \"Second test error: \" + str(candidate(\"Earth\", \"Mercury\")) \n assert candidate(\"Mercury\", \"Uranus\") == (\"Venus\", \"Earth\", \"Mars\", \"Jupiter\", \"Saturn\"), \"Third test error: \" + str(candidate(\"Mercury\", \"Uranus\")) \n assert candidate(\"Neptune\", \"Venus\") == (\"Earth\", \"Mars\", \"Jupiter\", \"Saturn\", \"Uranus\"), \"Fourth test error: \" + str(candidate(\"Neptune\", \"Venus\")) \n\n\n # Check some edge cases that are easy to work out by hand.\n assert candidate(\"Earth\", \"Earth\") == ()\n assert candidate(\"Mars\", \"Earth\") == ()\n assert candidate(\"Jupiter\", \"Makemake\") == ()\n\n", "language": "python", "canonical_solution": " planet_names = (\"Mercury\", \"Venus\", \"Earth\", \"Mars\", \"Jupiter\", \"Saturn\", \"Uranus\", \"Neptune\")\n if planet1 not in planet_names or planet2 not in planet_names or planet1 == planet2:\n return ()\n planet1_index = planet_names.index(planet1)\n planet2_index = planet_names.index(planet2)\n if planet1_index < planet2_index:\n return (planet_names[planet1_index + 1: planet2_index])\n else:\n return (planet_names[planet2_index + 1 : planet1_index])\n", "description": "我们的太阳系中有八颗行星:最靠近太阳的是水星,其次是金星,然后是地球、火星、木星、土星、天王星和海王星。编写一个函数,该函数接受两个行星名称作为字符串planet1和planet2。该函数应返回一个元组,其中包含所有轨道位于planet1和planet2之间的行星,按距离太阳的近度排序。如果planet1或planet2不是正确的行星名称,则函数应返回一个空元组。示例\n ", "natural_language": "Chinese"} 72 | {"task_id": "python/71", "prompt": "\ndef x_or_y(n, x, y):\n \"\"\"一个简单的程序,如果n是质数,则应返回x的值,否则应返回y的值。\n\n 例子:\n \n for x_or_y(7, 34, 12) == 34\n for x_or_y(15, 8, 5) == 5\n \n \"\"\"\n", "entry_point": "x_or_y", "test": "def check(candidate):\n\n # Check some simple cases\n assert candidate(7, 34, 12) == 34\n assert candidate(15, 
8, 5) == 5\n assert candidate(3, 33, 5212) == 33\n assert candidate(1259, 3, 52) == 3\n assert candidate(7919, -1, 12) == -1\n assert candidate(3609, 1245, 583) == 583\n assert candidate(91, 56, 129) == 129\n assert candidate(6, 34, 1234) == 1234\n \n\n # Check some edge cases that are easy to work out by hand.\n assert candidate(1, 2, 0) == 0\n assert candidate(2, 2, 0) == 2\n\n", "language": "python", "canonical_solution": " if n == 1:\n return y\n for i in range(2, n):\n if n % i == 0:\n return y\n break\n else:\n return x\n", "description": "一个简单的程序,如果n是质数,则应返回x的值,否则应返回y的值。\n\n 例子:\n ", "natural_language": "Chinese"} 73 | {"task_id": "python/72", "prompt": "\ndef double_the_difference(lst):\n '''\n 给定一个数字列表,返回列表中奇数的平方和。忽略负数或非整数。\n\n double_the_difference([1, 3, 2, 0]) == 1 + 9 + 0 + 0 = 10\ndouble_the_difference([-1, -2, 0]) == 0\ndouble_the_difference([9, -2]) == 81\ndouble_the_difference([0]) == 0\n\n 如果输入列表为空,则返回0。\n \n '''\n", "entry_point": "double_the_difference", "test": "def check(candidate):\n\n # Check some simple cases\n assert candidate([]) == 0 , \"This prints if this assert fails 1 (good for debugging!)\"\n assert candidate([5, 4]) == 25 , \"This prints if this assert fails 2 (good for debugging!)\"\n assert candidate([0.1, 0.2, 0.3]) == 0 , \"This prints if this assert fails 3 (good for debugging!)\"\n assert candidate([-10, -20, -30]) == 0 , \"This prints if this assert fails 4 (good for debugging!)\"\n\n\n # Check some edge cases that are easy to work out by hand.\n assert candidate([-1, -2, 8]) == 0, \"This prints if this assert fails 5 (also good for debugging!)\"\n assert candidate([0.2, 3, 5]) == 34, \"This prints if this assert fails 6 (also good for debugging!)\"\n lst = list(range(-99, 100, 2))\n odd_sum = sum([i**2 for i in lst if i%2!=0 and i > 0])\n assert candidate(lst) == odd_sum , \"This prints if this assert fails 7 (good for debugging!)\"\n\n", "language": "python", "canonical_solution": " return sum([i**2 for i in lst if i > 0 
and i%2!=0 and \".\" not in str(i)])\n", "description": "给定一个数字列表,返回列表中奇数的平方和。忽略负数或非整数。\n\n double_the_difference([1, 3, 2, 0]) == 1 + 9 + 0 + 0 = 10\ndouble_the_difference([-1, -2, 0]) == 0\ndouble_the_difference([9, -2]) == 81\ndouble_the_difference([0]) == 0\n\n 如果输入列表为空,则返回0。\n ", "natural_language": "Chinese"} 74 | {"task_id": "python/73", "prompt": "\ndef Strongest_Extension(class_name, extensions):\n \"\"\"你将得到一个类的名称(一个字符串)和一个扩展名列表。扩展名用于加载附加的类到该类中。扩展名的强度如下:让CAP为扩展名中大写字母的数量,让SM为扩展名中小写字母的数量,强度由CAP-SM分数给出。您应该找到最强的扩展名并返回一个字符串,格式为:ClassName.StrongestExtensionName。如果有两个或更多的扩展名具有相同的强度,则应选择列表中先出现的扩展名。例如,如果您给出“Slices”作为类和扩展名列表:['SErviNGSliCes','Cheese','StuFfed'],则应返回'Slices.SErviNGSliCes',因为'SErviNGSliCes'是最强的扩展名(其强度为-1)。示例:\n \n for Strongest_Extension('my_class', ['AA', 'Be', 'CC']) == 'my_class.AA'\n \"\"\"\n", "entry_point": "Strongest_Extension", "test": "def check(candidate):\n\n # Check some simple cases\n assert candidate('Watashi', ['tEN', 'niNE', 'eIGHt8OKe']) == 'Watashi.eIGHt8OKe'\n assert candidate('Boku123', ['nani', 'NazeDa', 'YEs.WeCaNe', '32145tggg']) == 'Boku123.YEs.WeCaNe'\n assert candidate('__YESIMHERE', ['t', 'eMptY', 'nothing', 'zeR00', 'NuLl__', '123NoooneB321']) == '__YESIMHERE.NuLl__'\n assert candidate('K', ['Ta', 'TAR', 't234An', 'cosSo']) == 'K.TAR'\n assert candidate('__HAHA', ['Tab', '123', '781345', '-_-']) == '__HAHA.123'\n assert candidate('YameRore', ['HhAas', 'okIWILL123', 'WorkOut', 'Fails', '-_-']) == 'YameRore.okIWILL123'\n assert candidate('finNNalLLly', ['Die', 'NowW', 'Wow', 'WoW']) == 'finNNalLLly.WoW'\n\n # Check some edge cases that are easy to work out by hand.\n assert candidate('_', ['Bb', '91245']) == '_.Bb'\n assert candidate('Sp', ['671235', 'Bb']) == 'Sp.671235'\n \n", "language": "python", "canonical_solution": " strong = extensions[0]\n my_val = len([x for x in extensions[0] if x.isalpha() and x.isupper()]) - len([x for x in extensions[0] if x.isalpha() and x.islower()])\n for s in extensions:\n val = len([x for 
x in s if x.isalpha() and x.isupper()]) - len([x for x in s if x.isalpha() and x.islower()])\n if val > my_val:\n strong = s\n my_val = val\n\n ans = class_name + \".\" + strong\n return ans\n\n", "description": "你将得到一个类的名称(一个字符串)和一个扩展名列表。扩展名用于加载附加的类到该类中。扩展名的强度如下:让CAP为扩展名中大写字母的数量,让SM为扩展名中小写字母的数量,强度由CAP-SM分数给出。您应该找到最强的扩展名并返回一个字符串,格式为:ClassName.StrongestExtensionName。如果有两个或更多的扩展名具有相同的强度,则应选择列表中先出现的扩展名。例如,如果您给出“Slices”作为类和扩展名列表:['SErviNGSliCes','Cheese','StuFfed'],则应返回'Slices.SErviNGSliCes',因为'SErviNGSliCes'是最强的扩展名(其强度为-1)。示例:\n ", "natural_language": "Chinese"} 75 | {"task_id": "python/74", "prompt": "\ndef cycpattern_check(a , b):\n \"\"\"给定两个单词。如果第二个单词或其任何旋转是第一个单词的子字符串,则需要返回True。\n \n cycpattern_check(\"abcd\",\"abd\") => False\n cycpattern_check(\"hello\",\"ell\") => True\n cycpattern_check(\"whassup\",\"psus\") => False\n cycpattern_check(\"abab\",\"baa\") => True\n cycpattern_check(\"efef\",\"eeff\") => False\n cycpattern_check(\"himenss\",\"simen\") => True\n\n \"\"\"\n", "entry_point": "cycpattern_check", "test": "def check(candidate):\n\n # Check some simple cases\n #assert True, \"This prints if this assert fails 1 (good for debugging!)\"\n\n # Check some edge cases that are easy to work out by hand.\n #assert True, \"This prints if this assert fails 2 (also good for debugging!)\"\n assert candidate(\"xyzw\",\"xyw\") == False , \"test #0\"\n assert candidate(\"yello\",\"ell\") == True , \"test #1\"\n assert candidate(\"whattup\",\"ptut\") == False , \"test #2\"\n assert candidate(\"efef\",\"fee\") == True , \"test #3\"\n assert candidate(\"abab\",\"aabb\") == False , \"test #4\"\n assert candidate(\"winemtt\",\"tinem\") == True , \"test #5\"\n\n", "language": "python", "canonical_solution": " l = len(b)\n pat = b + b\n for i in range(len(a) - l + 1):\n for j in range(l + 1):\n if a[i:i+l] == pat[j:j+l]:\n return True\n return False\n", "description": "给定两个单词。如果第二个单词或其任何旋转是第一个单词的子字符串,则需要返回True。\n ", "natural_language": "Chinese"} 76 | {"task_id": "python/75", 
"prompt": "\ndef int_to_mini_roman(number):\n \"\"\"\n 给定一个正整数,将其转换为罗马数字字符串,并以小写形式返回。\n限制条件:1 <= num <= 1000\n\n 示例:\n \n >>> int_to_mini_roman(19) == 'xix'\n >>> int_to_mini_roman(152) == 'clii'\n >>> int_to_mini_roman(426) == 'cdxxvi'\n \"\"\"\n", "entry_point": "int_to_mini_roman", "test": "def check(candidate):\n\n # Check some simple cases\n assert candidate(19) == 'xix'\n assert candidate(152) == 'clii'\n assert candidate(251) == 'ccli'\n assert candidate(426) == 'cdxxvi'\n assert candidate(500) == 'd'\n assert candidate(1) == 'i'\n assert candidate(4) == 'iv'\n assert candidate(43) == 'xliii'\n assert candidate(90) == 'xc'\n assert candidate(94) == 'xciv'\n assert candidate(532) == 'dxxxii'\n assert candidate(900) == 'cm'\n assert candidate(994) == 'cmxciv'\n assert candidate(1000) == 'm'\n\n # Check some edge cases that are easy to work out by hand.\n assert True\n\n", "language": "python", "canonical_solution": " num = [1, 4, 5, 9, 10, 40, 50, 90, \n 100, 400, 500, 900, 1000] \n sym = [\"I\", \"IV\", \"V\", \"IX\", \"X\", \"XL\", \n \"L\", \"XC\", \"C\", \"CD\", \"D\", \"CM\", \"M\"] \n i = 12\n res = ''\n while number: \n div = number // num[i] \n number %= num[i] \n while div: \n res += sym[i] \n div -= 1\n i -= 1\n return res.lower()\n", "description": "给定一个正整数,将其转换为罗马数字字符串,并以小写形式返回。\n限制条件:1 <= num <= 1000\n\n 示例:\n ", "natural_language": "Chinese"} 77 | {"task_id": "python/76", "prompt": "\ndef right_angle_triangle(a, b, c):\n '''\n 给定三角形的三条边长。如果这三条边可以组成一个直角三角形,则返回True,否则返回False。\n 直角三角形是一个其中一个角是直角或90度的三角形。\n 例子:\n \n right_angle_triangle(3, 4, 5) == True\n right_angle_triangle(1, 2, 3) == False\n '''\n", "entry_point": "right_angle_triangle", "test": "def check(candidate):\n\n # Check some simple cases\n assert candidate(3, 4, 5) == True, \"This prints if this assert fails 1 (good for debugging!)\"\n assert candidate(1, 2, 3) == False\n assert candidate(10, 6, 8) == True\n assert candidate(2, 2, 2) == False\n assert candidate(7, 24, 25) == True\n 
assert candidate(10, 5, 7) == False\n assert candidate(5, 12, 13) == True\n assert candidate(15, 8, 17) == True\n assert candidate(48, 55, 73) == True\n\n # Check some edge cases that are easy to work out by hand.\n assert candidate(1, 1, 1) == False, \"This prints if this assert fails 2 (also good for debugging!)\"\n assert candidate(2, 2, 10) == False\n\n", "language": "python", "canonical_solution": " return a*a == b*b + c*c or b*b == a*a + c*c or c*c == a*a + b*b\n", "description": "给定三角形的三条边长。如果这三条边可以组成一个直角三角形,则返回True,否则返回False。\n 直角三角形是一个其中一个角是直角或90度的三角形。\n 例子:\n ", "natural_language": "Chinese"} 78 | {"task_id": "python/77", "prompt": "\ndef solve(s):\n \"\"\"给定一个字符串s。\n如果s[i]是一个字母,将其大小写反转,从小写变为大写或反之亦然,\n否则保持不变。\n如果字符串不包含字母,则反转字符串。\n函数应返回结果字符串。\n例子\n \n solve(\"1234\") = \"4321\"\n solve(\"ab\") = \"AB\"\n solve(\"#a@C\") = \"#A@c\"\n \"\"\"\n", "entry_point": "solve", "test": "def check(candidate):\n\n # Check some simple cases\n assert candidate(\"AsDf\") == \"aSdF\"\n assert candidate(\"1234\") == \"4321\"\n assert candidate(\"ab\") == \"AB\"\n assert candidate(\"#a@C\") == \"#A@c\"\n assert candidate(\"#AsdfW^45\") == \"#aSDFw^45\"\n assert candidate(\"#6@2\") == \"2@6#\"\n\n # Check some edge cases that are easy to work out by hand.\n assert candidate(\"#$a^D\") == \"#$A^d\"\n assert candidate(\"#ccc\") == \"#CCC\"\n\n # Don't remove this line:\n", "language": "python", "canonical_solution": " flg = 0\n idx = 0\n new_str = list(s)\n for i in s:\n if i.isalpha():\n new_str[idx] = i.swapcase()\n flg = 1\n idx += 1\n s = \"\"\n for i in new_str:\n s += i\n if flg == 0:\n return s[len(s)::-1]\n return s\n", "description": "给定一个字符串s。\n如果s[i]是一个字母,将其大小写反转,从小写变为大写或反之亦然,\n否则保持不变。\n如果字符串不包含字母,则反转字符串。\n函数应返回结果字符串。\n例子\n ", "natural_language": "Chinese"} 79 | {"task_id": "python/78", "prompt": "\ndef string_to_md5(text):\n \"\"\"\n 给定一个字符串“text”,返回其MD5哈希等效字符串。\n如果“text”是一个空字符串,则返回null。\n \n\n >>> string_to_md5('Hello world') == '3e25960a79dbc69b674cd4ec67a72c62'\n 
\"\"\"\n", "entry_point": "string_to_md5", "test": "def check(candidate):\n\n # Check some simple cases\n assert candidate('Hello world') == '3e25960a79dbc69b674cd4ec67a72c62'\n assert candidate('') == None\n assert candidate('A B C') == '0ef78513b0cb8cef12743f5aeb35f888'\n assert candidate('password') == '5f4dcc3b5aa765d61d8327deb882cf99'\n\n # Check some edge cases that are easy to work out by hand.\n assert True\n\n", "language": "python", "canonical_solution": " import hashlib\n return hashlib.md5(text.encode('ascii')).hexdigest() if text else None\n", "description": "给定一个字符串“text”,返回其MD5哈希等效字符串。\n如果“text”是一个空字符串,则返回null。\n ", "natural_language": "Chinese"} 80 | {"task_id": "python/79", "prompt": "\ndef generate_integers(a, b):\n \"\"\"\n 给定两个正整数a和b,返回a和b之间的偶数位数字,按升序排列。\n\n 例如:\n \n generate_integers(2, 8) => [2, 4, 6, 8]\n generate_integers(8, 2) => [2, 4, 6, 8]\n generate_integers(10, 14) => []\n \"\"\"\n", "entry_point": "generate_integers", "test": "def check(candidate):\n\n # Check some simple cases\n assert candidate(2, 10) == [2, 4, 6, 8], \"Test 1\"\n assert candidate(10, 2) == [2, 4, 6, 8], \"Test 2\"\n assert candidate(132, 2) == [2, 4, 6, 8], \"Test 3\"\n assert candidate(17,89) == [], \"Test 4\"\n\n # Check some edge cases that are easy to work out by hand.\n assert True, \"This prints if this assert fails 2 (also good for debugging!)\"\n\n", "language": "python", "canonical_solution": " lower = max(2, min(a, b))\n upper = min(8, max(a, b))\n\n return [i for i in range(lower, upper+1) if i % 2 == 0]\n", "description": "给定两个正整数a和b,返回a和b之间的偶数位数字,按升序排列。\n\n 例如:\n ", "natural_language": "Chinese"} 81 | -------------------------------------------------------------------------------- /mxeval/.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.egg-info/* 3 | .DS_Store -------------------------------------------------------------------------------- /mxeval/CHANGELOG.md: 
-------------------------------------------------------------------------------- 1 | Changelog 2 | 3 | ## v1.1 4 | * Bugfix:
5 | Fix the test assertions in the unit test cases of 47 problems for C#/TypeScript/Go, which represent ~5% of all problems:
6 | The root cause is that the input parameters passed to the canonical solutions can be mutated as a side effect, which causes the captured input to mismatch.
7 | We fix this issue by saving another copy of the function input before passing it for execution. -------------------------------------------------------------------------------- /mxeval/LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 
35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 
123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. 
In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. 
We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /mxeval/NOTICE: -------------------------------------------------------------------------------- 1 | MBXP+: Multi-lingual Execution-Based Evaluation 2 | 3 | Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -------------------------------------------------------------------------------- /mxeval/THIRD_PARTY_LICENSES: -------------------------------------------------------------------------------- 1 | The Multi-lingual Execution-Based Evaluation repository includes the following third-party software/licensing: 2 | 3 | ** (1) in mxeval/, we release Multi-lingual Execution-Based Evaluation we adapt the HumanEval code repository into a multi-lingual version which supports evaluation of all our datasets. The original code and dataset are from https://github.com/openai/human-eval. 4 | (2) In data/multilingual_humaneval, we release Multi-lingual HumanEval where we adapt the HumanEval dataset by OpenAI into multiple datasets in different programming languages. 
The original code and dataset are from https://github.com/openai/human-eval/tree/master/data. 5 | 6 | The MIT License 7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy 9 | of this software and associated documentation files (the "Software"), to deal 10 | in the Software without restriction, including without limitation the rights 11 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | copies of the Software, and to permit persons to whom the Software is 13 | furnished to do so, subject to the following conditions: 14 | 15 | The above copyright notice and this permission notice shall be included in 16 | all copies or substantial portions of the Software. 17 | 18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 24 | THE SOFTWARE. 25 | 26 | 27 | ---------------- 28 | 29 | 30 | ** In data/mbxp, we release MBXP we adapt the MBPP dataset by Google into multiple datasets in different programming languages. The original datasets are from https://github.com/google-research/google-research/tree/master/mbpp and https://huggingface.co/datasets/mbpp. 31 | 32 | 33 | Attribution 4.0 International 34 | 35 | ======================================================================= 36 | 37 | Creative Commons Corporation ("Creative Commons") is not a law firm and 38 | does not provide legal services or legal advice. Distribution of 39 | Creative Commons public licenses does not create a lawyer-client or 40 | other relationship. 
Creative Commons makes its licenses and related 41 | information available on an "as-is" basis. Creative Commons gives no 42 | warranties regarding its licenses, any material licensed under their 43 | terms and conditions, or any related information. Creative Commons 44 | disclaims all liability for damages resulting from their use to the 45 | fullest extent possible. 46 | 47 | Using Creative Commons Public Licenses 48 | 49 | Creative Commons public licenses provide a standard set of terms and 50 | conditions that creators and other rights holders may use to share 51 | original works of authorship and other material subject to copyright 52 | and certain other rights specified in the public license below. The 53 | following considerations are for informational purposes only, are not 54 | exhaustive, and do not form part of our licenses. 55 | 56 | Considerations for licensors: Our public licenses are 57 | intended for use by those authorized to give the public 58 | permission to use material in ways otherwise restricted by 59 | copyright and certain other rights. Our licenses are 60 | irrevocable. Licensors should read and understand the terms 61 | and conditions of the license they choose before applying it. 62 | Licensors should also secure all rights necessary before 63 | applying our licenses so that the public can reuse the 64 | material as expected. Licensors should clearly mark any 65 | material not subject to the license. This includes other CC- 66 | licensed material, or material used under an exception or 67 | limitation to copyright. More considerations for licensors: 68 | wiki.creativecommons.org/Considerations_for_licensors 69 | 70 | Considerations for the public: By using one of our public 71 | licenses, a licensor grants the public permission to use the 72 | licensed material under specified terms and conditions. 
If 73 | the licensor's permission is not necessary for any reason--for 74 | example, because of any applicable exception or limitation to 75 | copyright--then that use is not regulated by the license. Our 76 | licenses grant only permissions under copyright and certain 77 | other rights that a licensor has authority to grant. Use of 78 | the licensed material may still be restricted for other 79 | reasons, including because others have copyright or other 80 | rights in the material. A licensor may make special requests, 81 | such as asking that all changes be marked or described. 82 | Although not required by our licenses, you are encouraged to 83 | respect those requests where reasonable. More_considerations 84 | for the public: 85 | wiki.creativecommons.org/Considerations_for_licensees 86 | 87 | ======================================================================= 88 | 89 | Creative Commons Attribution 4.0 International Public License 90 | 91 | By exercising the Licensed Rights (defined below), You accept and agree 92 | to be bound by the terms and conditions of this Creative Commons 93 | Attribution 4.0 International Public License ("Public License"). To the 94 | extent this Public License may be interpreted as a contract, You are 95 | granted the Licensed Rights in consideration of Your acceptance of 96 | these terms and conditions, and the Licensor grants You such rights in 97 | consideration of benefits the Licensor receives from making the 98 | Licensed Material available under these terms and conditions. 99 | 100 | 101 | Section 1 -- Definitions. 102 | 103 | a. Adapted Material means material subject to Copyright and Similar 104 | Rights that is derived from or based upon the Licensed Material 105 | and in which the Licensed Material is translated, altered, 106 | arranged, transformed, or otherwise modified in a manner requiring 107 | permission under the Copyright and Similar Rights held by the 108 | Licensor. 
For purposes of this Public License, where the Licensed 109 | Material is a musical work, performance, or sound recording, 110 | Adapted Material is always produced where the Licensed Material is 111 | synched in timed relation with a moving image. 112 | 113 | b. Adapter's License means the license You apply to Your Copyright 114 | and Similar Rights in Your contributions to Adapted Material in 115 | accordance with the terms and conditions of this Public License. 116 | 117 | c. Copyright and Similar Rights means copyright and/or similar rights 118 | closely related to copyright including, without limitation, 119 | performance, broadcast, sound recording, and Sui Generis Database 120 | Rights, without regard to how the rights are labeled or 121 | categorized. For purposes of this Public License, the rights 122 | specified in Section 2(b)(1)-(2) are not Copyright and Similar 123 | Rights. 124 | 125 | d. Effective Technological Measures means those measures that, in the 126 | absence of proper authority, may not be circumvented under laws 127 | fulfilling obligations under Article 11 of the WIPO Copyright 128 | Treaty adopted on December 20, 1996, and/or similar international 129 | agreements. 130 | 131 | e. Exceptions and Limitations means fair use, fair dealing, and/or 132 | any other exception or limitation to Copyright and Similar Rights 133 | that applies to Your use of the Licensed Material. 134 | 135 | f. Licensed Material means the artistic or literary work, database, 136 | or other material to which the Licensor applied this Public 137 | License. 138 | 139 | g. Licensed Rights means the rights granted to You subject to the 140 | terms and conditions of this Public License, which are limited to 141 | all Copyright and Similar Rights that apply to Your use of the 142 | Licensed Material and that the Licensor has authority to license. 143 | 144 | h. Licensor means the individual(s) or entity(ies) granting rights 145 | under this Public License. 146 | 147 | i. 
Share means to provide material to the public by any means or 148 | process that requires permission under the Licensed Rights, such 149 | as reproduction, public display, public performance, distribution, 150 | dissemination, communication, or importation, and to make material 151 | available to the public including in ways that members of the 152 | public may access the material from a place and at a time 153 | individually chosen by them. 154 | 155 | j. Sui Generis Database Rights means rights other than copyright 156 | resulting from Directive 96/9/EC of the European Parliament and of 157 | the Council of 11 March 1996 on the legal protection of databases, 158 | as amended and/or succeeded, as well as other essentially 159 | equivalent rights anywhere in the world. 160 | 161 | k. You means the individual or entity exercising the Licensed Rights 162 | under this Public License. Your has a corresponding meaning. 163 | 164 | 165 | Section 2 -- Scope. 166 | 167 | a. License grant. 168 | 169 | 1. Subject to the terms and conditions of this Public License, 170 | the Licensor hereby grants You a worldwide, royalty-free, 171 | non-sublicensable, non-exclusive, irrevocable license to 172 | exercise the Licensed Rights in the Licensed Material to: 173 | 174 | a. reproduce and Share the Licensed Material, in whole or 175 | in part; and 176 | 177 | b. produce, reproduce, and Share Adapted Material. 178 | 179 | 2. Exceptions and Limitations. For the avoidance of doubt, where 180 | Exceptions and Limitations apply to Your use, this Public 181 | License does not apply, and You do not need to comply with 182 | its terms and conditions. 183 | 184 | 3. Term. The term of this Public License is specified in Section 185 | 6(a). 186 | 187 | 4. Media and formats; technical modifications allowed. 
The 188 | Licensor authorizes You to exercise the Licensed Rights in 189 | all media and formats whether now known or hereafter created, 190 | and to make technical modifications necessary to do so. The 191 | Licensor waives and/or agrees not to assert any right or 192 | authority to forbid You from making technical modifications 193 | necessary to exercise the Licensed Rights, including 194 | technical modifications necessary to circumvent Effective 195 | Technological Measures. For purposes of this Public License, 196 | simply making modifications authorized by this Section 2(a) 197 | (4) never produces Adapted Material. 198 | 199 | 5. Downstream recipients. 200 | 201 | a. Offer from the Licensor -- Licensed Material. Every 202 | recipient of the Licensed Material automatically 203 | receives an offer from the Licensor to exercise the 204 | Licensed Rights under the terms and conditions of this 205 | Public License. 206 | 207 | b. No downstream restrictions. You may not offer or impose 208 | any additional or different terms or conditions on, or 209 | apply any Effective Technological Measures to, the 210 | Licensed Material if doing so restricts exercise of the 211 | Licensed Rights by any recipient of the Licensed 212 | Material. 213 | 214 | 6. No endorsement. Nothing in this Public License constitutes or 215 | may be construed as permission to assert or imply that You 216 | are, or that Your use of the Licensed Material is, connected 217 | with, or sponsored, endorsed, or granted official status by, 218 | the Licensor or others designated to receive attribution as 219 | provided in Section 3(a)(1)(A)(i). 220 | 221 | b. Other rights. 222 | 223 | 1. 
Moral rights, such as the right of integrity, are not 224 | licensed under this Public License, nor are publicity, 225 | privacy, and/or other similar personality rights; however, to 226 | the extent possible, the Licensor waives and/or agrees not to 227 | assert any such rights held by the Licensor to the limited 228 | extent necessary to allow You to exercise the Licensed 229 | Rights, but not otherwise. 230 | 231 | 2. Patent and trademark rights are not licensed under this 232 | Public License. 233 | 234 | 3. To the extent possible, the Licensor waives any right to 235 | collect royalties from You for the exercise of the Licensed 236 | Rights, whether directly or through a collecting society 237 | under any voluntary or waivable statutory or compulsory 238 | licensing scheme. In all other cases the Licensor expressly 239 | reserves any right to collect such royalties. 240 | 241 | 242 | Section 3 -- License Conditions. 243 | 244 | Your exercise of the Licensed Rights is expressly made subject to the 245 | following conditions. 246 | 247 | a. Attribution. 248 | 249 | 1. If You Share the Licensed Material (including in modified 250 | form), You must: 251 | 252 | a. retain the following if it is supplied by the Licensor 253 | with the Licensed Material: 254 | 255 | i. identification of the creator(s) of the Licensed 256 | Material and any others designated to receive 257 | attribution, in any reasonable manner requested by 258 | the Licensor (including by pseudonym if 259 | designated); 260 | 261 | ii. a copyright notice; 262 | 263 | iii. a notice that refers to this Public License; 264 | 265 | iv. a notice that refers to the disclaimer of 266 | warranties; 267 | 268 | v. a URI or hyperlink to the Licensed Material to the 269 | extent reasonably practicable; 270 | 271 | b. indicate if You modified the Licensed Material and 272 | retain an indication of any previous modifications; and 273 | 274 | c. 
indicate the Licensed Material is licensed under this 275 | Public License, and include the text of, or the URI or 276 | hyperlink to, this Public License. 277 | 278 | 2. You may satisfy the conditions in Section 3(a)(1) in any 279 | reasonable manner based on the medium, means, and context in 280 | which You Share the Licensed Material. For example, it may be 281 | reasonable to satisfy the conditions by providing a URI or 282 | hyperlink to a resource that includes the required 283 | information. 284 | 285 | 3. If requested by the Licensor, You must remove any of the 286 | information required by Section 3(a)(1)(A) to the extent 287 | reasonably practicable. 288 | 289 | 4. If You Share Adapted Material You produce, the Adapter's 290 | License You apply must not prevent recipients of the Adapted 291 | Material from complying with this Public License. 292 | 293 | 294 | Section 4 -- Sui Generis Database Rights. 295 | 296 | Where the Licensed Rights include Sui Generis Database Rights that 297 | apply to Your use of the Licensed Material: 298 | 299 | a. for the avoidance of doubt, Section 2(a)(1) grants You the right 300 | to extract, reuse, reproduce, and Share all or a substantial 301 | portion of the contents of the database; 302 | 303 | b. if You include all or a substantial portion of the database 304 | contents in a database in which You have Sui Generis Database 305 | Rights, then the database in which You have Sui Generis Database 306 | Rights (but not its individual contents) is Adapted Material; and 307 | 308 | c. You must comply with the conditions in Section 3(a) if You Share 309 | all or a substantial portion of the contents of the database. 310 | 311 | For the avoidance of doubt, this Section 4 supplements and does not 312 | replace Your obligations under this Public License where the Licensed 313 | Rights include other Copyright and Similar Rights. 314 | 315 | 316 | Section 5 -- Disclaimer of Warranties and Limitation of Liability. 317 | 318 | a. 
UNLESS OTHERWISE SEPARATELY UNDERTAKEN BY THE LICENSOR, TO THE 319 | EXTENT POSSIBLE, THE LICENSOR OFFERS THE LICENSED MATERIAL AS-IS 320 | AND AS-AVAILABLE, AND MAKES NO REPRESENTATIONS OR WARRANTIES OF 321 | ANY KIND CONCERNING THE LICENSED MATERIAL, WHETHER EXPRESS, 322 | IMPLIED, STATUTORY, OR OTHER. THIS INCLUDES, WITHOUT LIMITATION, 323 | WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR 324 | PURPOSE, NON-INFRINGEMENT, ABSENCE OF LATENT OR OTHER DEFECTS, 325 | ACCURACY, OR THE PRESENCE OR ABSENCE OF ERRORS, WHETHER OR NOT 326 | KNOWN OR DISCOVERABLE. WHERE DISCLAIMERS OF WARRANTIES ARE NOT 327 | ALLOWED IN FULL OR IN PART, THIS DISCLAIMER MAY NOT APPLY TO YOU. 328 | 329 | b. TO THE EXTENT POSSIBLE, IN NO EVENT WILL THE LICENSOR BE LIABLE 330 | TO YOU ON ANY LEGAL THEORY (INCLUDING, WITHOUT LIMITATION, 331 | NEGLIGENCE) OR OTHERWISE FOR ANY DIRECT, SPECIAL, INDIRECT, 332 | INCIDENTAL, CONSEQUENTIAL, PUNITIVE, EXEMPLARY, OR OTHER LOSSES, 333 | COSTS, EXPENSES, OR DAMAGES ARISING OUT OF THIS PUBLIC LICENSE OR 334 | USE OF THE LICENSED MATERIAL, EVEN IF THE LICENSOR HAS BEEN 335 | ADVISED OF THE POSSIBILITY OF SUCH LOSSES, COSTS, EXPENSES, OR 336 | DAMAGES. WHERE A LIMITATION OF LIABILITY IS NOT ALLOWED IN FULL OR 337 | IN PART, THIS LIMITATION MAY NOT APPLY TO YOU. 338 | 339 | c. The disclaimer of warranties and limitation of liability provided 340 | above shall be interpreted in a manner that, to the extent 341 | possible, most closely approximates an absolute disclaimer and 342 | waiver of all liability. 343 | 344 | 345 | Section 6 -- Term and Termination. 346 | 347 | a. This Public License applies for the term of the Copyright and 348 | Similar Rights licensed here. However, if You fail to comply with 349 | this Public License, then Your rights under this Public License 350 | terminate automatically. 351 | 352 | b. Where Your right to use the Licensed Material has terminated under 353 | Section 6(a), it reinstates: 354 | 355 | 1. 
automatically as of the date the violation is cured, provided 356 | it is cured within 30 days of Your discovery of the 357 | violation; or 358 | 359 | 2. upon express reinstatement by the Licensor. 360 | 361 | For the avoidance of doubt, this Section 6(b) does not affect any 362 | right the Licensor may have to seek remedies for Your violations 363 | of this Public License. 364 | 365 | c. For the avoidance of doubt, the Licensor may also offer the 366 | Licensed Material under separate terms or conditions or stop 367 | distributing the Licensed Material at any time; however, doing so 368 | will not terminate this Public License. 369 | 370 | d. Sections 1, 5, 6, 7, and 8 survive termination of this Public 371 | License. 372 | 373 | 374 | Section 7 -- Other Terms and Conditions. 375 | 376 | a. The Licensor shall not be bound by any additional or different 377 | terms or conditions communicated by You unless expressly agreed. 378 | 379 | b. Any arrangements, understandings, or agreements regarding the 380 | Licensed Material not stated herein are separate from and 381 | independent of the terms and conditions of this Public License. 382 | 383 | 384 | Section 8 -- Interpretation. 385 | 386 | a. For the avoidance of doubt, this Public License does not, and 387 | shall not be interpreted to, reduce, limit, restrict, or impose 388 | conditions on any use of the Licensed Material that could lawfully 389 | be made without permission under this Public License. 390 | 391 | b. To the extent possible, if any provision of this Public License is 392 | deemed unenforceable, it shall be automatically reformed to the 393 | minimum extent necessary to make it enforceable. If the provision 394 | cannot be reformed, it shall be severed from this Public License 395 | without affecting the enforceability of the remaining terms and 396 | conditions. 397 | 398 | c. 
No term or condition of this Public License will be waived and no 399 | failure to comply consented to unless expressly agreed to by the 400 | Licensor. 401 | 402 | d. Nothing in this Public License constitutes or may be interpreted 403 | as a limitation upon, or waiver of, any privileges and immunities 404 | that apply to the Licensor or You, including from the legal 405 | processes of any jurisdiction or authority. 406 | 407 | 408 | ======================================================================= 409 | 410 | Creative Commons is not a party to its public 411 | licenses. Notwithstanding, Creative Commons may elect to apply one of 412 | its public licenses to material it publishes and in those instances 413 | will be considered the “Licensor.” The text of the Creative Commons 414 | public licenses is dedicated to the public domain under the CC0 Public 415 | Domain Dedication. Except for the limited purpose of indicating that 416 | material is shared under a Creative Commons public license or as 417 | otherwise permitted by the Creative Commons policies published at 418 | creativecommons.org/policies, Creative Commons does not authorize the 419 | use of the trademark "Creative Commons" or any other trademark or logo 420 | of Creative Commons without its prior written consent including, 421 | without limitation, in connection with any unauthorized modifications 422 | to any of its public licenses or any other arrangements, 423 | understandings, or agreements concerning use of licensed material. For 424 | the avoidance of doubt, this paragraph does not form part of the 425 | public licenses. 426 | 427 | Creative Commons may be contacted at creativecommons.org. 428 | 429 | ---------------- 430 | 431 | 432 | ** In data/multilingual_mathqa, we release Multi-lingual MathQA where we adapt the MathQA-Python dataset by Google into multiple datasets in different programming languages. 
The original code and dataset are from https://github.com/google/trax/blob/master/trax/examples/MathQA_Python_generation_notebook.ipynb 433 | 434 | 435 | Apache License 436 | Version 2.0, January 2004 437 | http://www.apache.org/licenses/ 438 | 439 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 440 | 441 | 1. Definitions. 442 | 443 | "License" shall mean the terms and conditions for use, reproduction, 444 | and distribution as defined by Sections 1 through 9 of this document. 445 | 446 | "Licensor" shall mean the copyright owner or entity authorized by 447 | the copyright owner that is granting the License. 448 | 449 | "Legal Entity" shall mean the union of the acting entity and all 450 | other entities that control, are controlled by, or are under common 451 | control with that entity. For the purposes of this definition, 452 | "control" means (i) the power, direct or indirect, to cause the 453 | direction or management of such entity, whether by contract or 454 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 455 | outstanding shares, or (iii) beneficial ownership of such entity. 456 | 457 | "You" (or "Your") shall mean an individual or Legal Entity 458 | exercising permissions granted by this License. 459 | 460 | "Source" form shall mean the preferred form for making modifications, 461 | including but not limited to software source code, documentation 462 | source, and configuration files. 463 | 464 | "Object" form shall mean any form resulting from mechanical 465 | transformation or translation of a Source form, including but 466 | not limited to compiled object code, generated documentation, 467 | and conversions to other media types. 468 | 469 | "Work" shall mean the work of authorship, whether in Source or 470 | Object form, made available under the License, as indicated by a 471 | copyright notice that is included in or attached to the work 472 | (an example is provided in the Appendix below). 
473 | 474 | "Derivative Works" shall mean any work, whether in Source or Object 475 | form, that is based on (or derived from) the Work and for which the 476 | editorial revisions, annotations, elaborations, or other modifications 477 | represent, as a whole, an original work of authorship. For the purposes 478 | of this License, Derivative Works shall not include works that remain 479 | separable from, or merely link (or bind by name) to the interfaces of, 480 | the Work and Derivative Works thereof. 481 | 482 | "Contribution" shall mean any work of authorship, including 483 | the original version of the Work and any modifications or additions 484 | to that Work or Derivative Works thereof, that is intentionally 485 | submitted to Licensor for inclusion in the Work by the copyright owner 486 | or by an individual or Legal Entity authorized to submit on behalf of 487 | the copyright owner. For the purposes of this definition, "submitted" 488 | means any form of electronic, verbal, or written communication sent 489 | to the Licensor or its representatives, including but not limited to 490 | communication on electronic mailing lists, source code control systems, 491 | and issue tracking systems that are managed by, or on behalf of, the 492 | Licensor for the purpose of discussing and improving the Work, but 493 | excluding communication that is conspicuously marked or otherwise 494 | designated in writing by the copyright owner as "Not a Contribution." 495 | 496 | "Contributor" shall mean Licensor and any individual or Legal Entity 497 | on behalf of whom a Contribution has been received by Licensor and 498 | subsequently incorporated within the Work. 499 | 500 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 501 | this License, each Contributor hereby grants to You a perpetual, 502 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 503 | copyright license to reproduce, prepare Derivative Works of, 504 | publicly display, publicly perform, sublicense, and distribute the 505 | Work and such Derivative Works in Source or Object form. 506 | 507 | 3. Grant of Patent License. Subject to the terms and conditions of 508 | this License, each Contributor hereby grants to You a perpetual, 509 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 510 | (except as stated in this section) patent license to make, have made, 511 | use, offer to sell, sell, import, and otherwise transfer the Work, 512 | where such license applies only to those patent claims licensable 513 | by such Contributor that are necessarily infringed by their 514 | Contribution(s) alone or by combination of their Contribution(s) 515 | with the Work to which such Contribution(s) was submitted. If You 516 | institute patent litigation against any entity (including a 517 | cross-claim or counterclaim in a lawsuit) alleging that the Work 518 | or a Contribution incorporated within the Work constitutes direct 519 | or contributory patent infringement, then any patent licenses 520 | granted to You under this License for that Work shall terminate 521 | as of the date such litigation is filed. 522 | 523 | 4. Redistribution. 
You may reproduce and distribute copies of the 524 | Work or Derivative Works thereof in any medium, with or without 525 | modifications, and in Source or Object form, provided that You 526 | meet the following conditions: 527 | 528 | (a) You must give any other recipients of the Work or 529 | Derivative Works a copy of this License; and 530 | 531 | (b) You must cause any modified files to carry prominent notices 532 | stating that You changed the files; and 533 | 534 | (c) You must retain, in the Source form of any Derivative Works 535 | that You distribute, all copyright, patent, trademark, and 536 | attribution notices from the Source form of the Work, 537 | excluding those notices that do not pertain to any part of 538 | the Derivative Works; and 539 | 540 | (d) If the Work includes a "NOTICE" text file as part of its 541 | distribution, then any Derivative Works that You distribute must 542 | include a readable copy of the attribution notices contained 543 | within such NOTICE file, excluding those notices that do not 544 | pertain to any part of the Derivative Works, in at least one 545 | of the following places: within a NOTICE text file distributed 546 | as part of the Derivative Works; within the Source form or 547 | documentation, if provided along with the Derivative Works; or, 548 | within a display generated by the Derivative Works, if and 549 | wherever such third-party notices normally appear. The contents 550 | of the NOTICE file are for informational purposes only and 551 | do not modify the License. You may add Your own attribution 552 | notices within Derivative Works that You distribute, alongside 553 | or as an addendum to the NOTICE text from the Work, provided 554 | that such additional attribution notices cannot be construed 555 | as modifying the License. 
556 | 557 | You may add Your own copyright statement to Your modifications and 558 | may provide additional or different license terms and conditions 559 | for use, reproduction, or distribution of Your modifications, or 560 | for any such Derivative Works as a whole, provided Your use, 561 | reproduction, and distribution of the Work otherwise complies with 562 | the conditions stated in this License. 563 | 564 | 5. Submission of Contributions. Unless You explicitly state otherwise, 565 | any Contribution intentionally submitted for inclusion in the Work 566 | by You to the Licensor shall be under the terms and conditions of 567 | this License, without any additional terms or conditions. 568 | Notwithstanding the above, nothing herein shall supersede or modify 569 | the terms of any separate license agreement you may have executed 570 | with Licensor regarding such Contributions. 571 | 572 | 6. Trademarks. This License does not grant permission to use the trade 573 | names, trademarks, service marks, or product names of the Licensor, 574 | except as required for reasonable and customary use in describing the 575 | origin of the Work and reproducing the content of the NOTICE file. 576 | 577 | 7. Disclaimer of Warranty. Unless required by applicable law or 578 | agreed to in writing, Licensor provides the Work (and each 579 | Contributor provides its Contributions) on an "AS IS" BASIS, 580 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 581 | implied, including, without limitation, any warranties or conditions 582 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 583 | PARTICULAR PURPOSE. You are solely responsible for determining the 584 | appropriateness of using or redistributing the Work and assume any 585 | risks associated with Your exercise of permissions under this License. 586 | 587 | 8. Limitation of Liability. 
In no event and under no legal theory, 588 | whether in tort (including negligence), contract, or otherwise, 589 | unless required by applicable law (such as deliberate and grossly 590 | negligent acts) or agreed to in writing, shall any Contributor be 591 | liable to You for damages, including any direct, indirect, special, 592 | incidental, or consequential damages of any character arising as a 593 | result of this License or out of the use or inability to use the 594 | Work (including but not limited to damages for loss of goodwill, 595 | work stoppage, computer failure or malfunction, or any and all 596 | other commercial damages or losses), even if such Contributor 597 | has been advised of the possibility of such damages. 598 | 599 | 9. Accepting Warranty or Additional Liability. While redistributing 600 | the Work or Derivative Works thereof, You may choose to offer, 601 | and charge a fee for, acceptance of support, warranty, indemnity, 602 | or other liability obligations and/or rights consistent with this 603 | License. However, in accepting such obligations, You may act only 604 | on Your own behalf and on Your sole responsibility, not on behalf 605 | of any other Contributor, and only if You agree to indemnify, 606 | defend, and hold each Contributor harmless for any liability 607 | incurred by, or claims asserted against, such Contributor by reason 608 | of your accepting any such warranty or additional liability. 609 | 610 | END OF TERMS AND CONDITIONS 611 | 612 | APPENDIX: How to apply the Apache License to your work. 613 | 614 | To apply the Apache License to your work, attach the following 615 | boilerplate notice, with the fields enclosed by brackets "[]" 616 | replaced with your own identifying information. (Don't include 617 | the brackets!) The text should be enclosed in the appropriate 618 | comment syntax for the file format. 
We also recommend that a 619 | file or class name and description of purpose be included on the 620 | same "printed page" as the copyright notice for easier 621 | identification within third-party archives. 622 | 623 | Copyright [yyyy] [name of copyright owner] 624 | 625 | Licensed under the Apache License, Version 2.0 (the "License"); 626 | you may not use this file except in compliance with the License. 627 | You may obtain a copy of the License at 628 | 629 | http://www.apache.org/licenses/LICENSE-2.0 630 | 631 | Unless required by applicable law or agreed to in writing, software 632 | distributed under the License is distributed on an "AS IS" BASIS, 633 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 634 | See the License for the specific language governing permissions and 635 | limitations under the License. 636 | -------------------------------------------------------------------------------- /mxeval/graphics/mbxp_java_conversion.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/floatai/HumanEval-XL/1e9301f6cfc2d3481a7f7e44569982285238ac99/mxeval/graphics/mbxp_java_conversion.png -------------------------------------------------------------------------------- /mxeval/graphics/paper_summary.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/floatai/HumanEval-XL/1e9301f6cfc2d3481a7f7e44569982285238ac99/mxeval/graphics/paper_summary.png -------------------------------------------------------------------------------- /mxeval/language_setup/amazon_linux_ami.sh: --------------------------------------------------------------------------------
#!/usr/bin/bash
#
# amazon_linux_ami.sh -- install the language toolchains needed to execute
# generated programs during multi-lingual evaluation.
#
# Installs, in order: C++, Ruby, PHP, Java, JavaScript (Node.js via nvm),
# TypeScript, Go, Swift, Scala, C#, Perl and Kotlin (via SDKMAN), then adds
# the Go and Swift bin directories to PATH in ~/.bashrc.
#
# The script relies on `amazon-linux-extras` and the `amazonlinux2` Swift
# build, so it presumably targets Amazon Linux 2 -- confirm before running
# it on any other distribution.
#
# Usage: bash amazon_linux_ami.sh      (from an account with sudo rights)
#
# Installation steps are best-effort: a failed package install is reported
# by the underlying tool but does not abort the remaining steps. Only a
# failed directory change aborts, because continuing would unpack root-owned
# tarballs into the wrong place.

# Print a 100-character horizontal rule followed by a newline.
rule() {
    printf '%100s\n' '' | tr ' ' '-'
}

# Print "$1" framed by a horizontal rule above and below.
banner() {
    rule
    echo "$1"
    rule
}

banner "setting up C++ "
sudo yum install -y gcc-c++

banner "setting up Ruby "
sudo amazon-linux-extras install -y ruby3.0

banner "setting up php "
# ref https://techviewleo.com/install-php-8-on-amazon-linux/
sudo yum -y install https://dl.fedoraproject.org/pub/epel/epel-release-latest-7.noarch.rpm
sudo yum -y install https://rpms.remirepo.net/enterprise/remi-release-7.rpm

sudo yum makecache

sudo yum -y install yum-utils
# Keep the remi PHP repos disabled so PHP comes from amazon-linux-extras.
sudo yum-config-manager --disable 'remi-php*'
sudo amazon-linux-extras enable php8.0
sudo yum clean metadata
sudo yum install -y php-{pear,cgi,pdo,common,curl,mbstring,gd,mysqlnd,gettext,bcmath,json,xml,fpm,intl,zip}

banner "setting up Java "
# ref https://docs.aws.amazon.com/corretto/latest/corretto-8-ug/amazon-linux-install.html
sudo amazon-linux-extras enable corretto8
sudo yum install -y java-1.8.0-amazon-corretto-devel

banner "setting up JavaScript "
# NOTE(review): this pipes a remote installer straight into bash; the nvm
# version is pinned, but consider verifying the download before executing.
curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.34.0/install.sh | bash
# Load nvm into this shell so the freshly installed `nvm` is usable below.
. ~/.nvm/nvm.sh
nvm install 16.10.0
node -e "console.log('Running Node.js ' + process.version)"
# Pin npm to the 8.x line: an unpinned `npm i -g npm` installs the latest
# npm, which no longer runs on Node 16.10.0 and would break the installs
# that follow. NOTE(review): confirm 8.x is the intended line.
npm i -g npm@8
npm install -g lodash
# Also installs lodash into the directory the script was started from.
npm i --save lodash

banner "setting up TypeScript "
npm install -g typescript

# Go and Swift are unpacked under /usr/local; stop if we cannot get there
# rather than extracting root-owned trees into an arbitrary directory.
cd /usr/local || exit 1

banner "setting up Go "
sudo wget https://go.dev/dl/go1.19.1.linux-amd64.tar.gz \
    && sudo tar -xzvf go1.19.1.linux-amd64.tar.gz \
    && sudo rm go1.19.1.linux-amd64.tar.gz

banner "setting up Swift "
sudo wget https://download.swift.org/swift-5.7-release/amazonlinux2/swift-5.7-RELEASE/swift-5.7-RELEASE-amazonlinux2.tar.gz \
    && sudo tar -xzvf swift-5.7-RELEASE-amazonlinux2.tar.gz \
    && sudo rm swift-5.7-RELEASE-amazonlinux2.tar.gz

cd ~ || exit 1

banner "setting up Scala "
# Fetch over HTTPS: the RPM is installed with root privileges right after,
# so it must not travel over an unauthenticated plain-HTTP connection.
wget https://downloads.lightbend.com/scala/2.11.8/scala-2.11.8.rpm
sudo yum -y install scala-2.11.8.rpm

banner "setting up C# "
sudo rpm -Uvh https://packages.microsoft.com/config/centos/7/packages-microsoft-prod.rpm
sudo yum install -y dotnet-sdk-6.0

banner "setting up Perl "
sudo yum install -y perl-CPAN
# PERL_MM_USE_DEFAULT=1 makes CPAN accept the default answer to its
# first-run configuration prompts so the script can run unattended.
PERL_MM_USE_DEFAULT=1 perl -MCPAN -e 'install Data::Compare'

banner "setting up Kotlin "
# NOTE(review): remote installer piped into bash; see the nvm note above.
curl -s https://get.sdkman.io | bash
export SDKMAN_DIR="$HOME/.sdkman"
# Load SDKMAN into this shell (if the install succeeded) to get `sdk`.
[[ -s "$HOME/.sdkman/bin/sdkman-init.sh" ]] && source "$HOME/.sdkman/bin/sdkman-init.sh"
sdk install kotlin

# Write the updated PATH to ~/.bashrc. Single quotes are deliberate: ${PATH}
# must be expanded when ~/.bashrc is sourced, not now. The grep guard keeps
# repeated runs from appending the same line again.
path_line='export PATH="${PATH}:/usr/local/go/bin:/usr/local/swift-5.7-RELEASE-amazonlinux2/usr/bin"'
grep -qxF "$path_line" ~/.bashrc 2>/dev/null || echo "$path_line" >> ~/.bashrc

banner 'Installation complete. Please start a new terminal session for changes to take place.'
-------------------------------------------------------------------------------- /mxeval/language_setup/ubuntu.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/bash 2 | 3 | printf "%100s" " " | tr ' ' '-' 4 | echo "" 5 | echo "setting up Ruby " 6 | printf "%100s" " " | tr ' ' '-' 7 | echo "" 8 | sudo apt update 9 | sudo apt install git curl libssl-dev libreadline-dev zlib1g-dev autoconf bison build-essential libyaml-dev libreadline-dev libncurses5-dev libffi-dev libgdbm-dev 10 | curl -fsSL https://github.com/rbenv/rbenv-installer/raw/HEAD/bin/rbenv-installer | bash 11 | echo 'export PATH="$HOME/.rbenv/bin:$PATH"' >> ~/.bashrc 12 | echo 'eval "$(rbenv init -)"' >> ~/.bashrc 13 | source ~/.bashrc 14 | rbenv install 3.0.0 15 | rbenv global 3.0.0 16 | 17 | 18 | printf "%100s" " " | tr ' ' '-' 19 | echo "" 20 | echo "setting up php " 21 | printf "%100s" " " | tr ' ' '-' 22 | echo "" 23 | sudo apt install software-properties-common ca-certificates lsb-release apt-transport-https 24 | sudo add-apt-repository ppa:ondrej/php 25 | sudo apt update 26 | sudo apt install php8.0 27 | sudo apt install -y php-{pear,cgi,pdo,common,curl,mbstring,gd,mysqlnd,gettext,bcmath,json,xml,fpm,intl,zip} 28 | 29 | 30 | printf "%100s" " " | tr ' ' '-' 31 | echo "" 32 | echo "setting up Java " 33 | printf "%100s" " " | tr ' ' '-' 34 | echo "" 35 | sudo apt-get install openjdk-8-jdk 36 | 37 | 38 | printf "%100s" " " | tr ' ' '-' 39 | echo "" 40 | echo "setting up
#!/usr/bin/bash
#
# Installs the compilers and runtimes used by the multi-language evaluation
# (Ruby, PHP, Java, JavaScript, TypeScript, Go, Swift, Scala, C#, Perl,
# Kotlin) on Ubuntu. Uses apt and the "ubuntu20.04" Swift build.

# Print a 100-character dashed rule followed by a newline.
rule() {
  printf "%100s" " " | tr ' ' '-'
  echo ""
}

# Print a section title framed by two dashed rules.
banner() {
  rule
  echo "$1"
  rule
}

banner "setting up Ruby "
sudo apt update
# -y everywhere: a confirmation prompt would stall an unattended run
# (the php-* and dotnet6 installs below already passed it).
sudo apt install -y git curl libssl-dev libreadline-dev zlib1g-dev autoconf bison build-essential libyaml-dev libreadline-dev libncurses5-dev libffi-dev libgdbm-dev
curl -fsSL https://github.com/rbenv/rbenv-installer/raw/HEAD/bin/rbenv-installer | bash
echo 'export PATH="$HOME/.rbenv/bin:$PATH"' >> ~/.bashrc
echo 'eval "$(rbenv init -)"' >> ~/.bashrc
# Activate rbenv in this shell directly instead of re-sourcing ~/.bashrc.
# NOTE(review): a ~/.bashrc that returns early for non-interactive shells
# would leave rbenv unavailable for the two commands below.
export PATH="$HOME/.rbenv/bin:$PATH"
eval "$(rbenv init -)"
rbenv install 3.0.0
rbenv global 3.0.0


banner "setting up php "
sudo apt install -y software-properties-common ca-certificates lsb-release apt-transport-https
sudo add-apt-repository -y ppa:ondrej/php
sudo apt update
sudo apt install -y php8.0
sudo apt install -y php-{pear,cgi,pdo,common,curl,mbstring,gd,mysqlnd,gettext,bcmath,json,xml,fpm,intl,zip}


banner "setting up Java "
sudo apt-get install -y openjdk-8-jdk


banner "setting up JavaScript "
sudo apt install -y curl
curl https://raw.githubusercontent.com/creationix/nvm/master/install.sh | bash
# Load nvm into this shell the same way the Amazon Linux script does.
# assumes the installer used its default location ~/.nvm -- TODO confirm
. ~/.nvm/nvm.sh
sudo apt install -y npm
nvm install 16.10.0
node -e "console.log('Running Node.js ' + process.version)"
npm i -g npm
npm install -g lodash
npm i --save lodash


banner "setting up TypeScript "
sudo npm install -g typescript

# Go and Swift are unpacked under /usr/local; the PATH line written to
# ~/.bashrc at the end of this script points at these locations.
cd /usr/local
banner "setting up Go "
# https://go.dev/doc/install
sudo wget https://go.dev/dl/go1.19.1.linux-amd64.tar.gz && sudo tar -xzvf go1.19.1.linux-amd64.tar.gz && sudo rm go1.19.1.linux-amd64.tar.gz
banner "setting up Swift "
swift_release="swift-5.7-RELEASE-ubuntu20.04.tar.gz"
sudo wget "https://download.swift.org/swift-5.7-release/ubuntu2004/swift-5.7-RELEASE/$swift_release" && sudo tar -xzvf "$swift_release" && sudo rm "$swift_release"

cd ~
banner "setting up Scala "
sudo apt-get install -y scala


banner "setting up C# "
sudo apt-get update && \
  sudo apt-get install -y dotnet6

banner "setting up Perl "
perl -MCPAN -e 'install Data::Compare'

banner "setting up Kotlin "
sudo apt install -y zip
sudo apt install -y unzip
curl -s https://get.sdkman.io | bash
export SDKMAN_DIR="$HOME/.sdkman"
[[ -s "$HOME/.sdkman/bin/sdkman-init.sh" ]] && source "$HOME/.sdkman/bin/sdkman-init.sh"
sdk install kotlin


# writing out updated PATH to ~/.bashrc
# (exported, matching the line the Amazon Linux script writes)
echo 'export PATH="${PATH}:/usr/local/swift-5.7-RELEASE-ubuntu20.04/usr/bin:/usr/local/go/bin"' >> ~/.bashrc

banner 'Installation complete. Please start a new terminal session for changes to take place.'
"""Readers and writers for the JSONL problem, sample and example files."""
from typing import Iterable, Dict
import gzip
import json
import os


ROOT = os.path.dirname(os.path.abspath(__file__))
HUMAN_EVAL = os.path.join(ROOT, "..", "data", "HumanEval.jsonl.gz")


def read_problems(evalset_file: str = HUMAN_EVAL) -> Dict[str, Dict]:
    """Load a JSONL problem file into a dict keyed by each record's ``task_id``."""
    return {task["task_id"]: task for task in stream_jsonl(evalset_file)}


def stream_jsonl(filename: str) -> Iterable[Dict]:
    """
    Parses each jsonl line and yields it as a dictionary

    Files whose name ends in ``.gz`` are decompressed on the fly. Lines that
    contain only whitespace are skipped. Text is always decoded as UTF-8,
    matching what ``write_jsonl`` emits, so the result does not depend on the
    platform's default encoding.
    """
    opener = gzip.open if filename.endswith(".gz") else open
    with opener(filename, "rt", encoding="utf-8") as fp:
        for line in fp:
            if line.strip():
                yield json.loads(line)


def write_jsonl(filename: str, data: Iterable[Dict], append: bool = False):
    """
    Writes an iterable of dictionaries to jsonl

    ``filename`` is passed through ``os.path.expanduser``. With ``append`` the
    file is opened in append mode, otherwise it is truncated. A name ending
    in ``.gz`` produces gzip-compressed output; each record is written as one
    UTF-8 encoded JSON line.
    """
    mode = "ab" if append else "wb"
    filename = os.path.expanduser(filename)
    with open(filename, mode) as fp:
        if filename.endswith(".gz"):
            # The gzip stream is always opened with 'wb': appending is
            # handled by the mode of the underlying file object.
            with gzip.GzipFile(fileobj=fp, mode="wb") as gzfp:
                for x in data:
                    gzfp.write((json.dumps(x) + "\n").encode("utf-8"))
        else:
            for x in data:
                fp.write((json.dumps(x) + "\n").encode("utf-8"))


def get_metadata(dataset, metadata_type="problem"):
    """
    Load the metadata file of a bundled dataset.

    :param dataset: one of "mbxp", "multi-humaneval", "mathqa-x".
    :param metadata_type: "problem" reads ``metadata.json``, "example" reads
        ``metadata_examples.json``.
    :return: ``(metadata, datadir)`` -- the parsed JSON and the directory it
        was read from.
    """
    assert metadata_type in ["problem", "example"]
    assert dataset in ["mbxp", "multi-humaneval", "mathqa-x"], f"Unsupported dataset {dataset}"
    dataset_dirmap = {
        "mbxp": "mbxp",
        "multi-humaneval": "multilingual_humaneval",
        "mathqa-x": "multilingual_mathqa",
    }
    typemap = {
        "problem": "metadata.json",
        "example": "metadata_examples.json",
    }
    datadir = os.path.join(ROOT, "..", "data", dataset_dirmap[dataset])
    path = os.path.join(datadir, typemap[metadata_type])
    with open(path, "r", encoding="utf-8") as f:
        metadata = json.load(f)
    return metadata, datadir


def get_supported_langs(dataset):
    """Return the keys of the dataset's problem metadata as a list."""
    metadata, _ = get_metadata(dataset, metadata_type="problem")
    return list(metadata.keys())


def get_data(dataset="mbxp", language="python"):
    """
    Load all problems of ``dataset`` for ``language`` (matched lower-cased).

    :raises ValueError: if the language has no entry in the metadata file.
    :return: dict of problems keyed by ``task_id`` (see ``read_problems``).
    """
    metadata, datadir = get_metadata(dataset, metadata_type="problem")
    if language.lower() not in metadata:
        raise ValueError(f"Language {language} not found in metadata file")
    datafile = metadata[language.lower()]
    print(f"Loading {dataset} | language = {language}")
    return read_problems(os.path.join(datadir, datafile))


# due to similar format, examples from mbxp are sufficient to be used
# for few-shot prompting in multi-humaneval
def get_examples(dataset="mbxp", language="python", num_examples=None):
    """
    Return few-shot examples for ``language``.

    With ``num_examples=None`` the raw example records are returned as a lazy
    stream. Otherwise a list with at most ``num_examples`` records is
    returned, and each record's ``"prompt"`` is replaced by the prompt of the
    problem with the same ``task_id``.

    :raises ValueError: if the language has no entry in the metadata file.
    """
    assert dataset in ["mbxp"], f"No fewshot examples in dataset {dataset}"
    metadata, datadir = get_metadata(dataset=dataset, metadata_type="example")
    if language.lower() not in metadata:
        raise ValueError(f"Language {language} not found in metadata file")
    datafile = metadata[language.lower()]
    print(f"Loading examples from {dataset} | language = {language}")
    # use streams
    stream = stream_jsonl(os.path.join(datadir, datafile))
    if num_examples is None:
        # return the entire stream
        return stream

    # The stream opened above is reused here; re-entering get_examples would
    # only repeat the metadata load and the log line.
    problems = get_data(dataset=dataset, language=language)
    examples = []
    for idx, example in enumerate(stream):
        if idx == num_examples:
            break
        example["prompt"] = problems[example["task_id"]]["prompt"]
        examples.append(example)
    return examples
import json
import sys
import os

import fire
from mxeval.data import HUMAN_EVAL
from mxeval.evaluation import evaluate_functional_correctness


def _normalize_k(k):
    """Turn the ``k`` argument (int, "1,10" string, or iterable) into a list of ints."""
    if isinstance(k, str):
        return [int(part) for part in k.split(",") if part.strip()]
    if isinstance(k, int):
        # a single value on the command line (e.g. ``--k=1``) arrives as a bare int
        return [k]
    return [int(value) for value in k]


def entry_point(
    sample_file: str,
    problem_file: str = HUMAN_EVAL,
    k: tuple = (1, 10, 100, 1000),  # from command line, use '1,10,100' for example
    # at least one worker, even when cpu_count() is 1 or unknown (None)
    n_workers: int = max(1, (os.cpu_count() or 2) - 1),
    timeout: float = 15.0,
):
    """
    Evaluates the functional correctness of generated samples.

    The pass@k summary returned by ``evaluate_functional_correctness`` is
    printed and written as JSON to f"{sample_file}_passatk.json".

    :param sample_file: JSONL file with the generated samples.
    :param problem_file: problem file handed to the evaluator.
    :param k: the k values to report; a tuple, a single int, or a
        comma-separated string.
    :param n_workers: number of worker threads handed to the evaluator.
    :param timeout: per-sample timeout handed to the evaluator.
    """
    print(f"\n\nEvaluating {sample_file}")
    k = _normalize_k(k)
    results = evaluate_functional_correctness(
        sample_file, k, n_workers, timeout, problem_file
    )
    with open(sample_file + "_passatk.json", "w") as f:
        # json.dumps instead of str(): the file carries a .json suffix, and
        # str() of the result dict is a Python repr, not JSON.
        f.write(json.dumps({name: float(value) for name, value in results.items()}))
    print(results)


def main():
    """Expose ``entry_point`` as a command line interface through ``fire``."""
    fire.Fire(entry_point)


# NOTE(review): this runs on import, not only when executed as a script;
# left as-is because an entry point that targets the module itself would
# depend on it -- confirm against the packaging configuration.
sys.exit(main())
from io import UnsupportedOperation
import itertools
import os
import time
from collections import Counter, defaultdict
from concurrent.futures import ThreadPoolExecutor, as_completed
from typing import Dict, Iterable, List, Union

import numpy as np
import tqdm
from mxeval.data import HUMAN_EVAL, read_problems, stream_jsonl, write_jsonl

# Amazon modification
# import check correctness for all languages
from mxeval.execution import (
    check_correctness,
    check_correctness_cpp,
    check_correctness_csharp,
    check_correctness_go,
    check_correctness_java,
    check_correctness_javascript,
    check_correctness_kotlin,
    check_correctness_perl,
    check_correctness_php,
    check_correctness_ruby,
    check_correctness_scala,
    check_correctness_swift,
    check_correctness_typescript,
)

# Maps a lower-case language name to the function that executes one sample.
check_correctness_function_map = {
    "python": check_correctness,
    "java": check_correctness_java,
    "javascript": check_correctness_javascript,
    "typescript": check_correctness_typescript,
    "kotlin": check_correctness_kotlin,
    "ruby": check_correctness_ruby,
    "php": check_correctness_php,
    "cpp": check_correctness_cpp,
    "csharp": check_correctness_csharp,
    "go": check_correctness_go,
    "perl": check_correctness_perl,
    "scala": check_correctness_scala,
    "swift": check_correctness_swift,
}


def estimate_pass_at_k(
    num_samples: Union[int, List[int], np.ndarray],
    num_correct: Union[List[int], np.ndarray],
    k: int,
) -> np.ndarray:
    """
    Estimates pass@k of each problem and returns them in an array.

    :param num_samples: samples per problem; a single int is applied to
        every problem, otherwise it must have the length of ``num_correct``.
    :param num_correct: number of passing samples per problem.
    :param k: the k in pass@k.
    """

    def estimator(n: int, c: int, k: int) -> float:
        """
        Calculates 1 - comb(n - c, k) / comb(n, k).
        """
        if n - c < k:
            return 1.0
        return 1.0 - np.prod(1.0 - k / np.arange(n - c + 1, n + 1))

    if isinstance(num_samples, int):
        num_samples_it = itertools.repeat(num_samples, len(num_correct))
    else:
        assert len(num_samples) == len(num_correct)
        num_samples_it = iter(num_samples)

    return np.array(
        [estimator(int(n), int(c), k) for n, c in zip(num_samples_it, num_correct)]
    )


def get_execute_function(lang):
    """Return the execution function for ``lang`` (matched case-insensitively)."""
    lang = lang.lower()
    assert lang in check_correctness_function_map, f"Language {lang} is not among the supported languages: {check_correctness_function_map.keys()}"
    return check_correctness_function_map[lang]


def evaluate_functional_correctness(
    sample_file: str,
    k: Iterable[int] = (1, 10, 100),  # immutable default instead of a shared list
    # at least one worker, even when cpu_count() is 1 or unknown (None)
    n_workers: int = max(1, (os.cpu_count() or 2) - 1),
    timeout: float = 10.0,
    problem_file: str = HUMAN_EVAL,
):
    """
    Evaluates the functional correctness of generated samples, and writes
    results to f"{sample_file}_results.jsonl"

    :param sample_file: JSONL file; every record needs "task_id",
        "completion" and "language".
    :param k: k values for which pass@k is computed; a value is skipped
        unless every problem has at least k samples.
    :param n_workers: size of the thread pool (values below 1 are raised to 1).
    :param timeout: timeout handed to the per-language execution function.
    :param problem_file: path of the problem file, or an already loaded dict
        of problems keyed by task id.
    :return: dict mapping "pass@k" to the mean estimate over all problems.
    """

    if isinstance(problem_file, dict):
        print("Skip reading problems -- using problem_file (dict) as problems")
        problems = problem_file
    else:
        problems = read_problems(problem_file)

    if n_workers is not None:
        # ThreadPoolExecutor rejects max_workers <= 0
        n_workers = max(1, n_workers)

    seed = int(time.time() * 1000000) % 1000000
    np.random.seed(seed=seed)  # microsecond

    # see execution.py for details
    # Check the generated samples against test suites.
    with ThreadPoolExecutor(max_workers=n_workers) as executor:
        futures = []
        completion_id = Counter()
        n_samples = 0
        results = defaultdict(list)

        print("Reading samples...")
        for sample in tqdm.tqdm(stream_jsonl(sample_file)):
            task_id = sample["task_id"]
            completion = sample["completion"]
            args = (problems[task_id], completion, timeout, completion_id[task_id])
            # single shared dispatch table, language matched case-insensitively
            check_correctness_function = get_execute_function(sample["language"])
            futures.append(executor.submit(check_correctness_function, *args))
            completion_id[task_id] += 1
            n_samples += 1

        assert len(completion_id) == len(problems), "Some problems are not attempted."

        print("Running test suites...")
        for future in tqdm.tqdm(as_completed(futures), total=len(futures)):
            result = future.result()  # this is the execution stage
            results[result["task_id"]].append((result["completion_id"], result))

    # common code for all languages
    # Calculate pass@k.
    total, correct = [], []
    for task_results in results.values():
        # order by completion id only, so the result dicts are never compared;
        # combine_results below relies on this order
        task_results.sort(key=lambda item: item[0])
        passed = [item[1]["passed"] for item in task_results]
        total.append(len(passed))
        correct.append(sum(passed))
    total = np.array(total)
    correct = np.array(correct)

    pass_at_k = {
        f"pass@{k_value}": estimate_pass_at_k(total, correct, k_value).mean()
        for k_value in k
        if (total >= k_value).all()
    }

    # Finally, save the results in one file:
    def combine_results():
        # Samples are re-read in file order; the n-th sample of a task is
        # matched with that task's n-th result (sorted by completion id).
        for sample in stream_jsonl(sample_file):
            task_id = sample["task_id"]
            result = results[task_id].pop(0)
            sample["result"] = result[1]["result"]
            sample["passed"] = result[1]["passed"]
            sample["time_elapsed"] = result[1]["time_elapsed"]
            yield sample

    out_file = sample_file + "_results.jsonl"
    print(f"Writing results to {out_file}...")
    write_jsonl(out_file, tqdm.tqdm(combine_results(), total=n_samples))

    return pass_at_k
# check_correctness_* functions are AWS additions

import contextlib
import faulthandler
import io
import multiprocessing
import os
import platform
import random
import shutil
import signal
import string
import subprocess
import tempfile
import time
import errno
from typing import Dict, Optional
import threading

# Serialises creation of the per-language working directory across threads.
lock = threading.Lock()


def _check_correctness_jvm(
    problem,
    completion,
    timeout,
    completion_id,
    verbose,
    language,
    compile_timeout,
    compile_command_lambda,
    run_command_lambda,
):
    """
    Shared compile-then-run logic of the Java and Scala checkers.

    The program (prompt + completion + test) is written to
    ``main.<language>`` inside a fresh randomly named directory, compiled,
    and -- only when compilation succeeded -- run. The directory is removed
    afterwards. Both lambdas receive ``(source_path, base_path)`` and return
    the argument list for ``subprocess.run``.
    """
    current_dir = os.path.dirname(os.path.realpath(__file__))
    entire_string = problem["prompt"] + completion + problem["test"]
    base_path = setup_base_path(current_dir, f"{language}_exec_eval", "")
    os.makedirs(base_path, exist_ok=True)
    path = os.path.join(base_path, f"main.{language}")

    # explicit UTF-8: prompts may contain non-ASCII text
    with open(path, "w", encoding="utf-8") as f:
        f.write(entire_string)

    try:
        exec_result_compile = subprocess.run(
            compile_command_lambda(path, base_path),
            timeout=compile_timeout,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
        )
        compiled = exec_result_compile.returncode == 0
        if verbose:
            print("exec_result_compile", exec_result_compile)
        if compiled:
            start = time.time()
            exec_result_run = subprocess.run(
                run_command_lambda(path, base_path),
                timeout=timeout,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                text=True,
            )
            elapsed = 1000.0 * (time.time() - start)
            if verbose:
                print("exec result run", exec_result_run)
            passed = exec_result_run.returncode == 0
            message = exec_result_run.stderr
        else:
            # nothing to run; report the compiler output, as the other
            # checkers in this module do
            passed, message, elapsed = False, exec_result_compile.stderr, None
    except Exception as e:
        # any failure (including a timeout) is reported as a failed sample
        passed = False
        message = str(e)
        elapsed = None
        compiled = False

    try:
        shutil.rmtree(base_path)
    except Exception as e:
        if verbose:
            print(f"Error cleaning up directory {base_path}: {e}")

    return dict(
        task_id=problem["task_id"],
        passed=passed,
        result=message,
        completion_id=completion_id,
        time_elapsed=elapsed,
        compiled=compiled,
    )


def check_correctness_java(
    problem: Dict,
    completion: str,
    timeout: float,
    completion_id: Optional[int] = None,
    verbose=False,
    language="java",
    compile_timeout: float = 100,
):
    """
    Run all evaluation under java_exec_eval + randomized directory to avoid collision.
    Using subprocess with concurrent.futures for multi-thread evaluation.
    Make sure to clean up resources even if the test cases fail.

    Compiles with ``javac`` and runs class ``Main``. Returns a dict with
    task_id, passed, result (stderr or error text), completion_id,
    time_elapsed (milliseconds, None when the program was not run) and
    compiled.
    """
    return _check_correctness_jvm(
        problem,
        completion,
        timeout,
        completion_id,
        verbose,
        language,
        compile_timeout,
        compile_command_lambda=lambda path, base_path: ["javac", path],
        run_command_lambda=lambda path, base_path: ["java", "-cp", base_path, "Main"],
    )


def check_correctness_scala(
    problem: Dict,
    completion: str,
    timeout: float,
    completion_id: Optional[int] = None,
    verbose=False,
    language="scala",
    compile_timeout: float = 100,
):
    """
    Scala counterpart of ``check_correctness_java``: compiles with ``scalac``
    into the working directory and runs object ``Main`` with ``scala``.
    Returns the same result dict.
    """
    return _check_correctness_jvm(
        problem,
        completion,
        timeout,
        completion_id,
        verbose,
        language,
        compile_timeout,
        compile_command_lambda=lambda path, base_path: ["scalac", path, "-d", base_path],
        run_command_lambda=lambda path, base_path: ["scala", "-cp", base_path, "Main"],
    )


def check_correctness_perl(
    problem: Dict,
    completion: str,
    timeout: float,
    completion_id: Optional[int] = None,
    verbose=False,
):
    """Check a Perl sample by running it with ``perl``."""
    return check_correctness_helper(
        problem=problem,
        completion=completion,
        timeout=timeout,
        completion_id=completion_id,
        verbose=verbose,
        language="perl",
        extension=".pl",
        subprocess_command_lambda=lambda x: ["perl", f"{x}.pl"],
    )


def check_correctness_swift(
    problem: Dict,
    completion: str,
    timeout: float,
    completion_id: Optional[int] = None,
    verbose=False,
):
    """Check a Swift sample by running it with ``swift``."""
    return check_correctness_helper(
        problem=problem,
        completion=completion,
        timeout=timeout,
        completion_id=completion_id,
        verbose=verbose,
        language="swift",
        extension=".swift",
        subprocess_command_lambda=lambda x: ["swift", f"{x}.swift"],
    )


def check_correctness_javascript(
    problem: Dict,
    completion: str,
    timeout: float,
    completion_id: Optional[int] = None,
    verbose=False,
):
    """Check a JavaScript sample by running it with ``node``."""
    return check_correctness_helper(
        problem=problem,
        completion=completion,
        timeout=timeout,
        completion_id=completion_id,
        verbose=verbose,
        language="javascript",
        extension=".js",
        subprocess_command_lambda=lambda x: ["node", f"{x}.js"],
    )


def check_correctness_typescript(
    problem: Dict,
    completion: str,
    timeout: float,
    completion_id: Optional[int] = None,
    verbose=False,
):
    """Check a TypeScript sample: transpile with ``npx tsc``, run the emitted ``.js`` with ``node``."""
    return check_correctness_helper(
        problem=problem,
        completion=completion,
        timeout=timeout,
        completion_id=completion_id,
        verbose=verbose,
        language="typescript",
        extension=".ts",
        compile_command_lambda=lambda x: f"npx tsc {x} --target es5 --lib es2016".split(),
        subprocess_command_lambda=lambda x: ["node", f"{x}.js"],
    )


def check_correctness_ruby(
    problem: Dict,
    completion: str,
    timeout: float,
    completion_id: Optional[int] = None,
    verbose=False,
):
    """Check a Ruby sample by running it with ``ruby``."""
    return check_correctness_helper(
        problem=problem,
        completion=completion,
        timeout=timeout,
        completion_id=completion_id,
        verbose=verbose,
        language="ruby",
        extension=".rb",
        subprocess_command_lambda=lambda x: ["ruby", f"{x}.rb"],
    )


def check_correctness_kotlin(
    problem: Dict,
    completion: str,
    timeout: float,
    completion_id: Optional[int] = None,
    verbose=False,
):
    """Check a Kotlin sample: build a runnable jar with ``kotlinc``, run it with ``java -jar``."""
    return check_correctness_helper(
        problem=problem,
        completion=completion,
        timeout=timeout,
        completion_id=completion_id,
        verbose=verbose,
        language="kotlin",
        extension=".kt",
        compile_command_lambda=lambda x: [
            "kotlinc",
            f"{x}.kt",
            "-include-runtime",
            "-d",
            f"{x}.jar",
        ],
        compile_timeout=100,  # needs longer than 20 sec
        subprocess_command_lambda=lambda x: ["java", "-jar", f"{x}.jar"],
        extra_cleanup=lambda x: f"{x}.jar",
    )


def check_correctness_php(
    problem: Dict,
    completion: str,
    timeout: float,
    completion_id: Optional[int] = None,
    verbose=False,
):
    """Check a PHP sample by running it with ``php``."""
    return check_correctness_helper(
        problem=problem,
        completion=completion,
        timeout=timeout,
        completion_id=completion_id,
        verbose=verbose,
        language="php",
        extension=".php",
        subprocess_command_lambda=lambda x: ["php", f"{x}.php"],
    )


def check_correctness_go(
    problem: Dict,
    completion: str,
    timeout: float,
    completion_id: Optional[int] = None,
    verbose=False,
):
    """Check a Go sample by running it with ``go run``."""
    return check_correctness_helper(
        problem=problem,
        completion=completion,
        timeout=timeout,
        completion_id=completion_id,
        verbose=verbose,
        language="go",
        extension=".go",
        subprocess_command_lambda=lambda x: ["go", "run", f"{x}.go"],
    )


def check_correctness_csharp(
    problem: Dict,
    completion: str,
    timeout: float,
    completion_id: Optional[int] = None,
    verbose=False,
    compilation_timeout: float = 100,
):
    """
    Check a C# sample inside a copy of the template project.

    ``../resources/eval_csproj.zip`` is unpacked into a fresh randomly named
    directory under ``cs_eval``, ``Program.cs`` is overwritten with
    prompt + completion + test, the project is built with ``dotnet build``
    and the resulting binary is executed. The directory is always removed.

    :return: dict with task_id, passed, result (stderr or error text),
        completion_id, compiled and time_elapsed (milliseconds, None when
        the program was not run).
    """
    current_dir = os.path.dirname(os.path.realpath(__file__))
    program = problem["prompt"] + completion + problem["test"]
    # template c# project has all necessary DLLs
    template_cs_proj_zip = os.path.join(current_dir, "../resources/eval_csproj.zip")
    cs_eval_dir = setup_base_path(current_dir, "cs_eval", "")

    # extract zip into cs_eval_dir
    # (argument lists instead of str.split() so paths with spaces survive)
    subprocess.check_call(
        ["unzip", "-q", template_cs_proj_zip, "-d", cs_eval_dir],
        timeout=compilation_timeout,
    )

    passed, message = None, None
    compiled = False

    try:
        cs_project_path = os.path.join(cs_eval_dir, "eval_csproj")
        # entrypoint
        cs_program_path = os.path.join(cs_project_path, "Program.cs")
        with open(cs_program_path, "w", encoding="utf-8") as f1:
            f1.write(program)

        compile_result = subprocess.run(
            ["dotnet", "build", cs_project_path],
            timeout=compilation_timeout,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
        )
        compiled = compile_result.returncode == 0
        message = compile_result.stderr

        if compiled:
            compiled_bin = os.path.join(cs_project_path, "bin/Debug/net6.0/eval_csproj")
            start = time.time()
            exec_result = subprocess.run(
                [compiled_bin],
                timeout=timeout,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                text=True,
            )
            passed = exec_result.returncode == 0
            message = exec_result.stderr
            elapsed = 1000.0 * (time.time() - start)
        else:
            passed, elapsed = False, None
    except Exception as e:
        if verbose:
            print(f"error occurred when running test cases: {e}")
        message = str(e)
        passed = False
        elapsed = None
    finally:
        try:
            shutil.rmtree(cs_eval_dir)
        except Exception as e:
            if verbose:
                print(f"Error trying to clean up directory: {e}")

    assert passed is not None, "should be either True or False"

    return dict(
        task_id=problem["task_id"],
        passed=passed,
        result=message,
        completion_id=completion_id,
        compiled=compiled,
        time_elapsed=elapsed,
    )


def check_correctness_cpp(
    problem: Dict,
    completion: str,
    timeout: float,
    completion_id: Optional[int] = None,
    verbose=False,
):
    """
    Check a C++ sample: compile with ``g++`` and run the produced binary.

    Both commands use file names relative to the working directory
    (``cwd=True`` makes the helper run them inside it).
    """
    return check_correctness_helper(
        problem=problem,
        completion=completion,
        timeout=timeout,
        completion_id=completion_id,
        verbose=verbose,
        language="cpp",  # was "c#", which named the working directory after the wrong language
        extension=".cpp",
        compile_command_lambda=lambda x: [
            "g++",
            f"{os.path.basename(x)}.cpp",
            "-o",
            f"{os.path.basename(x)}_cpp",
        ],
        compile_timeout=100,
        subprocess_command_lambda=lambda x: [f"./{os.path.basename(x)}_cpp"],
        extra_cleanup=lambda x: f"{x}_cpp",
        cwd=True,
    )


def setup_base_path(
    current_dir,
    language_dirname,
    extension
):
    """
    Pick an unused random path inside ``current_dir/language_dirname``.

    The directory is created if missing. A 10-letter random base name is
    drawn up to ten times until ``<base><extension>`` does not exist yet.

    :return: the path WITHOUT the extension.
    :raises AssertionError: if all ten attempts collide.
    """
    language_dir = os.path.join(current_dir, language_dirname)
    with lock:
        os.makedirs(language_dir, exist_ok=True)

    for _ in range(10):
        basename = "".join(
            random.choices(string.ascii_lowercase + string.ascii_uppercase, k=10)
        )
        base_path = os.path.join(language_dir, basename)
        # exists() covers files and directories alike, so an existing
        # directory is no longer handed out when extension == ""
        if not os.path.exists(f"{base_path}{extension}"):
            return base_path

    raise AssertionError("Unable to avoid filename collision")


def check_correctness_helper(
    problem: Dict,
    completion: str,
    timeout: float,
    completion_id: Optional[int] = None,
    verbose=False,
    language=None,
    extension=None,
    subprocess_command_lambda=None,
    compile_timeout=100,
    compile_command_lambda=None,
    extra_cleanup=None,
    cwd=None,
):
    """
    Generic checker: write the program to a file, optionally compile, run.

    :param problem: dict with "prompt", "test" and "task_id".
    :param completion: generated code inserted between prompt and test.
    :param timeout: timeout in seconds for the run step.
    :param language: used to name the working directory
        ``<language>_exec_eval`` next to this module.
    :param extension: extension (including the dot) of the source file.
    :param subprocess_command_lambda: maps the extension-less file path to
        the argument list that runs the program.
    :param compile_command_lambda: optional, same shape, for a compile step.
    :param compile_timeout: timeout in seconds for the compile step.
    :param extra_cleanup: optional; maps the extension-less path to one
        additional file to delete afterwards.
    :param cwd: any non-None value makes both commands run inside the
        working directory.
    :return: dict with task_id, passed, result (stderr or error text),
        completion_id, time_elapsed (milliseconds, None when the program was
        not run) and compiled.
    """
    current_dir = os.path.dirname(os.path.realpath(__file__))
    entire_string = problem["prompt"] + completion + problem["test"]

    language_dirname = f"{language}_exec_eval"

    base_path = setup_base_path(current_dir, language_dirname, extension)
    path = base_path + f"{extension}"

    if cwd is not None:
        cwd = os.path.dirname(base_path)
    # explicit UTF-8: prompts may contain non-ASCII text
    with open(path, "w", encoding="utf-8") as f:
        f.write(entire_string)
    try:
        if compile_command_lambda is not None:
            compile_result = subprocess.run(
                compile_command_lambda(base_path),
                timeout=compile_timeout,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                text=True,
                cwd=cwd,
            )
            if language == "typescript":
                # tsc exits with 2 when it reports diagnostics but still
                # emits JavaScript; 0 is a clean compile. Both leave a
                # runnable .js file.
                compiled = compile_result.returncode in (0, 2)
            else:
                compiled = compile_result.returncode == 0
        else:
            compiled = True

        if compiled:
            start = time.time()
            exec_result_run = subprocess.run(
                subprocess_command_lambda(base_path),
                timeout=timeout,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                text=True,
                cwd=cwd,
            )
            elapsed = 1000.0 * (time.time() - start)
            if verbose:
                print("exec result run", exec_result_run)

            passed = exec_result_run.returncode == 0
            message = exec_result_run.stderr
        else:
            passed, message, elapsed = False, compile_result.stderr, None

    except Exception as e:
        if verbose:
            print(f"error occurred when running test cases: {e}")
        message = str(e)
        passed, elapsed, compiled = False, None, False

    # clean up
    try:
        os.remove(path)
    except Exception as e:
        if verbose:
            print(f"Error trying to clean up file: {e}")
    try:
        if extra_cleanup is not None:
            extra_remove_path = extra_cleanup(base_path)
            assert isinstance(extra_remove_path, str)
            os.remove(extra_remove_path)
    except Exception as e:
        if verbose:
            print(f"Error trying to clean up file: {e}")

    # get result
    return dict(
        task_id=problem["task_id"],
        passed=passed,
        result=message,
        completion_id=completion_id,
        time_elapsed=elapsed,
        compiled=compiled,
    )
574 | :param completion_id: an optional completion ID so we can match 575 | the results later even if execution finishes asynchronously. 576 | """ 577 | 578 | def unsafe_execute(): 579 | 580 | with create_tempdir(): 581 | 582 | # These system calls are needed when cleaning up tempdir. 583 | import os 584 | import shutil 585 | 586 | rmtree = shutil.rmtree 587 | rmdir = os.rmdir 588 | chdir = os.chdir 589 | 590 | # Disable functionalities that can make destructive changes to the test. 591 | reliability_guard() 592 | 593 | # Construct the check program and run it. 594 | check_program = ( 595 | problem["prompt"] 596 | + completion 597 | + "\n" 598 | + problem["test"] 599 | + "\n" 600 | + f"check({problem['entry_point']})" 601 | ) 602 | 603 | try: 604 | exec_globals = {} 605 | with swallow_io(): 606 | with time_limit(timeout): 607 | # WARNING 608 | # This program exists to execute untrusted model-generated code. Although 609 | # it is highly unlikely that model-generated code will do something overtly 610 | # malicious in response to this test suite, model-generated code may act 611 | # destructively due to a lack of model capability or alignment. 612 | # Users are strongly encouraged to sandbox this evaluation suite so that it 613 | # does not perform destructive actions on their host or network. For more 614 | # information on how OpenAI sandboxes its code, see the accompanying paper. 615 | # Once you have read this disclaimer and taken appropriate precautions, 616 | # uncomment the following line and proceed at your own risk: 617 | exec(check_program, exec_globals) 618 | result.append("passed") 619 | except TimeoutException: 620 | result.append("timed out") 621 | except BaseException as e: 622 | result.append(f"failed: {e}") 623 | 624 | # Needed for cleaning up. 
625 | shutil.rmtree = rmtree 626 | os.rmdir = rmdir 627 | os.chdir = chdir 628 | 629 | manager = multiprocessing.Manager() 630 | result = manager.list() 631 | 632 | start = time.time() 633 | p = multiprocessing.Process(target=unsafe_execute) 634 | p.start() 635 | p.join(timeout=timeout + 1) 636 | if p.is_alive(): 637 | p.kill() 638 | elapsed = 1000.0 * (time.time() - start) 639 | 640 | if not result: 641 | result.append("timed out") 642 | 643 | return dict( 644 | task_id=problem["task_id"], 645 | passed=result[0] == "passed", 646 | result=result[0], 647 | completion_id=completion_id, 648 | time_elapsed=elapsed, 649 | ) 650 | 651 | 652 | @contextlib.contextmanager 653 | def time_limit(seconds: float): 654 | def signal_handler(signum, frame): 655 | raise TimeoutException("Timed out!") 656 | 657 | signal.setitimer(signal.ITIMER_REAL, seconds) 658 | signal.signal(signal.SIGALRM, signal_handler) 659 | try: 660 | yield 661 | finally: 662 | signal.setitimer(signal.ITIMER_REAL, 0) 663 | 664 | 665 | @contextlib.contextmanager 666 | def swallow_io(): 667 | stream = WriteOnlyStringIO() 668 | with contextlib.redirect_stdout(stream): 669 | with contextlib.redirect_stderr(stream): 670 | with redirect_stdin(stream): 671 | yield 672 | 673 | 674 | @contextlib.contextmanager 675 | def create_tempdir(): 676 | with tempfile.TemporaryDirectory() as dirname: 677 | with chdir(dirname): 678 | yield dirname 679 | 680 | 681 | class TimeoutException(Exception): 682 | pass 683 | 684 | 685 | class WriteOnlyStringIO(io.StringIO): 686 | """StringIO that throws an exception when it's read from""" 687 | 688 | def read(self, *args, **kwargs): 689 | raise IOError 690 | 691 | def readline(self, *args, **kwargs): 692 | raise IOError 693 | 694 | def readlines(self, *args, **kwargs): 695 | raise IOError 696 | 697 | def readable(self, *args, **kwargs): 698 | """Returns True if the IO object can be read.""" 699 | return False 700 | 701 | 702 | class redirect_stdin(contextlib._RedirectStream): # type: 
ignore 703 | _stream = "stdin" 704 | 705 | 706 | @contextlib.contextmanager 707 | def chdir(root): 708 | if root == ".": 709 | yield 710 | return 711 | cwd = os.getcwd() 712 | os.chdir(root) 713 | try: 714 | yield 715 | except BaseException as exc: 716 | raise exc 717 | finally: 718 | os.chdir(cwd) 719 | 720 | 721 | def reliability_guard(maximum_memory_bytes: Optional[int] = None): 722 | """ 723 | This disables various destructive functions and prevents the generated code 724 | from interfering with the test (e.g. fork bomb, killing other processes, 725 | removing filesystem files, etc.) 726 | WARNING 727 | This function is NOT a security sandbox. Untrusted code, including, model- 728 | generated code, should not be blindly executed outside of one. See the 729 | Codex paper for more information about OpenAI's code sandbox, and proceed 730 | with caution. 731 | """ 732 | 733 | if maximum_memory_bytes is not None: 734 | import resource 735 | 736 | resource.setrlimit( 737 | resource.RLIMIT_AS, (maximum_memory_bytes, maximum_memory_bytes) 738 | ) 739 | resource.setrlimit( 740 | resource.RLIMIT_DATA, (maximum_memory_bytes, maximum_memory_bytes) 741 | ) 742 | if not platform.uname().system == "Darwin": 743 | resource.setrlimit( 744 | resource.RLIMIT_STACK, (maximum_memory_bytes, maximum_memory_bytes) 745 | ) 746 | 747 | faulthandler.disable() 748 | 749 | import builtins 750 | 751 | builtins.exit = None 752 | builtins.quit = None 753 | 754 | import os 755 | 756 | os.environ["OMP_NUM_THREADS"] = "1" 757 | 758 | os.kill = None 759 | os.system = None 760 | os.putenv = None 761 | os.remove = None 762 | os.removedirs = None 763 | os.rmdir = None 764 | os.fchdir = None 765 | os.setuid = None 766 | os.fork = None 767 | os.forkpty = None 768 | os.killpg = None 769 | os.rename = None 770 | os.renames = None 771 | os.truncate = None 772 | os.replace = None 773 | os.unlink = None 774 | os.fchmod = None 775 | os.fchown = None 776 | os.chmod = None 777 | os.chown = None 778 | 
os.chroot = None 779 | os.fchdir = None 780 | os.lchflags = None 781 | os.lchmod = None 782 | os.lchown = None 783 | os.getcwd = None 784 | os.chdir = None 785 | 786 | import shutil 787 | 788 | shutil.rmtree = None 789 | shutil.move = None 790 | shutil.chown = None 791 | 792 | import subprocess 793 | 794 | subprocess.Popen = None # type: ignore 795 | 796 | __builtins__["help"] = None 797 | 798 | import sys 799 | 800 | sys.modules["ipdb"] = None 801 | sys.modules["joblib"] = None 802 | sys.modules["resource"] = None 803 | sys.modules["psutil"] = None 804 | sys.modules["tkinter"] = None 805 | -------------------------------------------------------------------------------- /mxeval/requirements.txt: -------------------------------------------------------------------------------- 1 | tqdm 2 | fire 3 | numpy 4 | -------------------------------------------------------------------------------- /mxeval/resources/eval_csproj.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/floatai/HumanEval-XL/1e9301f6cfc2d3481a7f7e44569982285238ac99/mxeval/resources/eval_csproj.zip -------------------------------------------------------------------------------- /mxeval/setup.py: -------------------------------------------------------------------------------- 1 | # Original Copyright 2021 OpenAI under MIT License. 2 | # Modifications Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
3 | import os 4 | 5 | import pkg_resources 6 | from setuptools import setup, find_packages 7 | 8 | 9 | setup( 10 | name="mxeval", 11 | py_modules=["mxeval"], 12 | version="1.0", 13 | description="", 14 | author="AWS AI Labs", 15 | packages=find_packages(), 16 | install_requires=[ 17 | str(r) 18 | for r in pkg_resources.parse_requirements( 19 | open(os.path.join(os.path.dirname(__file__), "requirements.txt")) 20 | ) 21 | ], 22 | entry_points={ 23 | "console_scripts": [ 24 | "evaluate_functional_correctness = mxeval.evaluate_functional_correctness", 25 | ] 26 | } 27 | ) 28 | -------------------------------------------------------------------------------- /python_chinese_generated_samples.jsonl: -------------------------------------------------------------------------------- 1 | {"task_id": "python/0", "language": "python", "completion": " for i in range(len(operations)):\n if operations[i] < 0:\n return True\n return False\n\n\n"} 2 | {"task_id": "python/1", "language": "python", "completion": " return sum(numbers), reduce(lambda x, y: x * y, numbers)\n\n\n"} 3 | {"task_id": "python/2", "language": "python", "completion": " return ''.join(chr(ord(a_i) ^ ord(b_i)) for a_i, b_i in zip(a, b))\n\n\n"} 4 | {"task_id": "python/3", "language": "python", "completion": " if not strings:\n return None\n return max(strings, key=len)\n\n\n"} 5 | {"task_id": "python/4", "language": "python", "completion": " if a < b:\n a, b = b, a\n while b != 0:\n a, b = b, a % b\n return a\n\n"} 6 | {"task_id": "python/5", "language": "python", "completion": " numbers = numbers.split()\n numbers.sort()\n return ' '.join(numbers)\n\n\n"} 7 | {"task_id": "python/6", "language": "python", "completion": " min_number = min(numbers)\n max_number = max(numbers)\n return [(number - min_number) / (max_number - min_number) for number in numbers]\n\n\n"} 8 | {"task_id": "python/7", "language": "python", "completion": " return string.swapcase()\n\n"} 9 | {"task_id": "python/8", "language": "python", 
"completion": " return [x for x in l if x > 0]\n\n\n"} 10 | {"task_id": "python/9", "language": "python", "completion": " if n < 2:\n return False\n if n == 2:\n return True\n if n % 2 == 0:\n return False\n for i in range(3, int(math.sqrt(n)) + 1, 2):\n if n % i == 0:\n return False\n return True\n\n"} 11 | {"task_id": "python/10", "language": "python", "completion": " return list(set(l))\n\n"} 12 | {"task_id": "python/11", "language": "python", "completion": " if n == 1:\n return 2\n elif n == 2:\n return 3\n elif n == 3:\n return 5\n elif n == 4:\n return 13\n elif n == 5:\n return 89\n else:\n return prime_fib(n-1) + prime_fib(n-2)\n\n"} 13 | {"task_id": "python/12", "language": "python", "completion": " for i in range(len(l) - 2):\n if l[i] + l[i + 1] + l[i + 2] == 0:\n return True\n return False\n\n"} 14 | {"task_id": "python/13", "language": "python", "completion": " for i in range(len(l) - 1):\n if l[i] + l[i + 1] == 0:\n return True\n return False\n\n"} 15 | {"task_id": "python/14", "language": "python", "completion": " if n == 0:\n return 0\n elif n == 1:\n return 0\n elif n == 2:\n return 2\n elif n == 3:\n return 0\n else:\n return fib4(n-1) + fib4(n-2) + fib4(n-3) + fib4(n-4)\n\n"} 16 | {"task_id": "python/15", "language": "python", "completion": " l.sort()\n n = len(l)\n if n % 2 == 1:\n return l[n // 2]\n else:\n return (l[n // 2] + l[n // 2 - 1]) / 2\n\n"} 17 | {"task_id": "python/16", "language": "python", "completion": " return text == text[::-1]\n\n"} 18 | {"task_id": "python/17", "language": "python", "completion": " vowels = 'aeiouAEIOU'\n return ''.join(c for c in text if c not in vowels)\n\n"} 19 | {"task_id": "python/18", "language": "python", "completion": " return all(x < t for x in l)\n\n"} 20 | {"task_id": "python/19", "language": "python", "completion": " return x + y\n\n"} 21 | {"task_id": "python/20", "language": "python", "completion": " return len(set(s0)) == len(set(s1))\n\n"} 22 | {"task_id": "python/21", "language": "python", 
"completion": " if n < 2:\n return n\n return fib(n-1) + fib(n-2)\n\n"} 23 | {"task_id": "python/22", "language": "python", "completion": " l1.sort()\n l2.sort()\n i = 0\n j = 0\n while i < len(l1) and j < len(l2):\n if l1[i] == l2[j]:\n return [l1[i], l2[j]]\n elif l1[i] < l2[j]:\n i += 1\n else:\n j += 1\n return []\n\n"} 24 | {"task_id": "python/23", "language": "python", "completion": " if n < 2:\n return n\n for i in range(2, int(n**0.5)+1):\n if n % i == 0:\n return largest_prime_factor(n//i)\n return n\n\n"} 25 | {"task_id": "python/24", "language": "python", "completion": " return n * (n + 1) // 2\n\n\n"} 26 | {"task_id": "python/25", "language": "python", "completion": " return [i * j for i, j in zip(xs[1:], xs[:-1])]\n\n"} 27 | {"task_id": "python/26", "language": "python", "completion": " if n == 0:\n return 0\n elif n == 1:\n return 0\n elif n == 2:\n return 1\n else:\n return fibfib(n-1) + fibfib(n-2) + fibfib(n-3)\n\n"} 28 | {"task_id": "python/27", "language": "python", "completion": " vowels = 0\n for c in s:\n if c in 'aeiou':\n vowels += 1\n return vowels\n\n"} 29 | {"task_id": "python/28", "language": "python", "completion": " #"} 30 | {"task_id": "python/29", "language": "python", "completion": " s = (a + b + c) / 2\n area = (s * (s - a) * (s - b) * (s - c)) ** 0.5\n return area if area > 0 else -1\n\n"} 31 | {"task_id": "python/30", "language": "python", "completion": " if q[0] + q[-1] <= w:\n return True\n else:\n return False\n\n"} 32 | {"task_id": "python/31", "language": "python", "completion": " if a < 2:\n return False\n for i in range(2, int(math.sqrt(a)) + 1):\n if a % i == 0:\n return False\n return True\n\n"} 33 | {"task_id": "python/32", "language": "python", "completion": " return bin(decimal)[2:]\n\n\n"} 34 | {"task_id": "python/33", "language": "python", "completion": " if len(s) < 3:\n return False\n if s[0] == s[1] == s[2]:\n return False\n if s[0] == s[1] and s[1] == s[2]:\n return False\n if s[0] == s[1] and s[2] == s[3]:\n 
return False\n if s[0] == s[2] and s[1] == s[3]:\n return False\n if s[0] == s[2] and s[2] == s[3]:\n return False\n if s[0] == s[1] and s[2] == s[3]:\n return False\n if s[0] == s[1] and s[1] == s[2] and s[2] == s[3]:\n return False\n return True\n\n\n"} 35 | {"task_id": "python/34", "language": "python", "completion": " pass\n\n\n"} 36 | {"task_id": "python/35", "language": "python", "completion": " if len(string) == 1:\n return False\n for i in range(2, int(math.sqrt(len(string))) + 1):\n if len(string) % i == 0:\n return False\n return True\n\n"} 37 | {"task_id": "python/36", "language": "python", "completion": " #"} 38 | {"task_id": "python/37", "language": "python", "completion": " for i in range(len(lst)):\n for j in range(len(lst[i])):\n if lst[i][j] == x:\n return [(i, j)]\n return []\n\n\n"} 39 | {"task_id": "python/38", "language": "python", "completion": " if len(lst) == 0:\n return None\n if len(lst) == 1:\n return lst[0]\n if lst[0] < lst[1]:\n return lst[1]\n else:\n return next_smallest(lst[1:])\n\n"} 40 | {"task_id": "python/39", "language": "python", "completion": " return sum(c == 'I' for c in S)\n\n"} 41 | {"task_id": "python/40", "language": "python", "completion": " #"} 42 | {"task_id": "python/41", "language": "python", "completion": " if dict == {}:\n return False\n for key in dict:\n if type(key) != str:\n return False\n if key.islower():\n if type(dict[key]) != str:\n return False\n elif key.isupper():\n if type(dict[key]) != str:\n return False\n else:\n return False\n return True\n\n"} 43 | {"task_id": "python/42", "language": "python", "completion": " return int(round(value))\n\n"} 44 | {"task_id": "python/43", "language": "python", "completion": " if n == 1:\n return [1]\n else:\n return [n] + make_a_pile(n-1) + make_a_pile(n-2)\n\n"} 45 | {"task_id": "python/44", "language": "python", "completion": " return s.split(',')\n\n"} 46 | {"task_id": "python/45", "language": "python", "completion": " if x > y:\n x, y = y, x\n if x % 2 == 0 
and y % 2 == 0:\n for i in range(x, y + 1, 2):\n if i % 2 == 0:\n return i\n return -1\n\n\n"} 47 | {"task_id": "python/46", "language": "python", "completion": " if n > m:\n return -1\n else:\n return bin(int((n + m) / 2))[2:]\n"} 48 | {"task_id": "python/47", "language": "python", "completion": " return [i * j for i in range(1, n + 1) for j in range(1, i + 1)]\n\n"} 49 | {"task_id": "python/48", "language": "python", "completion": " if n < 2:\n return 0, 0\n count = 0\n for i in range(1, n + 1):\n if i % 2 == 0:\n count += 1\n if i == n:\n break\n if i == n - 1:\n continue\n if i == n - 2:\n continue\n if i == n - 3:\n continue\n if i == n - 4:\n continue\n if i == n - 5:\n continue\n if i == n - 6:\n continue\n if i == n - 7:\n continue\n if i == n - 8:\n continue\n if i == n - 9:\n continue\n if i == n - 10:\n continue\n if i == n - 11:\n continue\n if i == n - 12:\n continue\n if i == n - 13:\n continue\n if i == n - 14:\n continue\n if i == n - 15:\n continue\n if i == n - 16:\n continue\n if i == n - 17:\n continue\n if i == n - 18:\n continue\n if i == n - 19:\n continue\n if i == n - 20:\n continue\n if i == n"} 50 | {"task_id": "python/49", "language": "python", "completion": " if not arr:\n return True\n for i in range(len(arr) - 1):\n if arr[i] > arr[i + 1]:\n return False\n return True\n\n\n"} 51 | {"task_id": "python/50", "language": "python", "completion": " if len(lst1) != len(lst2):\n return \"NO\"\n else:\n for i in range(len(lst1)):\n if lst1[i] % 2 != 0 and lst2[i] % 2 != 0:\n return \"NO\"\n return \"YES\"\n\n"} 52 | {"task_id": "python/51", "language": "python", "completion": " return s[::-1] == s[::-1].replace(c, '')\n\n"} 53 | {"task_id": "python/52", "language": "python", "completion": " #"} 54 | {"task_id": "python/53", "language": "python", "completion": " return [word for word in s.split() if len(word) == n]\n\n\n"} 55 | {"task_id": "python/54", "language": "python", "completion": " #"} 56 | {"task_id": "python/55", "language": "python", 
"completion": " #"} 57 | {"task_id": "python/56", "language": "python", "completion": " #"} 58 | {"task_id": "python/57", "language": "python", "completion": " if n == 1:\n return [1]\n if n == 2:\n return [1, 1]\n if n == 3:\n return [1, 1, 2]\n return [1] + tri(n - 1) + [1] + tri(n - 2) + [1] + tri(n - 3)\n\n"} 59 | {"task_id": "python/58", "language": "python", "completion": " if n < 0:\n return 0\n if n == 0:\n return 1\n if n == 1:\n return 1\n if n == 2:\n return 2\n if n == 3:\n return 6\n if n == 4:\n return 4\n if n == 5:\n return 10\n if n == 6:\n return 9\n if n == 7:\n return 40\n if n == 8:\n return 36\n if n == 9:\n return 45\n if n == 10:\n return 55\n if n == 11:\n return 20\n if n == 12:\n return 72\n if n == 13:\n return 216\n if n == 14:\n return 81\n if n == 15:\n return 90\n if n == 16:\n return 96\n if n == 17:\n return 120\n if n == 18:\n return 81\n if n == 19:\n return 90\n if n == 20:\n return 96\n if n == 21:\n return 120\n if n == 22:\n return 81\n if n == 23:\n return 90\n if n == 24:\n return 96\n if n == 25:\n return 120\n if n"} 60 | {"task_id": "python/59", "language": "python", "completion": " stack = []\n for i in string:\n if i == '[':\n stack.append(i)\n elif i == ']':\n if stack:\n stack.pop()\n else:\n return False\n if stack:\n return False\n else:\n return True\n\n"} 61 | {"task_id": "python/60", "language": "python", "completion": " return sum([x**2 for x in lst])\n\n"} 62 | {"task_id": "python/61", "language": "python", "completion": " return txt[-1].isalpha() and not txt.endswith(' ')\n\n"} 63 | {"task_id": "python/62", "language": "python", "completion": " if len(arr) == 0:\n return -1\n if len(arr) == 1:\n return 0\n if len(arr) == 2:\n return 1\n if len(arr) == 3:\n return 2\n if len(arr) == 4:\n return 3\n if len(arr) == 5:\n return 4\n if len(arr) == 6:\n return 5\n if len(arr) == 7:\n return 6\n if len(arr) == 8:\n return 7\n if len(arr) == 9:\n return 8\n if len(arr) == 10:\n return 9\n if len(arr) == 11:\n return 
10\n if len(arr) == 12:\n return 11\n if len(arr) == 13:\n return 12\n if len(arr) == 14:\n return 13\n if len(arr) == 15:\n return 14\n if len(arr) == 16:\n return 15\n if len(arr) == 17:\n return 16\n if len(arr) == 18:\n return 17\n if len(arr) == 19:\n return 18\n if len(arr) == 20:\n return 19\n if len(arr"} 64 | {"task_id": "python/63", "language": "python", "completion": " if len(lst) == 0:\n return (None, None)\n if lst.count(0) == len(lst):\n return (None, None)\n if lst.count(0) == len(lst):\n return (None, None)\n if lst.count(0) == len(lst):\n return (None, None)\n if lst.count(0) == len(lst):\n return (None, None)\n if lst.count(0) == len(lst):\n return (None, None)\n if lst.count(0) == len(lst):\n return (None, None)\n if lst.count(0) == len(lst):\n return (None, None)\n if lst.count(0) == len(lst):\n return (None, None)\n if lst.count(0) == len(lst):\n return (None, None)\n if lst.count(0) == len(lst):\n return (None, None)\n if lst.count(0) == len(lst):\n return (None, None)\n if lst.count(0) == len(lst):\n "} 65 | {"task_id": "python/64", "language": "python", "completion": " if n == 0:\n return 1\n else:\n return n * special_factorial(n - 1)\n\n\n"} 66 | {"task_id": "python/65", "language": "python", "completion": " #"} 67 | {"task_id": "python/66", "language": "python", "completion": " #"} 68 | {"task_id": "python/67", "language": "python", "completion": " return sorted(nums, key=lambda x: sum(map(int, str(x))))\n\n\n"} 69 | {"task_id": "python/68", "language": "python", "completion": " return len([x for x in nums if x > 10 and x % 2 == 1])\n\n"} 70 | {"task_id": "python/69", "language": "python", "completion": " a = [i * i - i + 1 for i in range(1, n + 1)]\n count = 0\n for i in range(1, n + 1):\n for j in range(i + 1, n + 1):\n for k in range(j + 1, n + 1):\n if a[i] + a[j] + a[k] == 3 * i * j * k:\n count += 1\n return count\n\n\n"} 71 | {"task_id": "python/70", "language": "python", "completion": " if planet1 not in planets or planet2 not in 
planets:\n return ()\n else:\n return sorted(list(set(planets[planet1].neighbors + planets[planet2].neighbors)))\n\n"} 72 | {"task_id": "python/71", "language": "python", "completion": " if n % 2 == 0:\n return x\n else:\n return y\n\n"} 73 | {"task_id": "python/72", "language": "python", "completion": " if not lst:\n return 0\n return sum([x * x for x in lst if x % 2 != 0])\n\n"} 74 | {"task_id": "python/73", "language": "python", "completion": " return class_name + '.' + max(extensions, key=lambda x: (len(x), x))\n\n"} 75 | {"task_id": "python/74", "language": "python", "completion": " if a == b:\n return True\n if a in b:\n return True\n if b in a:\n return True\n if len(a) != len(b):\n return False\n if len(a) == 1:\n return False\n if a[0] == b[0]:\n return cycpattern_check(a[1:],b[1:])\n if a[-1] == b[-1]:\n return cycpattern_check(a[:-1],b[:-1])\n return False\n\n"} 76 | {"task_id": "python/75", "language": "python", "completion": " if number < 1 or number > 1000:\n return None\n roman_dict = {\n 1: 'i',\n 5: 'v',\n 10: 'x',\n 50: 'l',\n 100: 'c',\n 500: 'd',\n 1000: 'm'\n }\n result = ''\n for key in roman_dict:\n while number >= key:\n result += roman_dict[key]\n number -= key\n return result\n\n\n"} 77 | {"task_id": "python/76", "language": "python", "completion": " if a + b > c and a + c > b and b + c > a:\n return True\n else:\n return False\n\n"} 78 | {"task_id": "python/77", "language": "python", "completion": " return ''.join(s[::-1] if s.isalpha() else s for s in s)\n\n\n"} 79 | {"task_id": "python/78", "language": "python", "completion": " if text == '':\n return None\n return hashlib.md5(text.encode('utf-8')).hexdigest()\n\n\n"} 80 | {"task_id": "python/79", "language": "python", "completion": " if a > b:\n return []\n if a == b:\n return [a]\n if a % 2 == 0:\n return generate_integers(a, b - 1) + [a]\n else:\n return generate_integers(a + 1, b)\n\n"} -------------------------------------------------------------------------------- 
/src/data_process.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/floatai/HumanEval-XL/1e9301f6cfc2d3481a7f7e44569982285238ac99/src/data_process.png --------------------------------------------------------------------------------