├── .gitattributes ├── example.png ├── juice-github-repos.tar.gz ├── Dockerfile ├── src ├── requirements.txt ├── data_science_problems │ ├── evaluate_dsp.py │ ├── progress.py │ ├── read.py │ ├── utils.py │ └── execution.py ├── setup.py └── data-science-notebooks.txt ├── .github ├── dependabot.yml └── workflows │ ├── main.yml │ ├── pypi-publish.yml │ ├── pylint.yml │ ├── mypy.yml │ ├── pyright.yml │ ├── black.yml │ └── pypi-test-publish.yml ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE.txt ├── .gitignore ├── SECURITY.md ├── .pylintrc └── README.md /.gitattributes: -------------------------------------------------------------------------------- 1 | juice-github-repos.tar.gz filter=lfs diff=lfs merge=lfs -text 2 | -------------------------------------------------------------------------------- /example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/DataScienceProblems/HEAD/example.png -------------------------------------------------------------------------------- /juice-github-repos.tar.gz: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:92859fc9076f8d0717918a73420cc2b03329509b131fd74c058e707e49fb336d 3 | size 3131051591 4 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # Copyright (C) Microsoft Corporation. All rights reserved. 2 | 3 | FROM continuumio/miniconda3 4 | 5 | 6 | WORKDIR /app 7 | COPY src /app 8 | 9 | RUN pip install -e . 10 | 11 | ENTRYPOINT ["evaluate_dsp"] -------------------------------------------------------------------------------- /src/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | tqdm 3 | nbclient 4 | nbformat 5 | ray 6 | fire 7 | jupyter 8 | matplotlib 9 | pandas 10 | pytest 11 | scikit-learn 12 | nose 13 | sympy 14 | nltk 15 | seaborn 16 | scikit-image 17 | SQLAlchemy 18 | altair 19 | bs4 20 | torch -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: pip 4 | directory: "/" 5 | schedule: 6 | interval: daily 7 | time: "13:00" 8 | open-pull-requests-limit: 10 9 | assignees: 10 | - dciborow 11 | - mattchansky 12 | allow: 13 | - dependency-type: direct 14 | - dependency-type: indirect 15 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Microsoft Open Source Code of Conduct 2 | 3 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). 
4 | 5 | Resources: 6 | 7 | - [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/) 8 | - [Microsoft Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) 9 | - Contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with questions or concerns 10 | -------------------------------------------------------------------------------- /.github/workflows/main.yml: -------------------------------------------------------------------------------- 1 | name: wemake-python-styleguide 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | branches: [ main ] 8 | 9 | workflow_dispatch: 10 | jobs: 11 | build: 12 | runs-on: ubuntu-latest 13 | steps: 14 | - uses: actions/checkout@v2 15 | 16 | - name: wemake-python-styleguide 17 | uses: wemake-services/wemake-python-styleguide@0.15.3 18 | env: 19 | GITHUB_TOKEN: ${{ github.token }} 20 | with: 21 | path: ai-python-package 22 | reporter: github-pr-review 23 | -------------------------------------------------------------------------------- /src/data_science_problems/evaluate_dsp.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | import fire 5 | import sys 6 | 7 | from data_science_problems.execution import evaluate_dsp 8 | 9 | 10 | def entry_point( 11 | sample_file: str, 12 | k: str = "1,10,100", 13 | ): 14 | """ 15 | Evaluates the functional correctness of generated samples. 16 | """ 17 | k = list(map(int, k.split(","))) 18 | results = evaluate_dsp(sample_file, k) 19 | print(results) 20 | 21 | 22 | def main(): 23 | fire.Fire(entry_point) 24 | 25 | sys.exit(main()) 26 | -------------------------------------------------------------------------------- /src/setup.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 
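The `entry_point` function in `evaluate_dsp.py` above is only a thin Fire wrapper around `evaluate_dsp` from `data_science_problems.execution`, so the same evaluation can also be driven directly from Python. A minimal sketch, assuming the package has been installed (`pip install -e src`) and that a `samples.jsonl` file of generated completions exists in the working directory:

```python
# Hedged sketch: call the evaluator directly instead of going through the
# `evaluate_dsp` console script. `samples.jsonl` is an assumed local file.
from data_science_problems.execution import evaluate_dsp

results = evaluate_dsp("samples.jsonl", [1, 10, 100])
print(results)  # e.g. {'pass@1': ..., 'pass@10': ...}
```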
3 | 4 | import os 5 | 6 | import pkg_resources 7 | from setuptools import setup, find_packages 8 | 9 | setup( 10 | name="DataScienceProblems", 11 | py_modules=["DataScienceProblems"], 12 | version="1.0", 13 | description="", 14 | author="Microsoft", 15 | packages=find_packages(), 16 | install_requires=[ 17 | str(r) 18 | for r in pkg_resources.parse_requirements( 19 | open(os.path.join(os.path.dirname(__file__), "requirements.txt")) 20 | ) 21 | ], 22 | entry_points={ 23 | "console_scripts": [ 24 | "evaluate_dsp = data_science_problems.evaluate_dsp", 25 | ] 26 | } 27 | ) 28 | -------------------------------------------------------------------------------- /.github/workflows/pypi-publish.yml: -------------------------------------------------------------------------------- 1 | name: Upload Python Package 2 | 3 | on: 4 | release: 5 | types: [created] 6 | 7 | jobs: 8 | deploy: 9 | runs-on: ubuntu-latest 10 | steps: 11 | - uses: actions/checkout@v2 12 | - name: Set up Python 13 | uses: actions/setup-python@v1 14 | with: 15 | python-version: '3.x' 16 | - name: Install dependencies 17 | run: | 18 | python -m pip install --upgrade pip 19 | pip install setuptools wheel twine 20 | python setup.py sdist bdist_wheel 21 | - name: Test dependencies 22 | run: | 23 | pip install -e .[all] 24 | 25 | - name: Publish package 26 | uses: pypa/gh-action-pypi-publish@release/v1 27 | with: 28 | user: __token__ 29 | password: ${{ secrets.PYPI_PASSWORD }} 30 | -------------------------------------------------------------------------------- /.github/workflows/pylint.yml: -------------------------------------------------------------------------------- 1 | name: PyLint 2 | on: 3 | # Triggers the workflow on push or pull request events but only for the main branch 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | branches: [ main ] 8 | 9 | # Allows you to run this workflow manually from the Actions tab 10 | workflow_dispatch: 11 | 12 | jobs: 13 | pylint: 14 | name: runner / pylint 15 | runs-on: ubuntu-latest 16 | steps: 17 | - uses: actions/checkout@v2 18 | - uses: dciborow/action-pylint@0.0.3 19 | with: 20 | github_token: ${{ secrets.github_token }} 21 | # Change reviewdog reporter if you need [github-pr-check,github-check,github-pr-review]. 22 | reporter: github-pr-review 23 | # Change reporter level if you need. 24 | # GitHub Status Check won't become failure with warning. 25 | workdir: ai-python-package 26 | level: warning 27 | -------------------------------------------------------------------------------- /.github/workflows/mypy.yml: -------------------------------------------------------------------------------- 1 | # This is a basic workflow to help you get started with Actions 2 | 3 | name: MyPy 4 | 5 | # Controls when the workflow will run 6 | on: 7 | # Triggers the workflow on push or pull request events but only for the main branch 8 | push: 9 | branches: [ main ] 10 | pull_request: 11 | branches: [ main ] 12 | 13 | # Allows you to run this workflow manually from the Actions tab 14 | workflow_dispatch: 15 | jobs: 16 | linter_name: 17 | name: runner / black formatter 18 | runs-on: ubuntu-latest 19 | steps: 20 | - uses: actions/checkout@v2 21 | - name: Run mypy with reviewdog 22 | # You may pin to the exact commit or the version. 
23 | # uses: tsuyoshicho/action-mypy@2160e947d397ac0be7f02c911c3a5bea3f498575 24 | uses: tsuyoshicho/action-mypy@v3.1.0 25 | with: 26 | reporter: github-pr-review 27 | workdir: ai-python-package 28 | -------------------------------------------------------------------------------- /.github/workflows/pyright.yml: -------------------------------------------------------------------------------- 1 | # This is a basic workflow to help you get started with Actions 2 | 3 | name: pyright 4 | 5 | # Controls when the workflow will run 6 | on: 7 | # Triggers the workflow on push or pull request events but only for the main branch 8 | push: 9 | branches: [ main ] 10 | pull_request: 11 | branches: [ main ] 12 | 13 | # Allows you to run this workflow manually from the Actions tab 14 | workflow_dispatch: 15 | jobs: 16 | linter_name: 17 | name: runner / black formatter 18 | runs-on: ubuntu-latest 19 | steps: 20 | - uses: actions/checkout@v2 21 | - uses: jordemort/action-pyright@v1 22 | with: 23 | github_token: ${{ secrets.GITHUB_TOKEN }} # You need this 24 | reporter: github-pr-review # Change reporter. 25 | lib: true 26 | - uses: ricardochaves/python-lint@v1.4.0 27 | with: 28 | python-root-list: "ai-python-package" 29 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | This project welcomes contributions and suggestions. Most contributions require you to 4 | agree to a Contributor License Agreement (CLA) declaring that you have the right to, 5 | and actually do, grant us the rights to use your contribution. For details, visit 6 | https://cla.microsoft.com. 7 | 8 | When you submit a pull request, a CLA-bot will automatically determine whether you need 9 | to provide a CLA and decorate the PR appropriately (e.g., label, comment). Simply follow the 10 | instructions provided by the bot. You will only need to do this once across all repositories using our CLA. 11 | 12 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). 13 | For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments. 14 | -------------------------------------------------------------------------------- /.github/workflows/black.yml: -------------------------------------------------------------------------------- 1 | # This is a basic workflow to help you get started with Actions 2 | 3 | name: Black 4 | 5 | # Controls when the workflow will run 6 | on: 7 | # Triggers the workflow on push or pull request events but only for the main branch 8 | push: 9 | branches: [ main ] 10 | pull_request: 11 | branches: [ main ] 12 | 13 | # Allows you to run this workflow manually from the Actions tab 14 | workflow_dispatch: 15 | jobs: 16 | linter_name: 17 | name: runner / black formatter 18 | runs-on: ubuntu-latest 19 | steps: 20 | - uses: actions/checkout@v2 21 | - name: Check files using the black formatter 22 | uses: rickstaa/action-black@v1 23 | id: action_black 24 | with: 25 | black_args: "." 
26 | - name: Annotate diff changes using reviewdog 27 | if: steps.action_black.outputs.is_formatted == 'true' 28 | uses: reviewdog/action-suggester@v1 29 | with: 30 | tool_name: blackfmt 31 | fail_on_error: true 32 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) Microsoft Corporation. 2 | 3 | MIT License 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /.github/workflows/pypi-test-publish.yml: -------------------------------------------------------------------------------- 1 | name: Test Upload Python Package 2 | 3 | on: 4 | workflow_dispatch 5 | 6 | jobs: 7 | deploy: 8 | runs-on: ubuntu-latest 9 | steps: 10 | - uses: actions/checkout@v2 11 | with: 12 | python-version: '3.7' 13 | - name: Setup Python 14 | uses: actions/setup-python@v2.2.2 15 | with: 16 | python-version: 3.7 17 | - name: Install dependencies 18 | run: | 19 | python3.7 -m pip install --upgrade pip 20 | python3.7 -m pip install virtualenv pip-tools 21 | sudo add-apt-repository -y ppa:ubuntugis/ppa \ 22 | && sudo apt install -y \ 23 | gdal-bin libgdal-dev 24 | 25 | mv requirements.txt requirements.in 26 | touch requirements.txt 27 | echo "" >> requirements.in 28 | cat requirements.linked.in >> requirements.in 29 | pip-compile -v requirements.in 30 | 31 | cd ai-python 32 | 33 | requirements=$(find . -type f -name "*requirements*" | paste -sd " ") 34 | 35 | for req in $requirements 36 | do 37 | mv $req temp.in 38 | echo "-c ../requirements.txt" >> temp.in 39 | touch $req 40 | pip-compile temp.in -o $req 41 | done 42 | 43 | cd .. 
44 | 45 | python setup.py sdist bdist_wheel 46 | - name: Publish package 47 | uses: pypa/gh-action-pypi-publish@release/v1 48 | with: 49 | user: __token__ 50 | password: ${{ secrets.TEST_PYPI_PASSWORD }} 51 | repository_url: https://test.pypi.org/legacy/ 52 | - name: Install dependencies 53 | run: | 54 | python3.7 -m pip install --upgrade pip 55 | python3.7 -m pip install virtualenv pip-tools 56 | pip-compile --extra-index-url https://test.pypi.org/simple 57 | ai-python-package[tests] 58 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | juice-github-repos/ 2 | 3 | # Byte-compiled / optimized / DLL files 4 | __pycache__/ 5 | *.py[cod] 6 | *$py.class 7 | 8 | # C extensions 9 | *.so 10 | 11 | # Distribution / packaging 12 | .Python 13 | build/ 14 | develop-eggs/ 15 | dist/ 16 | downloads/ 17 | eggs/ 18 | .eggs/ 19 | lib/ 20 | lib64/ 21 | parts/ 22 | sdist/ 23 | var/ 24 | wheels/ 25 | pip-wheel-metadata/ 26 | share/python-wheels/ 27 | *.egg-info/ 28 | .installed.cfg 29 | *.egg 30 | MANIFEST 31 | 32 | # PyInstaller 33 | # Usually these files are written by a python script from a template 34 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 35 | *.manifest 36 | *.spec 37 | 38 | # Installer logs 39 | pip-log.txt 40 | pip-delete-this-directory.txt 41 | 42 | # Unit test / coverage reports 43 | htmlcov/ 44 | .tox/ 45 | .nox/ 46 | .coverage 47 | .coverage.* 48 | .cache 49 | nosetests.xml 50 | coverage.xml 51 | *.cover 52 | *.py,cover 53 | .hypothesis/ 54 | .pytest_cache/ 55 | 56 | # Translations 57 | *.mo 58 | *.pot 59 | 60 | # Django stuff: 61 | *.log 62 | local_settings.py 63 | db.sqlite3 64 | db.sqlite3-journal 65 | 66 | # Flask stuff: 67 | instance/ 68 | .webassets-cache 69 | 70 | # Scrapy stuff: 71 | .scrapy 72 | 73 | # Sphinx documentation 74 | docs/_build/ 75 | 76 | # PyBuilder 77 | target/ 78 | 79 | # Jupyter Notebook 80 | .ipynb_checkpoints 81 | 82 | # IPython 83 | profile_default/ 84 | ipython_config.py 85 | 86 | # pyenv 87 | .python-version 88 | 89 | # pipenv 90 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 91 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 92 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 93 | # install all needed dependencies. 94 | #Pipfile.lock 95 | 96 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 97 | __pypackages__/ 98 | 99 | # Celery stuff 100 | celerybeat-schedule 101 | celerybeat.pid 102 | 103 | # SageMath parsed files 104 | *.sage.py 105 | 106 | # Environments 107 | .env 108 | .venv 109 | env/ 110 | venv/ 111 | ENV/ 112 | env.bak/ 113 | venv.bak/ 114 | 115 | # Spyder project settings 116 | .spyderproject 117 | .spyproject 118 | 119 | # Rope project settings 120 | .ropeproject 121 | 122 | # mkdocs documentation 123 | /site 124 | 125 | # mypy 126 | .mypy_cache/ 127 | .dmypy.json 128 | dmypy.json 129 | 130 | # Pyre type checker 131 | .pyre/ 132 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | ## Security 4 | 5 | Microsoft takes the security of our software products and services seriously, which includes all source code repositories managed through our GitHub organizations, which include [Microsoft](https://github.com/Microsoft), [Azure](https://github.com/Azure), [DotNet](https://github.com/dotnet), [AspNet](https://github.com/aspnet), [Xamarin](https://github.com/xamarin), and [our GitHub organizations](https://opensource.microsoft.com/). 6 | 7 | If you believe you have found a security vulnerability in any Microsoft-owned repository that meets [Microsoft's definition of a security vulnerability](https://docs.microsoft.com/en-us/previous-versions/tn-archive/cc751383(v=technet.10)), please report it to us as described below. 8 | 9 | ## Reporting Security Issues 10 | 11 | **Please do not report security vulnerabilities through public GitHub issues.** 12 | 13 | Instead, please report them to the Microsoft Security Response Center (MSRC) at [https://msrc.microsoft.com/create-report](https://msrc.microsoft.com/create-report). 14 | 15 | If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com). If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://www.microsoft.com/en-us/msrc/pgp-key-msrc). 16 | 17 | You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://www.microsoft.com/msrc). 18 | 19 | Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue: 20 | 21 | * Type of issue (e.g. buffer overflow, SQL injection, cross-site scripting, etc.) 22 | * Full paths of source file(s) related to the manifestation of the issue 23 | * The location of the affected source code (tag/branch/commit or direct URL) 24 | * Any special configuration required to reproduce the issue 25 | * Step-by-step instructions to reproduce the issue 26 | * Proof-of-concept or exploit code (if possible) 27 | * Impact of the issue, including how an attacker might exploit the issue 28 | 29 | This information will help us triage your report more quickly. 30 | 31 | If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. Please visit our [Microsoft Bug Bounty Program](https://microsoft.com/msrc/bounty) page for more details about our active programs. 32 | 33 | ## Preferred Languages 34 | 35 | We prefer all communications to be in English. 
36 | 37 | ## Policy 38 | 39 | Microsoft follows the principle of [Coordinated Vulnerability Disclosure](https://www.microsoft.com/en-us/msrc/cvd). 40 | 41 | -------------------------------------------------------------------------------- /src/data_science_problems/progress.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | from asyncio import Event 5 | from typing import Tuple 6 | from time import sleep 7 | 8 | import ray 9 | # For typing purposes 10 | from ray.actor import ActorHandle 11 | from tqdm import tqdm 12 | 13 | 14 | @ray.remote 15 | class ProgressBarActor: 16 | counter: int 17 | delta: int 18 | event: Event 19 | 20 | def __init__(self) -> None: 21 | self.counter = 0 22 | self.delta = 0 23 | self.event = Event() 24 | 25 | def update(self, num_items_completed: int) -> None: 26 | """Updates the ProgressBar with the incremental 27 | number of items that were just completed. 28 | """ 29 | self.counter += num_items_completed 30 | self.delta += num_items_completed 31 | self.event.set() 32 | 33 | async def wait_for_update(self) -> Tuple[int, int]: 34 | """Blocking call. 35 | 36 | Waits until somebody calls `update`, then returns a tuple of 37 | the number of updates since the last call to 38 | `wait_for_update`, and the total number of completed items. 39 | """ 40 | await self.event.wait() 41 | self.event.clear() 42 | saved_delta = self.delta 43 | self.delta = 0 44 | return saved_delta, self.counter 45 | 46 | def get_counter(self) -> int: 47 | """ 48 | Returns the total number of complete items. 49 | """ 50 | return self.counter 51 | 52 | 53 | class ProgressBar: 54 | progress_actor: ActorHandle 55 | total: int 56 | description: str 57 | pbar: tqdm 58 | 59 | def __init__(self, total: int, description: str = ""): 60 | # Ray actors don't seem to play nice with mypy, generating 61 | # a spurious warning for the following line, 62 | # which we need to suppress. The code is fine. 63 | self.progress_actor = ProgressBarActor.remote() # type: ignore 64 | self.total = total 65 | self.description = description 66 | 67 | @property 68 | def actor(self) -> ActorHandle: 69 | """Returns a reference to the remote `ProgressBarActor`. 70 | 71 | When you complete tasks, call `update` on the actor. 72 | """ 73 | return self.progress_actor 74 | 75 | def print_until_done(self) -> None: 76 | """Blocking call. 77 | 78 | Do this after starting a series of remote Ray tasks, to which you've 79 | passed the actor handle. Each of them calls `update` on the actor. 80 | When the progress meter reaches 100%, this method returns. 
81 | """ 82 | pbar = tqdm(desc=self.description, total=self.total) 83 | while True: 84 | delta, counter = ray.get(self.actor.wait_for_update.remote()) 85 | pbar.update(delta) 86 | if counter >= self.total: 87 | pbar.close() 88 | return 89 | -------------------------------------------------------------------------------- /.pylintrc: -------------------------------------------------------------------------------- 1 | [MASTER] 2 | extension-pkg-whitelist=numpy,torch,cv2,pyodbc,pydantic,ciso8601,netcdf4,scipy 3 | ignore=CVS 4 | ignore-patterns=test.*?py,conftest.py 5 | init-hook='import sys; sys.setrecursionlimit(8 * sys.getrecursionlimit())' 6 | jobs=0 7 | limit-inference-results=100 8 | persistent=yes 9 | suggestion-mode=yes 10 | unsafe-load-any-extension=no 11 | [MESSAGES CONTROL] 12 | confidence= 13 | enable=c-extension-no-member 14 | 15 | [REPORTS] 16 | evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10) 17 | output-format=text 18 | reports=no 19 | score=yes 20 | [REFACTORING] 21 | max-nested-blocks=5 22 | never-returning-functions=sys.exit 23 | 24 | [BASIC] 25 | argument-naming-style=snake_case 26 | attr-naming-style=snake_case 27 | bad-names=foo, 28 | bar, 29 | baz, 30 | toto, 31 | tutu, 32 | tata 33 | class-attribute-naming-style=any 34 | class-naming-style=PascalCase 35 | const-naming-style=UPPER_CASE 36 | docstring-min-length=-1 37 | function-naming-style=snake_case 38 | good-names=i, 39 | j, 40 | k, 41 | ex, 42 | Run, 43 | _, 44 | df, 45 | n, 46 | N, 47 | t, 48 | T, 49 | ax 50 | include-naming-hint=yes 51 | inlinevar-naming-style=any 52 | method-naming-style=snake_case 53 | module-naming-style=any 54 | name-group= 55 | no-docstring-rgx=^_ 56 | property-classes=abc.abstractproperty 57 | variable-naming-style=snake_case 58 | 59 | [FORMAT] 60 | expected-line-ending-format= 61 | ignore-long-lines=^\s*(# )?.*['"]?? 
62 | indent-after-paren=4 63 | indent-string=' ' 64 | max-line-length=120 65 | max-module-lines=1000 66 | no-space-check=trailing-comma, 67 | dict-separator 68 | single-line-class-stmt=no 69 | single-line-if-stmt=no 70 | 71 | [LOGGING] 72 | logging-format-style=old 73 | logging-modules=logging 74 | 75 | [MISCELLANEOUS] 76 | notes=FIXME, 77 | XXX, 78 | TODO 79 | 80 | [SIMILARITIES] 81 | ignore-comments=yes 82 | ignore-docstrings=yes 83 | ignore-imports=yes 84 | min-similarity-lines=7 85 | 86 | [SPELLING] 87 | max-spelling-suggestions=4 88 | spelling-dict= 89 | spelling-ignore-words= 90 | spelling-private-dict-file= 91 | spelling-store-unknown-words=no 92 | 93 | [STRING] 94 | check-str-concat-over-line-jumps=no 95 | 96 | [TYPECHECK] 97 | contextmanager-decorators=contextlib.contextmanager 98 | generated-members=numpy.*,np.*,pyspark.sql.functions,collect_list 99 | ignore-mixin-members=yes 100 | ignore-none=yes 101 | ignore-on-opaque-inference=yes 102 | ignored-classes=optparse.Values,thread._local,_thread._local,numpy,torch,swagger_client 103 | ignored-modules=numpy,torch,swagger_client,netCDF4,scipy 104 | missing-member-hint=yes 105 | missing-member-hint-distance=1 106 | missing-member-max-choices=1 107 | signature-mutators= 108 | 109 | [VARIABLES] 110 | additional-builtins=dbutils 111 | allow-global-unused-variables=yes 112 | callbacks=cb_, 113 | _cb 114 | dummy-variables-rgx=_+$|(_[a-zA-Z0-9_]*[a-zA-Z0-9]+?$)|dummy|^ignored_|^unused_ 115 | ignored-argument-names=_.*|^ignored_|^unused_ 116 | init-import=no 117 | redefining-builtins-modules=six.moves,past.builtins,future.builtins,builtins,io 118 | 119 | [CLASSES] 120 | defining-attr-methods=__init__, 121 | __new__, 122 | setUp, 123 | __post_init__ 124 | exclude-protected=_asdict, 125 | _fields, 126 | _replace, 127 | _source, 128 | _make 129 | valid-classmethod-first-arg=cls 130 | valid-metaclass-classmethod-first-arg=cls 131 | 132 | [DESIGN] 133 | max-args=5 134 | max-attributes=7 135 | max-bool-expr=5 136 | max-branches=12 137 | max-locals=15 138 | max-parents=7 139 | max-public-methods=20 140 | max-returns=6 141 | max-statements=50 142 | min-public-methods=2 143 | 144 | [IMPORTS] 145 | allow-any-import-level= 146 | allow-wildcard-with-all=no 147 | analyse-fallback-blocks=no 148 | deprecated-modules=optparse,tkinter.tix 149 | ext-import-graph= 150 | import-graph= 151 | int-import-graph= 152 | known-standard-library= 153 | known-third-party=enchant, azureiai-logistics-inventoryplanning 154 | preferred-modules= 155 | 156 | [EXCEPTIONS] 157 | overgeneral-exceptions=BaseException, 158 | Exception 159 | -------------------------------------------------------------------------------- /src/data_science_problems/read.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 
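For context on `progress.py` above: `ProgressBar` pairs a tqdm bar on the driver with a Ray actor that remote tasks update as they finish, which is exactly how `execution.py` later tracks notebook execution. A minimal sketch of that pattern, with a hypothetical `work` task standing in for the real workload:

```python
# Hedged sketch of the ProgressBar / ProgressBarActor pattern from progress.py.
# `work` is a made-up task used only for illustration.
import ray
from data_science_problems.progress import ProgressBar

ray.init(ignore_reinit_error=True)

@ray.remote
def work(item, actor):
    actor.update.remote(1)   # report one completed item to the progress actor
    return item * 2

items = list(range(100))
pb = ProgressBar(len(items), description="processing")
refs = [work.remote(i, pb.actor) for i in items]
pb.print_until_done()        # blocks until the counter reaches len(items)
results = ray.get(refs)
```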
3 | 4 | from pathlib import Path 5 | from tqdm import tqdm 6 | 7 | import os 8 | import nbformat 9 | from shutil import copyfile 10 | 11 | 12 | ROOT = Path(os.path.abspath(__file__)).parent.parent 13 | DSP = ROOT / "data-science-notebooks.txt" 14 | 15 | 16 | def extract_initial_comments(tgt): 17 | tgt = tgt.strip().split("\n") 18 | for idx, t in enumerate(tgt): 19 | if t.strip() == "": 20 | continue 21 | if not t.startswith("#"): 22 | break 23 | return "\n".join(tgt[:idx]), "\n".join(tgt[idx:]) 24 | 25 | 26 | def build_examples(path, context_len=3): 27 | path = Path(path.strip()) 28 | 29 | try: 30 | nb = nbformat.read(path, as_version=4) 31 | except: 32 | copyfile("/storage/data/" / path, path) 33 | nb = nbformat.read(path, as_version=4) 34 | 35 | cells_json = nb["cells"] 36 | cells = [''.join(cell['source']) for cell in cells_json] 37 | 38 | examples = [] 39 | notebook_problem_index = 0 40 | for idx in range(len(cells)-1): 41 | cell_type = cells_json[idx]["cell_type"] 42 | i, j = max(0, idx-context_len), idx 43 | 44 | target = cells[idx] 45 | source = "\n".join(cells[i:j]) 46 | 47 | # a number of times, the inital comments in the target 48 | # contains the problem statement. need that to solve the problem 49 | comment, target = extract_initial_comments(target) 50 | source = source + "\n" + comment 51 | 52 | next = cells[idx+1] 53 | 54 | try: 55 | if cells_json[idx]["metadata"]["nbgrader"]["solution"]: 56 | if "assert" in "".join(cells_json[idx+1]["source"]): 57 | if cell_type == "code": 58 | example = { 59 | "prompt": source, 60 | "test": next, 61 | "solution": target, 62 | "notebook_path": path, 63 | "notebook_problem_index": notebook_problem_index 64 | } 65 | notebook_problem_index += 1 66 | yield example 67 | # catch if the key metadata.nbgrader.solution do not exist 68 | except Exception as e: 69 | pass 70 | return examples 71 | 72 | 73 | def build_examples_new(path, context_len=3): 74 | path = ROOT.parent / Path(path.strip()) 75 | # try: 76 | nb = nbformat.read(path, as_version=4) 77 | # except: 78 | # copyfile("/storage/data/" / path, path) 79 | # nb = nbformat.read(path, as_version=4) 80 | 81 | cells_json = nb["cells"] 82 | cells = [''.join(cell['source']) for cell in cells_json] 83 | 84 | examples = [] 85 | notebook_problem_index = 0 86 | for idx in range(len(cells)-1): 87 | cell_type = cells_json[idx]["cell_type"] 88 | i, j = max(0, idx-context_len), idx 89 | 90 | target = cells[idx] 91 | source = "\n".join(cells[i:j]) 92 | 93 | # a number of times, the inital comments in the target 94 | # contains the problem statement. 
need that to solve the problem 95 | comment, target = extract_initial_comments(target) 96 | source = source + "\n" + comment 97 | 98 | next = cells[idx+1] 99 | 100 | try: 101 | task_id = cells_json[idx]["metadata"]["task_id"] 102 | example = { 103 | "task_id": task_id, 104 | "prompt": source, 105 | "solution": target, 106 | "test": next, 107 | "notebook_path": str(path), 108 | "notebook_problem_index": notebook_problem_index, 109 | } 110 | notebook_problem_index += 1 111 | yield example 112 | except Exception as e: 113 | pass 114 | return examples 115 | 116 | 117 | def read_filepaths(): 118 | with open(DSP) as f: 119 | return f.readlines() 120 | 121 | 122 | def read_problems(context_len=3): 123 | ps = read_filepaths() 124 | examples = {} 125 | for path in tqdm(ps, total=len(ps)): 126 | for example in build_examples_new(path, context_len=context_len): 127 | examples[example["task_id"]] = example 128 | return examples 129 | 130 | 131 | -------------------------------------------------------------------------------- /src/data_science_problems/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | import gzip 5 | import os 6 | import json 7 | import faulthandler 8 | import platform 9 | import itertools 10 | import numpy as np 11 | 12 | 13 | def estimate_pass_at_k( 14 | num_samples, 15 | num_correct, 16 | k 17 | ): 18 | """ 19 | Estimates pass@k of each problem and returns them in an array. 20 | """ 21 | 22 | def estimator(n: int, c: int, k: int) -> float: 23 | """ 24 | Calculates 1 - comb(n - c, k) / comb(n, k). 25 | """ 26 | if n - c < k: 27 | return 1.0 28 | return 1.0 - np.prod(1.0 - k / np.arange(n - c + 1, n + 1)) 29 | 30 | if isinstance(num_samples, int): 31 | num_samples_it = itertools.repeat(num_samples, len(num_correct)) 32 | else: 33 | assert len(num_samples) == len(num_correct) 34 | num_samples_it = iter(num_samples) 35 | 36 | return np.array([estimator(int(n), int(c), k) for n, c in zip(num_samples_it, num_correct)]) 37 | 38 | 39 | def write_jsonl(filename: str, data, append: bool = False): 40 | """ 41 | Writes an iterable of dictionaries to jsonl 42 | """ 43 | if append: 44 | mode = 'ab' 45 | else: 46 | mode = 'wb' 47 | filename = os.path.expanduser(filename) 48 | if filename.endswith(".gz"): 49 | with open(filename, mode) as fp: 50 | with gzip.GzipFile(fileobj=fp, mode='wb') as gzfp: 51 | for x in data: 52 | gzfp.write((json.dumps(x) + "\n").encode('utf-8')) 53 | else: 54 | with open(filename, mode) as fp: 55 | for x in data: 56 | fp.write((json.dumps(x) + "\n").encode('utf-8')) 57 | 58 | 59 | def stream_jsonl(filename: str): 60 | """ 61 | Parses each jsonl line and yields it as a dictionary 62 | """ 63 | if filename.endswith(".gz"): 64 | with open(filename, "rb") as gzfp: 65 | with gzip.open(gzfp, 'rt') as fp: 66 | for line in fp: 67 | if any(not x.isspace() for x in line): 68 | yield json.loads(line) 69 | else: 70 | with open(filename, "r") as fp: 71 | for line in fp: 72 | if any(not x.isspace() for x in line): 73 | yield json.loads(line) 74 | 75 | 76 | def reliability_guard(maximum_memory_bytes=None): 77 | """ 78 | This disables various destructive functions and prevents the generated code 79 | from interfering with the test (e.g. fork bomb, killing other processes, 80 | removing filesystem files, etc.) 81 | 82 | WARNING 83 | This function is NOT a security sandbox. 
Untrusted code, including, model- 84 | generated code, should not be blindly executed outside of one. 85 | """ 86 | 87 | if maximum_memory_bytes is not None: 88 | import resource 89 | resource.setrlimit(resource.RLIMIT_AS, (maximum_memory_bytes, maximum_memory_bytes)) 90 | resource.setrlimit(resource.RLIMIT_DATA, (maximum_memory_bytes, maximum_memory_bytes)) 91 | if not platform.uname().system == 'Darwin': 92 | resource.setrlimit(resource.RLIMIT_STACK, (maximum_memory_bytes, maximum_memory_bytes)) 93 | 94 | faulthandler.disable() 95 | 96 | import builtins 97 | builtins.exit = None 98 | builtins.quit = None 99 | 100 | import os 101 | # os.environ['OMP_NUM_THREADS'] = '1' 102 | 103 | # os.kill = None 104 | os.system = None 105 | os.putenv = None 106 | os.remove = None 107 | os.removedirs = None 108 | # os.rmdir = None 109 | os.fchdir = None 110 | os.setuid = None 111 | os.fork = None 112 | os.forkpty = None 113 | os.killpg = None 114 | os.rename = None 115 | os.renames = None 116 | os.truncate = None 117 | os.replace = None 118 | os.unlink = None 119 | os.fchmod = None 120 | os.fchown = None 121 | os.chmod = None 122 | os.chown = None 123 | os.chroot = None 124 | os.fchdir = None 125 | os.lchflags = None 126 | os.lchmod = None 127 | os.lchown = None 128 | os.getcwd = None 129 | os.chdir = None 130 | 131 | import shutil 132 | # shutil.rmtree = None 133 | shutil.move = None 134 | shutil.chown = None 135 | 136 | import subprocess 137 | subprocess.Popen = None # type: ignore 138 | 139 | __builtins__['help'] = None 140 | 141 | import sys 142 | sys.modules['ipdb'] = None 143 | sys.modules['joblib'] = None 144 | sys.modules['resource'] = None 145 | sys.modules['psutil'] = None 146 | sys.modules['tkinter'] = None 147 | -------------------------------------------------------------------------------- /src/data_science_problems/execution.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 
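The `estimate_pass_at_k` helper in `utils.py` above computes the standard estimator 1 - comb(n - c, k) / comb(n, k) for each problem, using a running product instead of explicit binomial coefficients. A small worked example, assuming the package is importable:

```python
# Hedged worked example for estimate_pass_at_k from utils.py.
from data_science_problems.utils import estimate_pass_at_k

# One problem, n = 5 generated samples, c = 2 of which pass the unit tests.
print(estimate_pass_at_k(num_samples=[5], num_correct=[2], k=1))  # ~[0.4], i.e. c / n
print(estimate_pass_at_k(num_samples=[5], num_correct=[2], k=4))  # [1.0], since n - c < k
```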
3 | 4 | import numpy as np 5 | from tqdm import tqdm 6 | from pathlib import Path 7 | from collections import defaultdict 8 | 9 | import nbformat 10 | from nbclient import NotebookClient 11 | 12 | from data_science_problems.read import read_problems, read_filepaths, extract_initial_comments 13 | from data_science_problems.utils import stream_jsonl, estimate_pass_at_k, reliability_guard 14 | from data_science_problems.progress import ProgressBar 15 | 16 | 17 | import ray 18 | ray.init(log_to_driver=False) 19 | 20 | 21 | def refersh_and_save(path, fout, completions, problems): 22 | path = Path(path.strip()) 23 | nb = nbformat.read(path, as_version=4) 24 | 25 | cells_json = nb["cells"] 26 | cells = [''.join(cell['source']) for cell in cells_json] 27 | for idx in range(len(cells)-1): 28 | if "task_id" in cells_json[idx]["metadata"]: 29 | task_id = cells_json[idx]["metadata"]["task_id"] 30 | 31 | # verify that it is the right cell 32 | comment, solution = extract_initial_comments(cells_json[idx]["source"]) 33 | assert solution == problems[task_id]["solution"] 34 | assert cells_json[idx+1]["source"] == problems[task_id]["test"] 35 | 36 | for completion_id, completion in enumerate(completions[task_id]): 37 | # fill the cell with generated code 38 | cells_json[idx]["source"] = "#### GENERATED\n" + completion 39 | 40 | # write the refreshed notebook to it's own file 41 | task_no = task_id.split('/')[1] 42 | suffix = f".{task_no}.{completion_id}.ipynb" 43 | outnb = path.parent / path.parts[-1].replace(".ipynb", suffix) 44 | print(outnb, file=fout) 45 | nbformat.write(nb, outnb) 46 | 47 | # fill the cell back with original code 48 | cells_json[idx]["source"] = problems[task_id]["solution"] 49 | 50 | 51 | @ray.remote 52 | def execute(notebook_filename, actor, ferr): 53 | actor.update.remote(1) 54 | notebook_filename = Path(notebook_filename.strip()) 55 | nb = nbformat.read(notebook_filename, as_version=4) 56 | parent = notebook_filename.parent 57 | client = NotebookClient(nb, 58 | timeout=10, 59 | kernel_name="python3", 60 | resources= {'metadata': {'path': parent}}, 61 | allow_errors=True 62 | ) 63 | try: 64 | enb = client.execute() 65 | except Exception as e: 66 | print(notebook_filename, file=ferr) 67 | return 68 | nbformat.write(enb, notebook_filename) 69 | print(notebook_filename) 70 | 71 | 72 | def has_no_error(x): 73 | for element in x: 74 | if "ename" in element: 75 | return False 76 | return True 77 | 78 | 79 | def evaluate(path): 80 | path = Path(path.strip()) 81 | nb = nbformat.read(path, as_version=4) 82 | 83 | cells_json = nb["cells"] 84 | cells = [''.join(cell['source']) for cell in cells_json] 85 | for idx in range(len(cells)-1): 86 | if "task_id" in cells_json[idx]["metadata"]: 87 | task_id = cells_json[idx]["metadata"]["task_id"] 88 | source = cells_json[idx]["source"] 89 | if "#### GENERATED" in source: 90 | # print(task_id) 91 | test = cells_json[idx+1]["outputs"] 92 | return has_no_error(test), task_id 93 | 94 | 95 | def evaluate_dsp(sample_file="samples.jsonl", ks=[1, 10, 100]): 96 | 97 | print("Reading the generated samples.") 98 | problems = read_problems() 99 | completions = defaultdict(list) 100 | for s in stream_jsonl(sample_file): 101 | completions[s["task_id"]].append(s["completion"]) 102 | 103 | 104 | # create new notebooks with generated code filled in 105 | print("Saving to new notebooks with generated samples.") 106 | ps = read_filepaths() 107 | out_file = "generated.txt" 108 | with open(out_file, "w") as fout: 109 | for path in tqdm(ps, total=len(ps)): 110 | 
refersh_and_save(path, fout, completions, problems) 111 | 112 | 113 | # disable functionalities that can make destructive changes to the test 114 | reliability_guard() 115 | 116 | # execute the notebooks with generated code 117 | print("Execute the new notebooks with generated samples.") 118 | with open(out_file) as f: 119 | ps = f.readlines() 120 | 121 | pb = ProgressBar(len(ps)) 122 | with open("errors.txt", "w") as ferr: 123 | tasks_pre_launch = [execute.remote(notebook_filename, pb.actor, ferr) for notebook_filename in ps] 124 | pb.print_until_done() 125 | tasks = ray.get(tasks_pre_launch) 126 | 127 | 128 | # calculate pass@k. 129 | print("Complute pass@k for the executed notebooks.") 130 | with open(out_file) as f: 131 | ps = f.readlines() 132 | 133 | results = defaultdict(list) 134 | for notebook_filename in tqdm(ps): 135 | result, task_id = evaluate(notebook_filename) 136 | results[task_id].append(result) 137 | 138 | total, correct = [], [] 139 | for result in results.values(): 140 | result.sort() 141 | passed = [bool(r) for r in result] 142 | total.append(len(passed)) 143 | correct.append(sum(passed)) 144 | total = np.array(total) 145 | correct = np.array(correct) 146 | 147 | pass_at_k = {f"pass@{k}": estimate_pass_at_k(total, correct, k).mean() \ 148 | for k in ks if (total >= k).all()} 149 | return pass_at_k 150 | 151 | 152 | if __name__ == "__main__": 153 | evaluate_dsp() 154 | 155 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Data Science Problems 2 | 3 | Evaluate a natural language code generation model on real data science pedagogical notebooks! Data Science Problems (DSP) includes well-posed data science problems in Markdown along with unit tests to verify correctness and a Docker environment for reproducible execution. About 1/3 of notebooks in this benchmark also include data dependencies, so this benchmark not only can test a model's ability to chain together complex tasks, but also evaluate the solutions on real data! See our paper [Training and Evaluating a Jupyter Notebook Data Science Assistant](https://arxiv.org/abs/2201.12901) for more details about state of the art results and other properties of the dataset. 4 | 5 | ## Installation 6 | 7 | This project requires Python 3.6+ and Docker to run. Assuming you have these, to get started first download and install the Python package: 8 | 9 | ``` 10 | $ git clone git@github.com:microsoft/DataScienceProblems.git 11 | $ cd DataScienceProblems/src 12 | $ pip install -e . 13 | ``` 14 | 15 | ## Usage 16 | 17 | ### Reading the problems 18 | 19 | Extract the `juice-github-repos.tar.gz` file from the `DataScienceProblems` repository. 20 | 21 | ``` 22 | $ tar -xvzf juice-github-repos.tar.gz 23 | ``` 24 | 25 | ### Data Schema 26 | 27 | Here is an example of a notebook context, prompt cell (1 and the markdown), solution cell (2), and unit tests cell (3). 28 | 29 |  30 | 31 | The DSP schema corresponding to this example includes the `prompt` which is the question to be asked to the student, `solution` which is the answer to the question and `test` which is the test case to be run on the student's code. 
32 | 33 | ```python 34 | { 35 | 'notebook_path': '/path/to/the/notebook.ipynb', 36 | 'notebook_problem_index': 0, 37 | 'prompt': '%matplotlib inline\n' 38 | 'import matplotlib.pyplot as plt\n' 39 | 'import numpy as np\n' 40 | 'import scipy.optimize as opt\n' 41 | '## Hat potential\n' 42 | 'The following potential is often used in Physics and other fields ' 43 | 'to describe symmetry breaking and is often known as the "hat ' 44 | 'potential":\n' 45 | '\n' 46 | '$$ V(x) = -a x^2 + b x^4 $$\n' 47 | '\n' 48 | 'Write a function `hat(x,a,b)` that returns the value of this ' 49 | 'function:\n', 50 | 'solution': 'def hat(x,a=5.0,b=1.0):\n return -a* x*x + b*x**4', 51 | 'task_id': 'DSP/414', 52 | 'test': 'assert hat(0.0, 1.0, 1.0)==0.0\n' 53 | 'assert hat(0.0, 1.0, 1.0)==0.0\n' 54 | 'assert hat(1.0, 10.0, 1.0)==-9.0' 55 | } 56 | ``` 57 | 58 | We provide a `read_problems` function that can be used to read the problems from the jupyter notebooks. 59 | 60 | Below is an example of how to use the `read_problems` function and use your generated code samples to save the samples to a file. 61 | 62 | 63 | ```python 64 | from data_science_problems.read import read_problems 65 | from data_science_problems.utils import write_jsonl 66 | 67 | problems = read_problems() 68 | 69 | num_samples = 1 70 | samples = [ 71 | dict(task_id=task_id, completion=generate_code(problems[task_id]["prompt"])) 72 | for task_id in problems 73 | for _ in range(num_samples) 74 | ] 75 | write_jsonl("samples.jsonl", samples) 76 | ``` 77 | 78 | 79 | ### Executing the problems and unit tests 80 | 81 | Once you have saved the generated samples in the `samples.jsonl` file, you need to build the provided docker container, which would help you safely run the generated samples inside the container. 82 | 83 | Use the following command to build the docker container. 84 | 85 | ``` 86 | $ docker build --pull --rm -f "Dockerfile" -t datascienceproblems:latest "." 87 | ``` 88 | 89 | Once the Docker container is built, you can execute the generated samples inside the container. You'll need to map the `/app/juice-github-repos` and `/samples/samples.jsonl` directory to the host directory where the notebooks are stored. 90 | 91 | Use the following command to execute the samples inside the container. 92 | 93 | ``` 94 | $ docker run -it --rm -v $PWD/juice-github-repos:/app/juice-github-repos -v $PWD/samples.jsonl:/samples/samples.jsonl datascienceproblems /samples/samples.jsonl 95 | ``` 96 | 97 | 98 | The `docker run` will perform the following things: 99 | 100 | - It will read the samples from the `samples.jsonl` file. 101 | - It will create new notebooks with the generated code samples. The list of new notebooks is saved in the `generates-notebooks.txt` file. 102 | - It will execute these new notebooks. 103 | - It will compute `pass@k` for generated samples. 104 | 105 | > **WARNING: Running the `docker run` command with `num_samples = 1` will create with ~1000 new notebooks and save them on your disk. This may take a while.** 106 | 107 | ``` 108 | $ docker run -it --rm -v $PWD/juice-github-repos:/app/juice-github-repos -v $PWD/samples.jsonl:/samples/samples.jsonl datascienceproblems /samples/samples.jsonl 109 | 2021-11-02 09:11:11,847 INFO services.py:1164 -- View the Ray dashboard at http://127.0.0.1:8265 110 | Reading the generated samples. 111 | 100%|███████████████████████████████████████████████| 305/305 [00:03<00:00, 97.34it/s] 112 | Saving to new notebooks with generated samples. 
113 | 100%|███████████████████████████████████████████████| 305/305 [00:36<00:00, 8.47it/s] 114 | Execute the new notebooks with generated samples. 115 | 100%|███████████████████████████████████████████████| 2192/2192 [05:17<00:40, 9.49it/s] 116 | Complute pass@k for the executed notebooks. 117 | 100%|███████████████████████████████████████████████| 2192/2192 [00:28<00:00, 76.73it/s] 118 | {'pass@1': ..., 'pass@10': ...} 119 | ``` 120 | 121 | ##### Trademarks 122 | 123 | > This project may contain trademarks or logos for projects, products, or services. Authorized use of Microsoft trademarks or logos is subject to and must follow Microsoft’s Trademark & Brand Guidelines. Use of Microsoft trademarks or logos in modified versions of this project must not cause confusion or imply Microsoft sponsorship. Any use of third-party trademarks or logos are subject to those third-party’s policies. 124 | 125 | ## Dataset Metadata 126 | 127 | The following table is necessary for this dataset to be indexed by search 128 | engines such as Google Dataset Search. 129 |
| property | value |
|---|---|
| name | Data Science Problems |
| url | https://github.com/microsoft/DataScienceProblems |
| sameAs | https://github.com/microsoft/DataScienceProblems |
| description | Evaluate a natural language code generation model on real data science pedagogical notebooks! Data Science Problems (DSP) includes well-posed data science problems in Markdown along with unit tests to verify correctness and a Docker environment for reproducible execution. About 1/3 of notebooks in this benchmark also include data dependencies, so this benchmark not only can test a model's ability to chain together complex tasks, but also evaluate the solutions on real data! See our paper Training and Evaluating a Jupyter Notebook Data Science Assistant (https://arxiv.org/abs/2201.12901) for more details about state of the art results and other properties of the dataset. |
| citation | https://arxiv.org/abs/2201.12901 |
| license | https://github.com/microsoft/DataScienceProblems/blob/main/LICENSE.txt |