├── .gitattributes
├── example.png
├── juice-github-repos.tar.gz
├── Dockerfile
├── src
    ├── requirements.txt
    ├── data_science_problems
    │   ├── evaluate_dsp.py
    │   ├── progress.py
    │   ├── read.py
    │   ├── utils.py
    │   └── execution.py
    ├── setup.py
    └── data-science-notebooks.txt
├── .github
    ├── dependabot.yml
    └── workflows
    │   ├── main.yml
    │   ├── pypi-publish.yml
    │   ├── pylint.yml
    │   ├── mypy.yml
    │   ├── pyright.yml
    │   ├── black.yml
    │   └── pypi-test-publish.yml
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── LICENSE.txt
├── .gitignore
├── SECURITY.md
├── .pylintrc
└── README.md


/.gitattributes:
--------------------------------------------------------------------------------
1 | juice-github-repos.tar.gz filter=lfs diff=lfs merge=lfs -text
2 | 


--------------------------------------------------------------------------------
/example.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microsoft/DataScienceProblems/HEAD/example.png


--------------------------------------------------------------------------------
/juice-github-repos.tar.gz:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:92859fc9076f8d0717918a73420cc2b03329509b131fd74c058e707e49fb336d
3 | size 3131051591
4 | 


--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
 1 | # Copyright (C) Microsoft Corporation. All rights reserved.
 2 | 
 3 | FROM continuumio/miniconda3
 4 | 
 5 | 
 6 | WORKDIR /app
 7 | COPY src /app
 8 | 
 9 | RUN pip install -e .
10 | 
11 | ENTRYPOINT ["evaluate_dsp"]


--------------------------------------------------------------------------------
/src/requirements.txt:
--------------------------------------------------------------------------------
 1 | numpy
 2 | tqdm
 3 | nbclient
 4 | nbformat
 5 | ray
 6 | fire
 7 | jupyter
 8 | matplotlib
 9 | pandas
10 | pytest
11 | scikit-learn
12 | nose
13 | sympy
14 | nltk
15 | seaborn
16 | scikit-image
17 | SQLAlchemy
18 | altair
19 | bs4
20 | torch


--------------------------------------------------------------------------------
/.github/dependabot.yml:
--------------------------------------------------------------------------------
 1 | version: 2
 2 | updates:
 3 | - package-ecosystem: pip
 4 |   directory: "/"
 5 |   schedule:
 6 |     interval: daily
 7 |     time: "13:00"
 8 |   open-pull-requests-limit: 10
 9 |   assignees:
10 |   - dciborow
11 |   - mattchansky
12 |   allow:
13 |   - dependency-type: direct
14 |   - dependency-type: indirect
15 | 


--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
 1 | # Microsoft Open Source Code of Conduct
 2 | 
 3 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/).
 4 | 
 5 | Resources:
 6 | 
 7 | - [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/)
 8 | - [Microsoft Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/)
 9 | - Contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with questions or concerns
10 | 


--------------------------------------------------------------------------------
/.github/workflows/main.yml:
--------------------------------------------------------------------------------
 1 | name: wemake-python-styleguide
 2 | 
 3 | on:
 4 |   push:
 5 |     branches: [ main ]
 6 |   pull_request:
 7 |     branches: [ main ]
 8 | 
 9 |   workflow_dispatch:
10 | jobs:
11 |   build:
12 |     runs-on: ubuntu-latest
13 |     steps:
14 |       - uses: actions/checkout@v2
15 | 
16 |       - name: wemake-python-styleguide
17 |         uses: wemake-services/wemake-python-styleguide@0.15.3
18 |         env:
19 |           GITHUB_TOKEN: ${{ github.token }}
20 |         with:
21 |           path: ai-python-package
22 |           reporter: github-pr-review
23 | 


--------------------------------------------------------------------------------
/src/data_science_problems/evaluate_dsp.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Microsoft Corporation.
 2 | # Licensed under the MIT License.
 3 | 
 4 | import fire
 5 | import sys
 6 | 
 7 | from data_science_problems.execution import evaluate_dsp
 8 | 
 9 | 
10 | def entry_point(
11 |     sample_file: str,
12 |     k: str = "1,10,100",
13 | ):
14 |     """
15 |     Evaluates the functional correctness of generated samples.
16 |     """
17 |     k = list(map(int, k.split(",")))
18 |     results = evaluate_dsp(sample_file, k)
19 |     print(results)
20 | 
21 | 
22 | def main():
23 |     fire.Fire(entry_point)
24 | 
25 | sys.exit(main())
26 | 


--------------------------------------------------------------------------------
/src/setup.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Microsoft Corporation.
 2 | # Licensed under the MIT License.
 3 | 
 4 | import os
 5 | 
 6 | import pkg_resources
 7 | from setuptools import setup, find_packages
 8 | 
 9 | setup(
10 |     name="DataScienceProblems",
11 |     py_modules=["DataScienceProblems"],
12 |     version="1.0",
13 |     description="",
14 |     author="Microsoft",
15 |     packages=find_packages(),
16 |     install_requires=[
17 |         str(r)
18 |         for r in pkg_resources.parse_requirements(
19 |             open(os.path.join(os.path.dirname(__file__), "requirements.txt"))
20 |         )
21 |     ],
22 |     entry_points={
23 |         "console_scripts": [
24 |             "evaluate_dsp = data_science_problems.evaluate_dsp",
25 |         ]
26 |     }
27 | )
28 | 


--------------------------------------------------------------------------------
/.github/workflows/pypi-publish.yml:
--------------------------------------------------------------------------------
 1 | name: Upload Python Package
 2 | 
 3 | on:
 4 |   release:
 5 |     types: [created]
 6 | 
 7 | jobs:
 8 |   deploy:
 9 |     runs-on: ubuntu-latest
10 |     steps:
11 |     - uses: actions/checkout@v2
12 |     - name: Set up Python
13 |       uses: actions/setup-python@v1
14 |       with:
15 |         python-version: '3.x'
16 |     - name: Install dependencies
17 |       run: |
18 |         python -m pip install --upgrade pip
19 |         pip install setuptools wheel twine
20 |         python setup.py sdist bdist_wheel
21 |     - name: Test dependencies
22 |       run: |
23 |         pip install -e .[all]
24 | 
25 |     - name: Publish package
26 |       uses: pypa/gh-action-pypi-publish@release/v1
27 |       with:
28 |         user: __token__
29 |         password: ${{ secrets.PYPI_PASSWORD }}
30 | 


--------------------------------------------------------------------------------
/.github/workflows/pylint.yml:
--------------------------------------------------------------------------------
 1 | name: PyLint
 2 | on:
 3 |   # Triggers the workflow on push or pull request events but only for the main branch
 4 |   push:
 5 |     branches: [ main ]
 6 |   pull_request:
 7 |     branches: [ main ]
 8 | 
 9 |   # Allows you to run this workflow manually from the Actions tab
10 |   workflow_dispatch:
11 | 
12 | jobs:
13 |   pylint:
14 |     name: runner / pylint
15 |     runs-on: ubuntu-latest
16 |     steps:
17 |       - uses: actions/checkout@v2
18 |       - uses: dciborow/action-pylint@0.0.3
19 |         with:
20 |           github_token: ${{ secrets.github_token }}
21 |           # Change reviewdog reporter if you need [github-pr-check,github-check,github-pr-review].
22 |           reporter: github-pr-review
23 |           # Change reporter level if you need.
24 |           # GitHub Status Check won't become failure with warning.
25 |           workdir: ai-python-package
26 |           level: warning
27 | 


--------------------------------------------------------------------------------
/.github/workflows/mypy.yml:
--------------------------------------------------------------------------------
 1 | # This is a basic workflow to help you get started with Actions
 2 | 
 3 | name: MyPy
 4 | 
 5 | # Controls when the workflow will run
 6 | on:
 7 |   # Triggers the workflow on push or pull request events but only for the main branch
 8 |   push:
 9 |     branches: [ main ]
10 |   pull_request:
11 |     branches: [ main ]
12 | 
13 |   # Allows you to run this workflow manually from the Actions tab
14 |   workflow_dispatch:
15 | jobs:
16 |   linter_name:
17 |     name: runner / black formatter
18 |     runs-on: ubuntu-latest
19 |     steps:
20 |       - uses: actions/checkout@v2
21 |       - name: Run mypy with reviewdog
22 |         # You may pin to the exact commit or the version.
23 |         # uses: tsuyoshicho/action-mypy@2160e947d397ac0be7f02c911c3a5bea3f498575
24 |         uses: tsuyoshicho/action-mypy@v3.1.0
25 |         with:
26 |           reporter: github-pr-review
27 |           workdir: ai-python-package
28 | 


--------------------------------------------------------------------------------
/.github/workflows/pyright.yml:
--------------------------------------------------------------------------------
 1 | # This is a basic workflow to help you get started with Actions
 2 | 
 3 | name: pyright
 4 | 
 5 | # Controls when the workflow will run
 6 | on:
 7 |   # Triggers the workflow on push or pull request events but only for the main branch
 8 |   push:
 9 |     branches: [ main ]
10 |   pull_request:
11 |     branches: [ main ]
12 | 
13 |   # Allows you to run this workflow manually from the Actions tab
14 |   workflow_dispatch:
15 | jobs:
16 |   linter_name:
17 |     name: runner / black formatter
18 |     runs-on: ubuntu-latest
19 |     steps:
20 |       - uses: actions/checkout@v2
21 |       - uses: jordemort/action-pyright@v1
22 |         with:
23 |           github_token: ${{ secrets.GITHUB_TOKEN }} # You need this
24 |           reporter: github-pr-review # Change reporter.
25 |           lib: true          
26 |       - uses: ricardochaves/python-lint@v1.4.0
27 |         with:
28 |           python-root-list: "ai-python-package"
29 | 


--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
 1 | # Contributing
 2 | 
 3 | This project welcomes contributions and suggestions. Most contributions require you to
 4 | agree to a Contributor License Agreement (CLA) declaring that you have the right to,
 5 | and actually do, grant us the rights to use your contribution. For details, visit
 6 | https://cla.microsoft.com.
 7 | 
 8 | When you submit a pull request, a CLA-bot will automatically determine whether you need
 9 | to provide a CLA and decorate the PR appropriately (e.g., label, comment). Simply follow the
10 | instructions provided by the bot. You will only need to do this once across all repositories using our CLA.
11 | 
12 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/).
13 | For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments.
14 | 


--------------------------------------------------------------------------------
/.github/workflows/black.yml:
--------------------------------------------------------------------------------
 1 | # This is a basic workflow to help you get started with Actions
 2 | 
 3 | name: Black
 4 | 
 5 | # Controls when the workflow will run
 6 | on:
 7 |   # Triggers the workflow on push or pull request events but only for the main branch
 8 |   push:
 9 |     branches: [ main ]
10 |   pull_request:
11 |     branches: [ main ]
12 | 
13 |   # Allows you to run this workflow manually from the Actions tab
14 |   workflow_dispatch:
15 | jobs:
16 |   linter_name:
17 |     name: runner / black formatter
18 |     runs-on: ubuntu-latest
19 |     steps:
20 |       - uses: actions/checkout@v2
21 |       - name: Check files using the black formatter
22 |         uses: rickstaa/action-black@v1
23 |         id: action_black
24 |         with:
25 |           black_args: "."
26 |       - name: Annotate diff changes using reviewdog
27 |         if: steps.action_black.outputs.is_formatted == 'true'
28 |         uses: reviewdog/action-suggester@v1
29 |         with:
30 |           tool_name: blackfmt
31 |           fail_on_error: true
32 | 


--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
 1 | Copyright (c) Microsoft Corporation.
 2 | 
 3 | MIT License
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/.github/workflows/pypi-test-publish.yml:
--------------------------------------------------------------------------------
 1 | name: Test Upload Python Package
 2 | 
 3 | on:
 4 |   workflow_dispatch
 5 | 
 6 | jobs:
 7 |   deploy:
 8 |     runs-on: ubuntu-latest
 9 |     steps:
10 |     - uses: actions/checkout@v2
11 |       with:
12 |             python-version: '3.7'
13 |     - name: Setup Python
14 |       uses: actions/setup-python@v2.2.2
15 |       with:
16 |         python-version: 3.7    
17 |     - name: Install dependencies
18 |       run: |
19 |         python3.7 -m pip install --upgrade pip
20 |         python3.7 -m pip install virtualenv pip-tools
21 |         sudo add-apt-repository -y ppa:ubuntugis/ppa \
22 |         && sudo apt install -y \
23 |           gdal-bin libgdal-dev
24 | 
25 |         mv requirements.txt requirements.in
26 |         touch requirements.txt
27 |         echo "" >> requirements.in
28 |         cat requirements.linked.in >> requirements.in
29 |         pip-compile -v requirements.in
30 | 
31 |         cd ai-python
32 | 
33 |         requirements=$(find . -type f -name "*requirements*" | paste -sd " ")
34 | 
35 |         for req in $requirements
36 |         do
37 |           mv $req temp.in
38 |           echo "-c ../requirements.txt" >> temp.in	
39 |           touch $req
40 |           pip-compile temp.in -o $req
41 |         done
42 | 
43 |         cd ..
44 | 
45 |         python setup.py sdist bdist_wheel
46 |     - name: Publish package
47 |       uses: pypa/gh-action-pypi-publish@release/v1
48 |       with:
49 |         user: __token__
50 |         password: ${{ secrets.TEST_PYPI_PASSWORD }}
51 |         repository_url: https://test.pypi.org/legacy/
52 |     - name: Install dependencies
53 |       run: |
54 |         python3.7 -m pip install --upgrade pip
55 |         python3.7 -m pip install virtualenv pip-tools
56 |         pip-compile --extra-index-url https://test.pypi.org/simple
57 |           ai-python-package[tests]
58 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | juice-github-repos/
  2 | 
  3 | # Byte-compiled / optimized / DLL files
  4 | __pycache__/
  5 | *.py[cod]
  6 | *$py.class
  7 | 
  8 | # C extensions
  9 | *.so
 10 | 
 11 | # Distribution / packaging
 12 | .Python
 13 | build/
 14 | develop-eggs/
 15 | dist/
 16 | downloads/
 17 | eggs/
 18 | .eggs/
 19 | lib/
 20 | lib64/
 21 | parts/
 22 | sdist/
 23 | var/
 24 | wheels/
 25 | pip-wheel-metadata/
 26 | share/python-wheels/
 27 | *.egg-info/
 28 | .installed.cfg
 29 | *.egg
 30 | MANIFEST
 31 | 
 32 | # PyInstaller
 33 | #  Usually these files are written by a python script from a template
 34 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 35 | *.manifest
 36 | *.spec
 37 | 
 38 | # Installer logs
 39 | pip-log.txt
 40 | pip-delete-this-directory.txt
 41 | 
 42 | # Unit test / coverage reports
 43 | htmlcov/
 44 | .tox/
 45 | .nox/
 46 | .coverage
 47 | .coverage.*
 48 | .cache
 49 | nosetests.xml
 50 | coverage.xml
 51 | *.cover
 52 | *.py,cover
 53 | .hypothesis/
 54 | .pytest_cache/
 55 | 
 56 | # Translations
 57 | *.mo
 58 | *.pot
 59 | 
 60 | # Django stuff:
 61 | *.log
 62 | local_settings.py
 63 | db.sqlite3
 64 | db.sqlite3-journal
 65 | 
 66 | # Flask stuff:
 67 | instance/
 68 | .webassets-cache
 69 | 
 70 | # Scrapy stuff:
 71 | .scrapy
 72 | 
 73 | # Sphinx documentation
 74 | docs/_build/
 75 | 
 76 | # PyBuilder
 77 | target/
 78 | 
 79 | # Jupyter Notebook
 80 | .ipynb_checkpoints
 81 | 
 82 | # IPython
 83 | profile_default/
 84 | ipython_config.py
 85 | 
 86 | # pyenv
 87 | .python-version
 88 | 
 89 | # pipenv
 90 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 91 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 92 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 93 | #   install all needed dependencies.
 94 | #Pipfile.lock
 95 | 
 96 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
 97 | __pypackages__/
 98 | 
 99 | # Celery stuff
100 | celerybeat-schedule
101 | celerybeat.pid
102 | 
103 | # SageMath parsed files
104 | *.sage.py
105 | 
106 | # Environments
107 | .env
108 | .venv
109 | env/
110 | venv/
111 | ENV/
112 | env.bak/
113 | venv.bak/
114 | 
115 | # Spyder project settings
116 | .spyderproject
117 | .spyproject
118 | 
119 | # Rope project settings
120 | .ropeproject
121 | 
122 | # mkdocs documentation
123 | /site
124 | 
125 | # mypy
126 | .mypy_cache/
127 | .dmypy.json
128 | dmypy.json
129 | 
130 | # Pyre type checker
131 | .pyre/
132 | 


--------------------------------------------------------------------------------
/SECURITY.md:
--------------------------------------------------------------------------------
 1 | <!-- BEGIN MICROSOFT SECURITY.MD V0.0.5 BLOCK -->
 2 | 
 3 | ## Security
 4 | 
 5 | Microsoft takes the security of our software products and services seriously, which includes all source code repositories managed through our GitHub organizations, which include [Microsoft](https://github.com/Microsoft), [Azure](https://github.com/Azure), [DotNet](https://github.com/dotnet), [AspNet](https://github.com/aspnet), [Xamarin](https://github.com/xamarin), and [our GitHub organizations](https://opensource.microsoft.com/).
 6 | 
 7 | If you believe you have found a security vulnerability in any Microsoft-owned repository that meets [Microsoft's definition of a security vulnerability](https://docs.microsoft.com/en-us/previous-versions/tn-archive/cc751383(v=technet.10)), please report it to us as described below.
 8 | 
 9 | ## Reporting Security Issues
10 | 
11 | **Please do not report security vulnerabilities through public GitHub issues.**
12 | 
13 | Instead, please report them to the Microsoft Security Response Center (MSRC) at [https://msrc.microsoft.com/create-report](https://msrc.microsoft.com/create-report).
14 | 
15 | If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com).  If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://www.microsoft.com/en-us/msrc/pgp-key-msrc).
16 | 
17 | You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://www.microsoft.com/msrc). 
18 | 
19 | Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue:
20 | 
21 |   * Type of issue (e.g. buffer overflow, SQL injection, cross-site scripting, etc.)
22 |   * Full paths of source file(s) related to the manifestation of the issue
23 |   * The location of the affected source code (tag/branch/commit or direct URL)
24 |   * Any special configuration required to reproduce the issue
25 |   * Step-by-step instructions to reproduce the issue
26 |   * Proof-of-concept or exploit code (if possible)
27 |   * Impact of the issue, including how an attacker might exploit the issue
28 | 
29 | This information will help us triage your report more quickly.
30 | 
31 | If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. Please visit our [Microsoft Bug Bounty Program](https://microsoft.com/msrc/bounty) page for more details about our active programs.
32 | 
33 | ## Preferred Languages
34 | 
35 | We prefer all communications to be in English.
36 | 
37 | ## Policy
38 | 
39 | Microsoft follows the principle of [Coordinated Vulnerability Disclosure](https://www.microsoft.com/en-us/msrc/cvd).
40 | 
41 | <!-- END MICROSOFT SECURITY.MD BLOCK -->


--------------------------------------------------------------------------------
/src/data_science_problems/progress.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Microsoft Corporation.
 2 | # Licensed under the MIT License.
 3 | 
 4 | from asyncio import Event
 5 | from typing import Tuple
 6 | from time import sleep
 7 | 
 8 | import ray
 9 | # For typing purposes
10 | from ray.actor import ActorHandle
11 | from tqdm import tqdm
12 | 
13 | 
14 | @ray.remote
15 | class ProgressBarActor:
16 |     counter: int
17 |     delta: int
18 |     event: Event
19 | 
20 |     def __init__(self) -> None:
21 |         self.counter = 0
22 |         self.delta = 0
23 |         self.event = Event()
24 | 
25 |     def update(self, num_items_completed: int) -> None:
26 |         """Updates the ProgressBar with the incremental
27 |         number of items that were just completed.
28 |         """
29 |         self.counter += num_items_completed
30 |         self.delta += num_items_completed
31 |         self.event.set()
32 | 
33 |     async def wait_for_update(self) -> Tuple[int, int]:
34 |         """Blocking call.
35 | 
36 |         Waits until somebody calls `update`, then returns a tuple of
37 |         the number of updates since the last call to
38 |         `wait_for_update`, and the total number of completed items.
39 |         """
40 |         await self.event.wait()
41 |         self.event.clear()
42 |         saved_delta = self.delta
43 |         self.delta = 0
44 |         return saved_delta, self.counter
45 | 
46 |     def get_counter(self) -> int:
47 |         """
48 |         Returns the total number of complete items.
49 |         """
50 |         return self.counter
51 | 
52 | 
53 | class ProgressBar:
54 |     progress_actor: ActorHandle
55 |     total: int
56 |     description: str
57 |     pbar: tqdm
58 | 
59 |     def __init__(self, total: int, description: str = ""):
60 |         # Ray actors don't seem to play nice with mypy, generating
61 |         # a spurious warning for the following line,
62 |         # which we need to suppress. The code is fine.
63 |         self.progress_actor = ProgressBarActor.remote()  # type: ignore
64 |         self.total = total
65 |         self.description = description
66 | 
67 |     @property
68 |     def actor(self) -> ActorHandle:
69 |         """Returns a reference to the remote `ProgressBarActor`.
70 | 
71 |         When you complete tasks, call `update` on the actor.
72 |         """
73 |         return self.progress_actor
74 | 
75 |     def print_until_done(self) -> None:
76 |         """Blocking call.
77 | 
78 |         Do this after starting a series of remote Ray tasks, to which you've
79 |         passed the actor handle. Each of them calls `update` on the actor.
80 |         When the progress meter reaches 100%, this method returns.
81 |         """
82 |         pbar = tqdm(desc=self.description, total=self.total)
83 |         while True:
84 |             delta, counter = ray.get(self.actor.wait_for_update.remote())
85 |             pbar.update(delta)
86 |             if counter >= self.total:
87 |                 pbar.close()
88 |                 return
89 | 


--------------------------------------------------------------------------------
/.pylintrc:
--------------------------------------------------------------------------------
  1 | [MASTER]
  2 | extension-pkg-whitelist=numpy,torch,cv2,pyodbc,pydantic,ciso8601,netcdf4,scipy
  3 | ignore=CVS
  4 | ignore-patterns=test.*?py,conftest.py
  5 | init-hook='import sys; sys.setrecursionlimit(8 * sys.getrecursionlimit())'
  6 | jobs=0
  7 | limit-inference-results=100
  8 | persistent=yes
  9 | suggestion-mode=yes
 10 | unsafe-load-any-extension=no
 11 | [MESSAGES CONTROL]
 12 | confidence=
 13 | enable=c-extension-no-member
 14 | 
 15 | [REPORTS]
 16 | evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10)
 17 | output-format=text
 18 | reports=no
 19 | score=yes
 20 | [REFACTORING]
 21 | max-nested-blocks=5
 22 | never-returning-functions=sys.exit
 23 | 
 24 | [BASIC]
 25 | argument-naming-style=snake_case
 26 | attr-naming-style=snake_case
 27 | bad-names=foo,
 28 |           bar,
 29 |           baz,
 30 |           toto,
 31 |           tutu,
 32 |           tata
 33 | class-attribute-naming-style=any
 34 | class-naming-style=PascalCase
 35 | const-naming-style=UPPER_CASE
 36 | docstring-min-length=-1
 37 | function-naming-style=snake_case
 38 | good-names=i,
 39 |            j,
 40 |            k,
 41 |            ex,
 42 |            Run,
 43 |            _,
 44 |            df,
 45 |            n,
 46 |            N,
 47 |            t,
 48 |            T,
 49 |            ax
 50 | include-naming-hint=yes
 51 | inlinevar-naming-style=any
 52 | method-naming-style=snake_case
 53 | module-naming-style=any
 54 | name-group=
 55 | no-docstring-rgx=^_
 56 | property-classes=abc.abstractproperty
 57 | variable-naming-style=snake_case
 58 | 
 59 | [FORMAT]
 60 | expected-line-ending-format=
 61 | ignore-long-lines=^\s*(# )?.*['"]?<?https?://\S+>?
 62 | indent-after-paren=4
 63 | indent-string='    '
 64 | max-line-length=120
 65 | max-module-lines=1000
 66 | no-space-check=trailing-comma,
 67 |                dict-separator
 68 | single-line-class-stmt=no
 69 | single-line-if-stmt=no
 70 | 
 71 | [LOGGING]
 72 | logging-format-style=old
 73 | logging-modules=logging
 74 | 
 75 | [MISCELLANEOUS]
 76 | notes=FIXME,
 77 |       XXX,
 78 |       TODO
 79 |       
 80 | [SIMILARITIES]
 81 | ignore-comments=yes
 82 | ignore-docstrings=yes
 83 | ignore-imports=yes
 84 | min-similarity-lines=7
 85 | 
 86 | [SPELLING]
 87 | max-spelling-suggestions=4
 88 | spelling-dict=
 89 | spelling-ignore-words=
 90 | spelling-private-dict-file=
 91 | spelling-store-unknown-words=no
 92 | 
 93 | [STRING]
 94 | check-str-concat-over-line-jumps=no
 95 | 
 96 | [TYPECHECK]
 97 | contextmanager-decorators=contextlib.contextmanager
 98 | generated-members=numpy.*,np.*,pyspark.sql.functions,collect_list
 99 | ignore-mixin-members=yes
100 | ignore-none=yes
101 | ignore-on-opaque-inference=yes
102 | ignored-classes=optparse.Values,thread._local,_thread._local,numpy,torch,swagger_client
103 | ignored-modules=numpy,torch,swagger_client,netCDF4,scipy
104 | missing-member-hint=yes
105 | missing-member-hint-distance=1
106 | missing-member-max-choices=1
107 | signature-mutators=
108 | 
109 | [VARIABLES]
110 | additional-builtins=dbutils
111 | allow-global-unused-variables=yes
112 | callbacks=cb_,
113 |           _cb
114 | dummy-variables-rgx=_+$|(_[a-zA-Z0-9_]*[a-zA-Z0-9]+?$)|dummy|^ignored_|^unused_
115 | ignored-argument-names=_.*|^ignored_|^unused_
116 | init-import=no
117 | redefining-builtins-modules=six.moves,past.builtins,future.builtins,builtins,io
118 | 
119 | [CLASSES]
120 | defining-attr-methods=__init__,
121 |                       __new__,
122 |                       setUp,
123 |                       __post_init__
124 | exclude-protected=_asdict,
125 |                   _fields,
126 |                   _replace,
127 |                   _source,
128 |                   _make
129 | valid-classmethod-first-arg=cls
130 | valid-metaclass-classmethod-first-arg=cls
131 | 
132 | [DESIGN]
133 | max-args=5
134 | max-attributes=7
135 | max-bool-expr=5
136 | max-branches=12
137 | max-locals=15
138 | max-parents=7
139 | max-public-methods=20
140 | max-returns=6
141 | max-statements=50
142 | min-public-methods=2
143 | 
144 | [IMPORTS]
145 | allow-any-import-level=
146 | allow-wildcard-with-all=no
147 | analyse-fallback-blocks=no
148 | deprecated-modules=optparse,tkinter.tix
149 | ext-import-graph=
150 | import-graph=
151 | int-import-graph=
152 | known-standard-library=
153 | known-third-party=enchant, azureiai-logistics-inventoryplanning
154 | preferred-modules=
155 | 
156 | [EXCEPTIONS]
157 | overgeneral-exceptions=BaseException,
158 |                        Exception
159 | 


--------------------------------------------------------------------------------
/src/data_science_problems/read.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) Microsoft Corporation.
  2 | # Licensed under the MIT License.
  3 | 
  4 | from pathlib import Path
  5 | from tqdm import tqdm
  6 | 
  7 | import os
  8 | import nbformat
  9 | from shutil import copyfile
 10 | 
 11 | 
 12 | ROOT = Path(os.path.abspath(__file__)).parent.parent
 13 | DSP = ROOT / "data-science-notebooks.txt"
 14 | 
 15 | 
 16 | def extract_initial_comments(tgt):
 17 |     tgt = tgt.strip().split("\n")
 18 |     for idx, t in enumerate(tgt):
 19 |         if t.strip() == "":
 20 |             continue
 21 |         if not t.startswith("#"):
 22 |             break
 23 |     return "\n".join(tgt[:idx]), "\n".join(tgt[idx:])
 24 | 
 25 | 
 26 | def build_examples(path, context_len=3):
 27 |     path = Path(path.strip())
 28 | 
 29 |     try:
 30 |         nb = nbformat.read(path, as_version=4)
 31 |     except:
 32 |         copyfile("/storage/data/" / path, path)
 33 |         nb = nbformat.read(path, as_version=4)
 34 | 
 35 |     cells_json = nb["cells"]
 36 |     cells = [''.join(cell['source']) for cell in cells_json]
 37 |     
 38 |     examples = []
 39 |     notebook_problem_index = 0
 40 |     for idx in range(len(cells)-1):
 41 |         cell_type = cells_json[idx]["cell_type"]
 42 |         i, j = max(0, idx-context_len), idx
 43 | 
 44 |         target = cells[idx]
 45 |         source = "\n".join(cells[i:j])
 46 | 
 47 |         # a number of times, the inital comments in the target
 48 |         # contains the problem statement. need that to solve the problem
 49 |         comment, target = extract_initial_comments(target)
 50 |         source = source + "\n" + comment
 51 |         
 52 |         next = cells[idx+1]
 53 | 
 54 |         try:
 55 |             if cells_json[idx]["metadata"]["nbgrader"]["solution"]:
 56 |                 if "assert" in "".join(cells_json[idx+1]["source"]):
 57 |                     if cell_type == "code":
 58 |                         example = {
 59 |                             "prompt": source,
 60 |                             "test": next,
 61 |                             "solution": target,
 62 |                             "notebook_path": path,
 63 |                             "notebook_problem_index": notebook_problem_index
 64 |                         }
 65 |                         notebook_problem_index += 1
 66 |                         yield example
 67 |         # catch if the key metadata.nbgrader.solution do not exist
 68 |         except Exception as e:
 69 |             pass   
 70 |     return examples
 71 | 
 72 | 
 73 | def build_examples_new(path, context_len=3):
 74 |     path = ROOT.parent / Path(path.strip())
 75 |     # try:
 76 |     nb = nbformat.read(path, as_version=4)
 77 |     # except:
 78 |     #     copyfile("/storage/data/" / path, path)
 79 |     #     nb = nbformat.read(path, as_version=4)
 80 | 
 81 |     cells_json = nb["cells"]
 82 |     cells = [''.join(cell['source']) for cell in cells_json]
 83 |     
 84 |     examples = []
 85 |     notebook_problem_index = 0
 86 |     for idx in range(len(cells)-1):
 87 |         cell_type = cells_json[idx]["cell_type"]
 88 |         i, j = max(0, idx-context_len), idx
 89 | 
 90 |         target = cells[idx]
 91 |         source = "\n".join(cells[i:j])
 92 | 
 93 |         # a number of times, the inital comments in the target
 94 |         # contains the problem statement. need that to solve the problem
 95 |         comment, target = extract_initial_comments(target)
 96 |         source = source + "\n" + comment
 97 |         
 98 |         next = cells[idx+1]
 99 | 
100 |         try:
101 |             task_id = cells_json[idx]["metadata"]["task_id"]
102 |             example = {
103 |                 "task_id": task_id,
104 |                 "prompt": source,
105 |                 "solution": target,
106 |                 "test": next,
107 |                 "notebook_path": str(path),
108 |                 "notebook_problem_index": notebook_problem_index,
109 |             }
110 |             notebook_problem_index += 1
111 |             yield example
112 |         except Exception as e:
113 |             pass   
114 |     return examples
115 | 
116 | 
117 | def read_filepaths():
118 |     with open(DSP) as f:
119 |         return f.readlines()
120 | 
121 | 
122 | def read_problems(context_len=3):
123 |     ps = read_filepaths()
124 |     examples = {}
125 |     for path in tqdm(ps, total=len(ps)):
126 |         for example in build_examples_new(path, context_len=context_len):
127 |             examples[example["task_id"]] = example
128 |     return examples
129 | 
130 | 
131 | 


--------------------------------------------------------------------------------
/src/data_science_problems/utils.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) Microsoft Corporation.
  2 | # Licensed under the MIT License.
  3 | 
  4 | import gzip
  5 | import os
  6 | import json
  7 | import faulthandler
  8 | import platform
  9 | import itertools
 10 | import numpy as np
 11 | 
 12 | 
 13 | def estimate_pass_at_k(
 14 |     num_samples,
 15 |     num_correct,
 16 |     k
 17 | ):
 18 |     """
 19 |     Estimates pass@k of each problem and returns them in an array.
 20 |     """
 21 | 
 22 |     def estimator(n: int, c: int, k: int) -> float:
 23 |         """
 24 |         Calculates 1 - comb(n - c, k) / comb(n, k).
 25 |         """
 26 |         if n - c < k:
 27 |             return 1.0
 28 |         return 1.0 - np.prod(1.0 - k / np.arange(n - c + 1, n + 1))
 29 | 
 30 |     if isinstance(num_samples, int):
 31 |         num_samples_it = itertools.repeat(num_samples, len(num_correct))
 32 |     else:
 33 |         assert len(num_samples) == len(num_correct)
 34 |         num_samples_it = iter(num_samples)
 35 | 
 36 |     return np.array([estimator(int(n), int(c), k) for n, c in zip(num_samples_it, num_correct)])
 37 | 
 38 | 
 39 | def write_jsonl(filename: str, data, append: bool = False):
 40 |     """
 41 |     Writes an iterable of dictionaries to jsonl
 42 |     """
 43 |     if append:
 44 |         mode = 'ab'
 45 |     else:
 46 |         mode = 'wb'
 47 |     filename = os.path.expanduser(filename)
 48 |     if filename.endswith(".gz"):
 49 |         with open(filename, mode) as fp:
 50 |             with gzip.GzipFile(fileobj=fp, mode='wb') as gzfp:
 51 |                 for x in data:
 52 |                     gzfp.write((json.dumps(x) + "\n").encode('utf-8'))
 53 |     else:
 54 |         with open(filename, mode) as fp:
 55 |             for x in data:
 56 |                 fp.write((json.dumps(x) + "\n").encode('utf-8'))
 57 | 
 58 | 
 59 | def stream_jsonl(filename: str):
 60 |     """
 61 |     Parses each jsonl line and yields it as a dictionary
 62 |     """
 63 |     if filename.endswith(".gz"):
 64 |         with open(filename, "rb") as gzfp:
 65 |             with gzip.open(gzfp, 'rt') as fp:
 66 |                 for line in fp:
 67 |                     if any(not x.isspace() for x in line):
 68 |                         yield json.loads(line)
 69 |     else:
 70 |         with open(filename, "r") as fp:
 71 |             for line in fp:
 72 |                 if any(not x.isspace() for x in line):
 73 |                     yield json.loads(line)
 74 | 
 75 | 
 76 | def reliability_guard(maximum_memory_bytes=None):
 77 |     """
 78 |     This disables various destructive functions and prevents the generated code
 79 |     from interfering with the test (e.g. fork bomb, killing other processes,
 80 |     removing filesystem files, etc.)
 81 | 
 82 |     WARNING
 83 |     This function is NOT a security sandbox. Untrusted code, including, model-
 84 |     generated code, should not be blindly executed outside of one.
 85 |     """
 86 | 
 87 |     if maximum_memory_bytes is not None:
 88 |         import resource
 89 |         resource.setrlimit(resource.RLIMIT_AS, (maximum_memory_bytes, maximum_memory_bytes))
 90 |         resource.setrlimit(resource.RLIMIT_DATA, (maximum_memory_bytes, maximum_memory_bytes))
 91 |         if not platform.uname().system == 'Darwin':
 92 |             resource.setrlimit(resource.RLIMIT_STACK, (maximum_memory_bytes, maximum_memory_bytes))
 93 | 
 94 |     faulthandler.disable()
 95 | 
 96 |     import builtins
 97 |     builtins.exit = None
 98 |     builtins.quit = None
 99 | 
100 |     import os
101 |     # os.environ['OMP_NUM_THREADS'] = '1'
102 | 
103 |     # os.kill = None
104 |     os.system = None
105 |     os.putenv = None
106 |     os.remove = None
107 |     os.removedirs = None
108 |     # os.rmdir = None
109 |     os.fchdir = None
110 |     os.setuid = None
111 |     os.fork = None
112 |     os.forkpty = None
113 |     os.killpg = None
114 |     os.rename = None
115 |     os.renames = None
116 |     os.truncate = None
117 |     os.replace = None
118 |     os.unlink = None
119 |     os.fchmod = None
120 |     os.fchown = None
121 |     os.chmod = None
122 |     os.chown = None
123 |     os.chroot = None
124 |     os.fchdir = None
125 |     os.lchflags = None
126 |     os.lchmod = None
127 |     os.lchown = None
128 |     os.getcwd = None
129 |     os.chdir = None
130 | 
131 |     import shutil
132 |     # shutil.rmtree = None
133 |     shutil.move = None
134 |     shutil.chown = None
135 | 
136 |     import subprocess
137 |     subprocess.Popen = None  # type: ignore
138 | 
139 |     __builtins__['help'] = None
140 | 
141 |     import sys
142 |     sys.modules['ipdb'] = None
143 |     sys.modules['joblib'] = None
144 |     sys.modules['resource'] = None
145 |     sys.modules['psutil'] = None
146 |     sys.modules['tkinter'] = None
147 | 


--------------------------------------------------------------------------------
/src/data_science_problems/execution.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) Microsoft Corporation.
  2 | # Licensed under the MIT License.
  3 | 
  4 | import numpy as np
  5 | from tqdm import tqdm
  6 | from pathlib import Path
  7 | from collections import defaultdict
  8 | 
  9 | import nbformat
 10 | from nbclient import NotebookClient
 11 | 
 12 | from data_science_problems.read import read_problems, read_filepaths, extract_initial_comments
 13 | from data_science_problems.utils import stream_jsonl, estimate_pass_at_k, reliability_guard
 14 | from data_science_problems.progress import ProgressBar
 15 | 
 16 | 
 17 | import ray
 18 | ray.init(log_to_driver=False)
 19 | 
 20 | 
 21 | def refersh_and_save(path, fout, completions, problems):
 22 |     path = Path(path.strip())
 23 |     nb = nbformat.read(path, as_version=4)
 24 | 
 25 |     cells_json = nb["cells"]
 26 |     cells = [''.join(cell['source']) for cell in cells_json]
 27 |     for idx in range(len(cells)-1):
 28 |         if "task_id" in cells_json[idx]["metadata"]:
 29 |             task_id = cells_json[idx]["metadata"]["task_id"]
 30 | 
 31 |             # verify that it is the right cell
 32 |             comment, solution = extract_initial_comments(cells_json[idx]["source"])
 33 |             assert solution == problems[task_id]["solution"]
 34 |             assert cells_json[idx+1]["source"] == problems[task_id]["test"]
 35 |             
 36 |             for completion_id, completion in enumerate(completions[task_id]):
 37 |                 # fill the cell with generated code
 38 |                 cells_json[idx]["source"] = "#### GENERATED\n" + completion
 39 | 
 40 |                 # write the refreshed notebook to it's own file
 41 |                 task_no = task_id.split('/')[1]
 42 |                 suffix = f".{task_no}.{completion_id}.ipynb"
 43 |                 outnb = path.parent / path.parts[-1].replace(".ipynb", suffix)
 44 |                 print(outnb, file=fout)
 45 |                 nbformat.write(nb, outnb)
 46 | 
 47 |                 # fill the cell back with original code
 48 |                 cells_json[idx]["source"] = problems[task_id]["solution"]
 49 | 
 50 | 
 51 | @ray.remote
 52 | def execute(notebook_filename, actor, ferr):
 53 |     actor.update.remote(1)
 54 |     notebook_filename = Path(notebook_filename.strip())
 55 |     nb = nbformat.read(notebook_filename, as_version=4)
 56 |     parent = notebook_filename.parent
 57 |     client = NotebookClient(nb, 
 58 |         timeout=10, 
 59 |         kernel_name="python3", 
 60 |         resources= {'metadata': {'path': parent}}, 
 61 |         allow_errors=True
 62 |     )
 63 |     try:
 64 |         enb = client.execute()
 65 |     except Exception as e:
 66 |         print(notebook_filename, file=ferr)
 67 |         return
 68 |     nbformat.write(enb, notebook_filename)
 69 |     print(notebook_filename)
 70 | 
 71 | 
 72 | def has_no_error(x):
 73 |     for element in x:
 74 |         if "ename" in element:
 75 |             return False
 76 |     return True
 77 | 
 78 | 
 79 | def evaluate(path):
 80 |     path = Path(path.strip())
 81 |     nb = nbformat.read(path, as_version=4)
 82 | 
 83 |     cells_json = nb["cells"]
 84 |     cells = [''.join(cell['source']) for cell in cells_json]
 85 |     for idx in range(len(cells)-1):
 86 |         if "task_id" in cells_json[idx]["metadata"]:
 87 |             task_id = cells_json[idx]["metadata"]["task_id"]
 88 |             source = cells_json[idx]["source"]
 89 |             if "#### GENERATED" in source:
 90 | #                 print(task_id)
 91 |                 test = cells_json[idx+1]["outputs"]
 92 |                 return has_no_error(test), task_id
 93 | 
 94 | 
 95 | def evaluate_dsp(sample_file="samples.jsonl", ks=[1, 10, 100]):
 96 | 
 97 |     print("Reading the generated samples.")
 98 |     problems = read_problems()
 99 |     completions = defaultdict(list)
100 |     for s in stream_jsonl(sample_file):
101 |         completions[s["task_id"]].append(s["completion"])
102 | 
103 | 
104 |     # create new notebooks with generated code filled in
105 |     print("Saving to new notebooks with generated samples.")
106 |     ps = read_filepaths()
107 |     out_file = "generated.txt"
108 |     with open(out_file, "w") as fout:
109 |         for path in tqdm(ps, total=len(ps)):
110 |             refersh_and_save(path, fout, completions, problems)
111 | 
112 | 
113 |     # disable functionalities that can make destructive changes to the test
114 |     reliability_guard()
115 | 
116 |     # execute the notebooks with generated code
117 |     print("Execute the new notebooks with generated samples.")
118 |     with open(out_file) as f:
119 |         ps = f.readlines()
120 |     
121 |     pb = ProgressBar(len(ps))
122 |     with open("errors.txt", "w") as ferr:
123 |         tasks_pre_launch = [execute.remote(notebook_filename, pb.actor, ferr) for notebook_filename in ps]
124 |         pb.print_until_done()
125 |         tasks = ray.get(tasks_pre_launch)
126 | 
127 | 
128 |     # calculate pass@k.
129 |     print("Complute pass@k for the executed notebooks.")
130 |     with open(out_file) as f:
131 |         ps = f.readlines()
132 |     
133 |     results = defaultdict(list)
134 |     for notebook_filename in tqdm(ps):
135 |         result, task_id = evaluate(notebook_filename)
136 |         results[task_id].append(result)
137 |     
138 |     total, correct = [], []
139 |     for result in results.values():
140 |         result.sort()
141 |         passed = [bool(r) for r in result]
142 |         total.append(len(passed))
143 |         correct.append(sum(passed))
144 |     total = np.array(total)
145 |     correct = np.array(correct)
146 | 
147 |     pass_at_k = {f"pass@{k}": estimate_pass_at_k(total, correct, k).mean() \
148 |                                                 for k in ks if (total >= k).all()}
149 |     return pass_at_k
150 | 
151 | 
152 | if __name__ == "__main__":
153 |     evaluate_dsp()
154 | 
155 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # Data Science Problems
  2 | 
  3 | Evaluate a natural language code generation model on real data science pedagogical notebooks! Data Science Problems (DSP) includes well-posed data science problems in Markdown along with unit tests to verify correctness and a Docker environment for reproducible execution. About 1/3 of notebooks in this benchmark also include data dependencies, so this benchmark not only can test a model's ability to chain together complex tasks, but also evaluate the solutions on real data! See our paper [Training and Evaluating a Jupyter Notebook Data Science Assistant](https://arxiv.org/abs/2201.12901) for more details about state of the art results and other properties of the dataset.
  4 | 
  5 | ## Installation
  6 | 
  7 | This project requires Python 3.6+ and Docker to run. Assuming you have these, to get started first download and install the Python package:
  8 | 
  9 | ```
 10 | $ git clone git@github.com:microsoft/DataScienceProblems.git
 11 | $ cd DataScienceProblems/src
 12 | $ pip install -e .
 13 | ```
 14 | 
 15 | ## Usage
 16 | 
 17 | ### Reading the problems
 18 | 
 19 | Extract the `juice-github-repos.tar.gz` file from the `DataScienceProblems` repository.
 20 | 
 21 | ```
 22 | $ tar -xvzf juice-github-repos.tar.gz
 23 | ```
 24 | 
 25 | ### Data Schema
 26 | 
 27 | Here is an example of a notebook context, prompt cell (1 and the markdown), solution cell (2), and unit tests cell (3).
 28 | 
 29 | ![example problem](example.png)
 30 | 
 31 | The DSP schema corresponding to this example includes the `prompt` which is the question to be asked to the student, `solution` which is the answer to the question and `test` which is the test case to be run on the student's code.
 32 | 
 33 | ```python
 34 | {
 35 |     'notebook_path': '/path/to/the/notebook.ipynb',
 36 |     'notebook_problem_index': 0,
 37 |     'prompt': '%matplotlib inline\n'
 38 |             'import matplotlib.pyplot as plt\n'
 39 |             'import numpy as np\n'
 40 |             'import scipy.optimize as opt\n'
 41 |             '## Hat potential\n'
 42 |             'The following potential is often used in Physics and other fields '
 43 |             'to describe symmetry breaking and is often known as the "hat '
 44 |             'potential":\n'
 45 |             '\n'
 46 |             '$$ V(x) = -a x^2 + b x^4 $$\n'
 47 |             '\n'
 48 |             'Write a function `hat(x,a,b)` that returns the value of this '
 49 |             'function:\n',
 50 |     'solution': 'def hat(x,a=5.0,b=1.0):\n    return -a* x*x + b*x**4',
 51 |     'task_id': 'DSP/414',
 52 |     'test': 'assert hat(0.0, 1.0, 1.0)==0.0\n'
 53 |             'assert hat(0.0, 1.0, 1.0)==0.0\n'
 54 |             'assert hat(1.0, 10.0, 1.0)==-9.0'
 55 | }
 56 | ```
 57 | 
 58 | We provide a `read_problems` function that can be used to read the problems from the jupyter notebooks. 
 59 | 
 60 | Below is an example of how to use the `read_problems` function and use your generated code samples to save the samples to a file.
 61 | 
 62 | 
 63 | ```python
 64 | from data_science_problems.read import read_problems
 65 | from data_science_problems.utils import write_jsonl
 66 | 
 67 | problems = read_problems()
 68 | 
 69 | num_samples = 1
 70 | samples = [
 71 |     dict(task_id=task_id, completion=generate_code(problems[task_id]["prompt"]))
 72 |     for task_id in problems
 73 |     for _ in range(num_samples)
 74 | ]
 75 | write_jsonl("samples.jsonl", samples)
 76 | ```
 77 | 
 78 | 
 79 | ### Executing the problems and unit tests
 80 | 
 81 | Once you have saved the generated samples in the `samples.jsonl` file, you need to build the provided docker container, which would help you safely run the generated samples inside the container.
 82 | 
 83 | Use the following command to build the docker container.
 84 | 
 85 | ```
 86 | $ docker build --pull --rm -f "Dockerfile" -t datascienceproblems:latest "."
 87 | ```
 88 | 
 89 | Once the Docker container is built, you can execute the generated samples inside the container. You'll need to map the `/app/juice-github-repos` and `/samples/samples.jsonl` directory to the host directory where the notebooks are stored.
 90 | 
 91 | Use the following command to execute the samples inside the container.  
 92 | 
 93 | ```
 94 | $ docker run -it --rm -v $PWD/juice-github-repos:/app/juice-github-repos -v $PWD/samples.jsonl:/samples/samples.jsonl datascienceproblems /samples/samples.jsonl
 95 | ```
 96 | 
 97 | 
 98 | The `docker run` will perform the following things:
 99 | 
100 | - It will read the samples from the `samples.jsonl` file.
101 | - It will create new notebooks with the generated code samples. The list of new notebooks is saved in the `generates-notebooks.txt` file.
102 | - It will execute these new notebooks.
103 | - It will compute `pass@k` for generated samples. 
104 | 
105 | > **WARNING: Running the `docker run` command with `num_samples = 1` will create with ~1000 new notebooks and save them on your disk. This may take a while.**
106 | 
107 | ```
108 | $ docker run -it --rm -v $PWD/juice-github-repos:/app/juice-github-repos -v $PWD/samples.jsonl:/samples/samples.jsonl datascienceproblems /samples/samples.jsonl
109 | 2021-11-02 09:11:11,847 INFO services.py:1164 -- View the Ray dashboard at http://127.0.0.1:8265
110 | Reading the generated samples.
111 | 100%|███████████████████████████████████████████████| 305/305 [00:03<00:00, 97.34it/s]
112 | Saving to new notebooks with generated samples.
113 | 100%|███████████████████████████████████████████████| 305/305 [00:36<00:00,  8.47it/s]
114 | Execute the new notebooks with generated samples.
115 | 100%|███████████████████████████████████████████████| 2192/2192 [05:17<00:40,  9.49it/s]
116 | Complute pass@k for the executed notebooks.
117 | 100%|███████████████████████████████████████████████| 2192/2192 [00:28<00:00, 76.73it/s]
118 | {'pass@1': ..., 'pass@10': ...}
119 | ```
120 | 
121 | ##### Trademarks
122 | 
123 | > This project may contain trademarks or logos for projects, products, or services. Authorized use of Microsoft trademarks or logos is subject to and must follow Microsoft’s Trademark & Brand Guidelines. Use of Microsoft trademarks or logos in modified versions of this project must not cause confusion or imply Microsoft sponsorship. Any use of third-party trademarks or logos are subject to those third-party’s policies.
124 | 
125 | ## Dataset Metadata
126 | 
127 | The following table is necessary for this dataset to be indexed by search
128 | engines such as <a href="https://g.co/datasetsearch">Google Dataset Search</a>.
129 | <div itemscope itemtype="http://schema.org/Dataset">
130 | <table>
131 |   <tr>
132 |     <th>property</th>
133 |     <th>value</th>
134 |   </tr>
135 |   <tr>
136 |     <td>name</td>
137 |     <td><code itemprop="name">Data Science Problems</code></td>
138 |   </tr>
139 |   <tr>
140 |     <td>url</td>
141 |     <td><code itemprop="url">https://github.com/microsoft/DataScienceProblems</code></td>
142 |   </tr>
143 |   <tr>
144 |     <td>sameAs</td>
145 |     <td><code itemprop="sameAs">https://github.com/microsoft/DataScienceProblems</code></td>
146 |   </tr>
147 |   <tr>
148 |     <td>description</td>
149 |     <td><code itemprop="description">
150 |         Evaluate a natural language code generation model on real data science pedagogical notebooks! 
151 |         Data Science Problems (DSP) includes well-posed data science problems in Markdown along with 
152 |         unit tests to verify correctness and a Docker environment for reproducible execution. About 
153 |         1/3 of notebooks in this benchmark also include data dependencies, so this benchmark not only 
154 |         can test a model's ability to chain together complex tasks, but also evaluate the solutions 
155 |         on real data! See our paper Training and Evaluating a Jupyter Notebook Data Science Assistant 
156 |         (https://arxiv.org/abs/2201.12901) for more details about state of the art results and other 
157 |         properties of the dataset.
158 | 	</code></td>
159 |   </tr>
160 |   <tr>
161 |     <td>citation</td>
162 |     <td><code itemprop="citation">https://arxiv.org/abs/2201.12901</code></td>
163 |   </tr>
164 |   <tr>
165 |     <td>license</td>
166 |     <td><code itemprop="license">https://github.com/microsoft/DataScienceProblems/blob/main/LICENSE.txt</code></td>
167 |   </tr>
168 | </table>
169 | </div>
170 | 
171 | 


--------------------------------------------------------------------------------
/src/data-science-notebooks.txt:
--------------------------------------------------------------------------------
  1 | juice-github-repos/mwizasimbeye11.data-science-africa-2018-abuja/data-science-africa-2018-abuja-master/PythonBasicsx.task_id.ipynb
  2 | juice-github-repos/jiuchenzhang.python-practice/python-practice-master/Week2_Assignment/Problem_2.task_id.ipynb
  3 | juice-github-repos/jiuchenzhang.python-practice/python-practice-master/Week2_Assignment/Problem_3.task_id.ipynb
  4 | juice-github-repos/jiuchenzhang.python-practice/python-practice-master/Week13_Assignment/Problem_2.task_id.ipynb
  5 | juice-github-repos/jiuchenzhang.python-practice/python-practice-master/Week13_Assignment/Problem_1.task_id.ipynb
  6 | juice-github-repos/jiuchenzhang.python-practice/python-practice-master/Week9_Assignment/Problem_2.task_id.ipynb
  7 | juice-github-repos/jiuchenzhang.python-practice/python-practice-master/Week5_Assignment/Problem_2.task_id.ipynb
  8 | juice-github-repos/jiuchenzhang.python-practice/python-practice-master/Week14_Assignment/Problem_2.task_id.ipynb
  9 | juice-github-repos/jiuchenzhang.python-practice/python-practice-master/Week4_Assignment/Problem_3.task_id.ipynb
 10 | juice-github-repos/jiuchenzhang.python-practice/python-practice-master/Week4_Assignment/Problem_2.task_id.ipynb
 11 | juice-github-repos/jiuchenzhang.python-practice/python-practice-master/Week3_Assignment/Problem_2.task_id.ipynb
 12 | juice-github-repos/jiuchenzhang.python-practice/python-practice-master/Week6_Assignment/Problem_2.task_id.ipynb
 13 | juice-github-repos/Kaleidophon.-friendly-octo-palm-tree/friendly-octo-palm-tree-master/lab1.task_id.ipynb
 14 | juice-github-repos/HussainAther.MachineLearningCourse/machinelearning-master/feature/Week10-assignment.task_id.ipynb
 15 | juice-github-repos/HussainAther.MachineLearningCourse/machinelearning-master/regression/Week7-assignment.task_id.ipynb
 16 | juice-github-repos/hacker0sean.DATA100/Data_science_2-master/lab9/lab09.task_id.ipynb
 17 | juice-github-repos/hacker0sean.DATA100/Data_science_2-master/lab8/lab08.task_id.ipynb
 18 | juice-github-repos/hacker0sean.DATA100/Data_science_2-master/hw5/hw5.task_id.ipynb
 19 | juice-github-repos/keiran-rowell.Jupyter-Notebooks/Jupyter-Notebooks-master/EXTRA_autograding_example.task_id.ipynb
 20 | juice-github-repos/mwizasimbeye11.datascienceafrica-2018-nyeri-application/datascienceafrica-2018-nyeri-application-master/Python Basics/DSA-Python-Basics.task_id.ipynb
 21 | juice-github-repos/BroderickHigby.COGS108_Repp/data_science_in_practice-master/Assignments/A1/A1_H7031.task_id.ipynb
 22 | juice-github-repos/BroderickHigby.COGS108_Repp/data_science_in_practice-master/Assignments/A1/A1_SettingUp_solutions.task_id.ipynb
 23 | juice-github-repos/BroderickHigby.COGS108_Repp/data_science_in_practice-master/Assignments/Demo/Demo_solutions.task_id.ipynb
 24 | juice-github-repos/BroderickHigby.COGS108_Repp/data_science_in_practice-master/Projects/workbooks/Demo/Demo_solutions.task_id.ipynb
 25 | juice-github-repos/BroderickHigby.COGS108_Repp/data_science_in_practice-master/Projects/workbooks/A1/A1_H7031.task_id.ipynb
 26 | juice-github-repos/BroderickHigby.COGS108_Repp/data_science_in_practice-master/Projects/workbooks/A1/A1_SettingUp_solutions.task_id.ipynb
 27 | juice-github-repos/BroderickHigby.COGS108_Repp/data_science_in_practice-master/workbooks/Demo/Demo_solutions.task_id.ipynb
 28 | juice-github-repos/BroderickHigby.COGS108_Repp/data_science_in_practice-master/workbooks/A1/A1_SettingUp_solutions.task_id.ipynb
 29 | juice-github-repos/BroderickHigby.COGS108_Repp/data_science_in_practice-master/workbooks/A1/A1_H7031.task_id.ipynb
 30 | juice-github-repos/weiHelloWorld.info490_advanced_data_science/info490_advanced_data_science-master/Week3/assignments/w3p3.task_id.ipynb
 31 | juice-github-repos/espoirMur.data-science-africa-submission/data-science-africa-submission-master/Machine Learning Basics/DSA Preparatory Notebook 1.task_id.ipynb
 32 | juice-github-repos/espoirMur.data-science-africa-submission/data-science-africa-submission-master/Python Basics/DSA-Python-Basics.task_id.ipynb
 33 | juice-github-repos/espoirMur.data-science-africa-submission/data-science-africa-submission-master/Python Basics/DSA-Python-2.task_id.ipynb
 34 | juice-github-repos/brian-cabanas.UCLAEx-Data-Science-S18/UCLAEx-Data-Science-S18-master/01-iris-02-numerical-python/03-the-python-numerical-stack.task_id.ipynb
 35 | juice-github-repos/brian-cabanas.UCLAEx-Data-Science-S18/UCLAEx-Data-Science-S18-master/02-iris-05-sampling/09-lists.task_id.ipynb
 36 | juice-github-repos/chazalex.DATA301/DATA301-master/assignment03/ProjectEuler17.task_id.ipynb
 37 | juice-github-repos/chazalex.DATA301/DATA301-master/assignment03/ProjectEuler52.task_id.ipynb
 38 | juice-github-repos/chazalex.DATA301/DATA301-master/assignment10/MatplotlibExoplanets.task_id.ipynb
 39 | juice-github-repos/chazalex.DATA301/DATA301-master/assignment07/PandasObjects.task_id.ipynb
 40 | juice-github-repos/chazalex.DATA301/DATA301-master/assignment11/ChinookSQL.task_id.ipynb
 41 | juice-github-repos/chazalex.DATA301/DATA301-master/assignment11/InteractEx03.task_id.ipynb
 42 | juice-github-repos/chazalex.DATA301/DATA301-master/assignment08/MissingValues.task_id.ipynb
 43 | juice-github-repos/chazalex.DATA301/DATA301-master/assignment12/CentralLimitTheorem.task_id.ipynb
 44 | juice-github-repos/chazalex.DATA301/DATA301-master/assignment12/SoccerMatches.task_id.ipynb
 45 | juice-github-repos/chazalex.DATA301/DATA301-master/assignment01/BasicVectorOps.task_id.ipynb
 46 | juice-github-repos/chazalex.DATA301/DATA301-master/assignment06/WienerProcess.task_id.ipynb
 47 | juice-github-repos/StanDimitroff.Data-Science/Data-Science-master/Data Tidying and Cleaning Lab.task_id.ipynb
 48 | juice-github-repos/StanDimitroff.Data-Science/Data-Science-master/Data Visualization and EDA Lab.task_id.ipynb
 49 | juice-github-repos/StanDimitroff.Data-Science/Data-Science-master/TitanicSurvivals/DS Project Architecture Lab.task_id.ipynb
 50 | juice-github-repos/StanDimitroff.Data-Science/Data-Science-master/ImageProcessing/Working with Images and Text Lab.task_id.ipynb
 51 | juice-github-repos/FollowJack.python-for-machine-learning/python-for-machine-learning-master/Python for Machine Learning/ExerciseSheet4/sheet4_solution.task_id.ipynb
 52 | juice-github-repos/FollowJack.python-for-machine-learning/python-for-machine-learning-master/Python for Machine Learning/Training/sheet4_solution.task_id.ipynb
 53 | juice-github-repos/mwatkin8.class_repos/class_repos-master/python_programming/mwatkin8_6018_2017/m11_files/m11_files.task_id.ipynb
 54 | juice-github-repos/mwatkin8.class_repos/class_repos-master/python_programming/mwatkin8_6018_2017/m9_calculus_review/m9_calculus.task_id.ipynb
 55 | juice-github-repos/mwatkin8.class_repos/class_repos-master/python_programming/mwatkin8_6018_2017/modules/m6-dictionaries/InClass/DictionariesInClassExercises_inclass2.task_id.ipynb
 56 | juice-github-repos/mwatkin8.class_repos/class_repos-master/python_programming/mwatkin8_6018_2017/m6_collections_dictionaries/m6_dictionaries.task_id.ipynb
 57 | juice-github-repos/mwatkin8.class_repos/class_repos-master/python_programming/mwatkin8_6018_2017/m12_oop/m12_oop_color.task_id.ipynb
 58 | juice-github-repos/mwatkin8.class_repos/class_repos-master/python_programming/mwatkin8_6018_2017/m3_numerics/m3_numerics_hw.task_id.ipynb
 59 | juice-github-repos/Aliendreamer.Python-Sql-DataScience/Python-Sql-DataScience-master/black folder/Data Visualization and EDA Lab.task_id.ipynb
 60 | juice-github-repos/Aliendreamer.Python-Sql-DataScience/Python-Sql-DataScience-master/black folder/Data Tidying and Cleaning Lab.task_id.ipynb
 61 | juice-github-repos/Aliendreamer.Python-Sql-DataScience/Python-Sql-DataScience-master/black folder/Data Visualization and EDA Labb.task_id.ipynb
 62 | juice-github-repos/Aliendreamer.Python-Sql-DataScience/Python-Sql-DataScience-master/black folder/Data Visualization and EDA Lab-org.task_id.ipynb
 63 | juice-github-repos/chapmanbe.6018_2017/6018_2017-master/modules/m6-dictionaries/InClass/DictionariesInClassExercises_inclass2.task_id.ipynb
 64 | juice-github-repos/declanvk.data301-projects/data301-projects-master/assignment13/BootstrapLineFitting.task_id.ipynb
 65 | juice-github-repos/declanvk.data301-projects/data301-projects-master/assignment07/PandasObjects.task_id.ipynb
 66 | juice-github-repos/declanvk.data301-projects/data301-projects-master/assignment14/TitanicClassification.task_id.ipynb
 67 | juice-github-repos/declanvk.data301-projects/data301-projects-master/assignment10/MatplotlibExoplanets.task_id.ipynb
 68 | juice-github-repos/declanvk.data301-projects/data301-projects-master/assignment03/ProjectEuler52.task_id.ipynb
 69 | juice-github-repos/declanvk.data301-projects/data301-projects-master/assignment03/ProjectEuler17.task_id.ipynb
 70 | juice-github-repos/declanvk.data301-projects/data301-projects-master/assignment09/TitanicGroupAgg.task_id.ipynb
 71 | juice-github-repos/declanvk.data301-projects/data301-projects-master/assignment06/WienerProcess.task_id.ipynb
 72 | juice-github-repos/declanvk.data301-projects/data301-projects-master/assignment06/PeakFinding.task_id.ipynb
 73 | juice-github-repos/declanvk.data301-projects/data301-projects-master/assignment01/BasicVectorOps.task_id.ipynb
 74 | juice-github-repos/declanvk.data301-projects/data301-projects-master/assignment12/SoccerMatches.task_id.ipynb
 75 | juice-github-repos/declanvk.data301-projects/data301-projects-master/assignment12/CentralLimitTheorem.task_id.ipynb
 76 | juice-github-repos/declanvk.data301-projects/data301-projects-master/assignment12/SpeedOfLight.task_id.ipynb
 77 | juice-github-repos/declanvk.data301-projects/data301-projects-master/assignment08/MissingValues.task_id.ipynb
 78 | juice-github-repos/declanvk.data301-projects/data301-projects-master/assignment11/ChinookSQL.task_id.ipynb
 79 | juice-github-repos/declanvk.data301-projects/data301-projects-master/assignment11/InteractEx03.task_id.ipynb
 80 | juice-github-repos/SleepSux.2018-uclax-datascience/2018-uclax-datascience-master/01-iris-02-numerical-python/03-the-python-numerical-stack.task_id.ipynb
 81 | juice-github-repos/rekeeley.MLS_HW/MLS_HW-master/homework/Homework7.task_id.ipynb
 82 | juice-github-repos/UvA-FNWI.notebook-exams/notebook-exams-master/Using_notebooks_for_exams/Using_nbgrader/nbgrader-template.task_id.ipynb
 83 | juice-github-repos/xiaoxiaoyu0302.xiao-s-projects/Python4Geoscience-notes-hw-master/hw1_ocng669.task_id.ipynb
 84 | juice-github-repos/xiaoxiaoyu0302.xiao-s-projects/Python4Geoscience-notes-hw-master/hw3_ocng669.task_id.ipynb
 85 | juice-github-repos/clzls119.berkeley/berkeley-master/ds100/proj2B/proj2_part3.task_id.ipynb
 86 | juice-github-repos/clzls119.berkeley/berkeley-master/ds100/lab03/lab03.task_id.ipynb
 87 | juice-github-repos/clzls119.berkeley/berkeley-master/ds100/lab10/lab10.task_id.ipynb
 88 | juice-github-repos/clzls119.berkeley/berkeley-master/ds100/hw0/hw0.task_id.ipynb
 89 | juice-github-repos/clzls119.berkeley/berkeley-master/ds100/hw2/hw2.task_id.ipynb
 90 | juice-github-repos/clzls119.berkeley/berkeley-master/ds100/lab05/lab05.task_id.ipynb
 91 | juice-github-repos/clzls119.berkeley/berkeley-master/ds100/hw1/hw1.task_id.ipynb
 92 | juice-github-repos/jeff3780.DSE200x/edX_DSE200x-master/wk3_Numpy/numpy Exercise Notebook-SOLUTIONS.task_id.ipynb
 93 | juice-github-repos/Venciity.Data-Science/Data-Science-Softuni-Course-master/02. Data-Tidying-and-Cleaning/Lab/Data Tidying and Cleaning Lab.task_id.ipynb
 94 | juice-github-repos/Venciity.Data-Science/Data-Science-Softuni-Course-master/03. Data-Visualization-Exploratory-Data-Analysis/Lab/Data Visualization and EDA Lab.task_id.ipynb
 95 | juice-github-repos/Venciity.Data-Science/Data-Science-Softuni-Course-master/06. Regression-Models/Lab/Regression Models Lab.task_id.ipynb
 96 | juice-github-repos/chewy06.DataScience/DataScience-master/assignment01/ProjectEuler1.task_id.ipynb
 97 | juice-github-repos/chewy06.DataScience/DataScience-master/assignment01/ProjectEuler6.task_id.ipynb
 98 | juice-github-repos/chewy06.DataScience/DataScience-master/assignment01/BasicVectorOps.task_id.ipynb
 99 | juice-github-repos/chewy06.DataScience/DataScience-master/midterm/CentralLimitTheorem.task_id.ipynb
100 | juice-github-repos/chewy06.DataScience/DataScience-master/assignment06/LogNormalSimulation.task_id.ipynb
101 | juice-github-repos/chewy06.DataScience/DataScience-master/assignment06/BootstrapCoinToss.task_id.ipynb
102 | juice-github-repos/chewy06.DataScience/DataScience-master/.local/share/jupyter/nbgrader_cache/data301a/chewy06+assignment01+2016-01-12 21:43:23 UTC/BasicVectorOps.task_id.ipynb
103 | juice-github-repos/chewy06.DataScience/DataScience-master/.local/share/jupyter/nbgrader_cache/data301a/chewy06+assignment01+2016-01-12 21:33:54 UTC/BasicVectorOps.task_id.ipynb
104 | juice-github-repos/chewy06.DataScience/DataScience-master/.local/share/jupyter/nbgrader_cache/data301a/chewy06+assignment01+2016-01-12 21:33:39 UTC/BasicVectorOps.task_id.ipynb
105 | juice-github-repos/chewy06.DataScience/DataScience-master/.local/share/jupyter/nbgrader_cache/data301a/chewy06+assignment01+2016-01-12 21:21:14 UTC/BasicVectorOps.task_id.ipynb
106 | juice-github-repos/chewy06.DataScience/DataScience-master/.local/share/jupyter/nbgrader_cache/data301a/chewy06+assignment07+2016-02-09 20:47:35 UTC/HatPotential.task_id.ipynb
107 | juice-github-repos/chewy06.DataScience/DataScience-master/.local/share/jupyter/nbgrader_cache/data301a/chewy06+midterm+2016-02-02 23:32:18 UTC/CentralLimitTheorem.task_id.ipynb
108 | juice-github-repos/chewy06.DataScience/DataScience-master/.local/share/jupyter/nbgrader_cache/data301a/chewy06+assignment06+2016-02-02 20:12:18 UTC/BootstrapCoinToss.task_id.ipynb
109 | juice-github-repos/chewy06.DataScience/DataScience-master/.local/share/jupyter/nbgrader_cache/data301a/chewy06+assignment06+2016-02-02 20:12:18 UTC/LogNormalSimulation.task_id.ipynb
110 | juice-github-repos/chewy06.DataScience/DataScience-master/.local/share/jupyter/nbgrader_cache/data301a/chewy06+assignment01+2016-01-12 21:48:40 UTC/BasicVectorOps.task_id.ipynb
111 | juice-github-repos/chewy06.DataScience/DataScience-master/.local/share/jupyter/nbgrader_cache/data301a/chewy06+assignment01+2016-01-12 21:48:40 UTC/ProjectEuler6.task_id.ipynb
112 | juice-github-repos/chewy06.DataScience/DataScience-master/.local/share/jupyter/nbgrader_cache/data301a/chewy06+assignment05+2016-01-28 21:12:40 UTC/SoccerMatches.task_id.ipynb
113 | juice-github-repos/chewy06.DataScience/DataScience-master/.local/share/jupyter/nbgrader_cache/data301a/chewy06+assignment04+2016-01-26 21:54:06 UTC/WienerProcess.task_id.ipynb
114 | juice-github-repos/chewy06.DataScience/DataScience-master/.local/share/jupyter/nbgrader_cache/data301a/chewy06+assignment03+2016-01-19 21:23:36 UTC/ProjectEuler52.task_id.ipynb
115 | juice-github-repos/chewy06.DataScience/DataScience-master/.local/share/jupyter/nbgrader_cache/data301a/chewy06+assignment06+2016-02-02 21:17:06 UTC/BootstrapCoinToss.task_id.ipynb
116 | juice-github-repos/chewy06.DataScience/DataScience-master/.local/share/jupyter/nbgrader_cache/data301a/chewy06+assignment06+2016-02-02 21:17:06 UTC/LogNormalSimulation.task_id.ipynb
117 | juice-github-repos/chewy06.DataScience/DataScience-master/.local/share/jupyter/nbgrader_cache/data301a/chewy06+assignment03+2016-01-19 21:15:41 UTC/ProjectEuler52.task_id.ipynb
118 | juice-github-repos/chewy06.DataScience/DataScience-master/.local/share/jupyter/nbgrader_cache/data301a/chewy06+midterm+2016-02-02 23:30:21 UTC/CentralLimitTheorem.task_id.ipynb
119 | juice-github-repos/chewy06.DataScience/DataScience-master/.local/share/jupyter/nbgrader_cache/data301a/chewy06+assignment04+2016-01-26 23:14:49 UTC/WienerProcess.task_id.ipynb
120 | juice-github-repos/chewy06.DataScience/DataScience-master/.local/share/jupyter/nbgrader_cache/data301a/chewy06+assignment01+2016-01-12 21:34:55 UTC/BasicVectorOps.task_id.ipynb
121 | juice-github-repos/chewy06.DataScience/DataScience-master/assignment05/SoccerMatches.task_id.ipynb
122 | juice-github-repos/chewy06.DataScience/DataScience-master/assignment07/HatPotential.task_id.ipynb
123 | juice-github-repos/chewy06.DataScience/DataScience-master/assignment04/WienerProcess.task_id.ipynb
124 | juice-github-repos/chewy06.DataScience/DataScience-master/assignment03/ProjectEuler52.task_id.ipynb
125 | juice-github-repos/Jihunlee326.DLC/DeepLearningCollege-master/01_programming/Assign_solutions.task_id.ipynb
126 | juice-github-repos/Jihunlee326.DLC/DeepLearningCollege-master/01_programming/Indexing_solutions.task_id.ipynb
127 | juice-github-repos/RuofanW.1007/DS-GA-1007-Programming-for-DS-master/02_Numpy_I_Ruofan.task_id.ipynb
128 | juice-github-repos/KostadinovK.Data-Science/Data-Science-master/02-Data-Tidyng-And-Cleaning/Data Tidying and Cleaning Lab.task_id.ipynb
129 | juice-github-repos/KostadinovK.Data-Science/Data-Science-master/06-Regression-Models/Regression Models Lab.task_id.ipynb
130 | juice-github-repos/KostadinovK.Data-Science/Data-Science-master/04-Working-with-Images-And-Text/Working with Images and Text Lab.task_id.ipynb
131 | juice-github-repos/kabartay.EdX-UCSanDiegoX-DSE200x/EdX-UCSanDiegoX-DS-master/DSE200x/Week-3-Exercises/Solutions/numpy_Exercise_Notebook-SOLUTIONS.task_id.ipynb
132 | juice-github-repos/allstonfojas.COGS108_REPO/COGS108_Repo-master/Assignments/A1/A1_F4143.task_id.ipynb
133 | juice-github-repos/allstonfojas.COGS108_REPO/COGS108_Repo-master/Assignments/Demo/Demo_solutions.task_id.ipynb
134 | juice-github-repos/tcloudb.COGS_108/COGS_108-master/Assignments/A1/A1_B9430.task_id.ipynb
135 | juice-github-repos/vlsalina.COGS108-Repo/COGS108-Repo-master/A5/A5_L7263.task_id.ipynb
136 | juice-github-repos/vlsalina.COGS108-Repo/COGS108-Repo-master/A1/A1_SettingUp_solutions.task_id.ipynb
137 | juice-github-repos/levishutts.Projects/Projects-master/Python/IPython Notebook/Project 1/hello.task_id.ipynb
138 | juice-github-repos/Ventsislav-Yordanov.Machine-Learning-Softuni-Course/Machine-Learning-Softuni-Course-master/05. Support-Vector-Machines/Lab/Trees and SVMs Lab.task_id.ipynb
139 | juice-github-repos/Ventsislav-Yordanov.Machine-Learning-Softuni-Course/Machine-Learning-Softuni-Course-master/03. Model-Training-and-Improvement/Lab/Model Training and Improvement Lab.task_id.ipynb
140 | juice-github-repos/osunadavid.COGS108_Repo/COGS108_Repo-master/A5/A5_O5524.task_id.ipynb
141 | juice-github-repos/arunsug.Assignments5/Assignments5-master/A1/A1_SettingUp_Solutions.task_id.ipynb
142 | juice-github-repos/GutHub88.COGS108_Repo/COGS108_Repo-master/A1_G0579.task_id.ipynb
143 | juice-github-repos/nicksinai.DATA301/DATA301-master/.local/share/jupyter/nbgrader_cache/data301a/nicksinai+assignment05+2016-01-28 21:21:16 UTC/SoccerMatches.task_id.ipynb
144 | juice-github-repos/nicksinai.DATA301/DATA301-master/.local/share/jupyter/nbgrader_cache/data301a/nicksinai+assignment07+2016-02-11 21:19:41 UTC/HatPotential.task_id.ipynb
145 | juice-github-repos/nicksinai.DATA301/DATA301-master/.local/share/jupyter/nbgrader_cache/data301a/nicksinai+assignment07+2016-02-09 21:17:09 UTC/HatPotential.task_id.ipynb
146 | juice-github-repos/nicksinai.DATA301/DATA301-master/.local/share/jupyter/nbgrader_cache/data301a/nicksinai+assignment06+2016-02-02 21:33:31 UTC/MatplotlibEx02.task_id.ipynb
147 | juice-github-repos/nicksinai.DATA301/DATA301-master/.local/share/jupyter/nbgrader_cache/data301a/nicksinai+assignment06+2016-02-02 21:33:31 UTC/MatplotlibEx01.task_id.ipynb
148 | juice-github-repos/nicksinai.DATA301/DATA301-master/.local/share/jupyter/nbgrader_cache/data301a/nicksinai+assignment06+2016-02-02 21:33:31 UTC/LogNormalSimulation.task_id.ipynb
149 | juice-github-repos/nicksinai.DATA301/DATA301-master/.local/share/jupyter/nbgrader_cache/data301a/nicksinai+assignment06+2016-02-02 21:33:31 UTC/BootstrapCoinToss.task_id.ipynb
150 | juice-github-repos/nicksinai.DATA301/DATA301-master/assignment06/BootstrapCoinToss.task_id.ipynb
151 | juice-github-repos/nicksinai.DATA301/DATA301-master/assignment06/MatplotlibEx01.task_id.ipynb
152 | juice-github-repos/nicksinai.DATA301/DATA301-master/assignment06/MatplotlibEx02.task_id.ipynb
153 | juice-github-repos/nicksinai.DATA301/DATA301-master/midterm/CharEntropy.task_id.ipynb
154 | juice-github-repos/nicksinai.DATA301/DATA301-master/assignment07/HatPotential.task_id.ipynb
155 | juice-github-repos/gertingold.jupyter4edu/jupyter4edu-master/katowice/introAI_Quizzes/Quiz1/Quiz_norm.task_id.ipynb
156 | juice-github-repos/gertingold.jupyter4edu/jupyter4edu-master/katowice/introAI_Quizzes/Quiz6/Quiz_Perceptron.task_id.ipynb
157 | juice-github-repos/gertingold.jupyter4edu/jupyter4edu-master/katowice/introAI_Quizzes/Quiz12/Quiz_backpropagation.task_id.ipynb
158 | juice-github-repos/gertingold.jupyter4edu/jupyter4edu-master/katowice/introAI_Quizzes/Quiz2/Quiz_Gauss.task_id.ipynb
159 | juice-github-repos/gertingold.jupyter4edu/jupyter4edu-master/katowice/introAI_Quizzes/Quiz8/Quiz_minimum_distance_as_linear.task_id.ipynb
160 | juice-github-repos/gertingold.jupyter4edu/jupyter4edu-master/katowice/introAI_Quizzes/Quiz5/Quiz_confusion_matrix.task_id.ipynb
161 | juice-github-repos/gertingold.jupyter4edu/jupyter4edu-master/katowice/introAI_Quizzes/Quiz11/Quiz_k-means.task_id.ipynb
162 | juice-github-repos/gertingold.jupyter4edu/jupyter4edu-master/katowice/introAI_Quizzes/Quiz13/Quiz_nn_forward_pass.task_id.ipynb
163 | juice-github-repos/gertingold.jupyter4edu/jupyter4edu-master/katowice/introAI_Quizzes/Quiz7/Quiz_SVM_example_numerical_sklearn.task_id.ipynb
164 | juice-github-repos/gertingold.jupyter4edu/jupyter4edu-master/katowice/introAI_Quizzes/Quiz10/Quiz_LDA.task_id.ipynb
165 | juice-github-repos/gertingold.jupyter4edu/jupyter4edu-master/katowice/introAI_Quizzes/Quiz4/Quiz_reglin-sgd.task_id.ipynb
166 | juice-github-repos/gertingold.jupyter4edu/jupyter4edu-master/katowice/introAI_Quizzes/Quiz4/Quiz_reglin-sgd_multi.task_id.ipynb
167 | juice-github-repos/gertingold.jupyter4edu/jupyter4edu-master/katowice/introAI_Quizzes/Quiz9/Quiz_Gini.task_id.ipynb
168 | juice-github-repos/gertingold.jupyter4edu/jupyter4edu-master/katowice/introAI/PCA/PCA.task_id.ipynb
169 | juice-github-repos/gertingold.jupyter4edu/jupyter4edu-master/katowice/introAI/network/02_network.task_id.ipynb
170 | juice-github-repos/gertingold.jupyter4edu/jupyter4edu-master/katowice/introAI/Logistic_regression/Logistic_regression.task_id.ipynb
171 | juice-github-repos/gertingold.jupyter4edu/jupyter4edu-master/katowice/introAI/kNN/min_distance.task_id.ipynb
172 | juice-github-repos/gertingold.jupyter4edu/jupyter4edu-master/katowice/introAI/kNN/kNN.task_id.ipynb
173 | juice-github-repos/gertingold.jupyter4edu/jupyter4edu-master/katowice/introAI/Numpy/numpy_linalg_nbgrader.task_id.ipynb
174 | juice-github-repos/gertingold.jupyter4edu/jupyter4edu-master/katowice/introAI/Numpy/basic_numpy_nbgrader.task_id.ipynb
175 | juice-github-repos/gertingold.jupyter4edu/jupyter4edu-master/katowice/introAI/Bayes/Naive_Bayes_5steps.task_id.ipynb
176 | juice-github-repos/gertingold.jupyter4edu/jupyter4edu-master/augsburg/exercises/Pi/Pi.task_id.ipynb
177 | juice-github-repos/lamalor.ds100/ds100-master/lab08/lab08.task_id.ipynb
178 | juice-github-repos/lamalor.ds100/ds100-master/hw5/hw5.task_id.ipynb
179 | juice-github-repos/lamalor.ds100/ds100-master/lab12/lab12.task_id.ipynb
180 | juice-github-repos/lamalor.ds100/ds100-master/hw1/hw1.task_id.ipynb
181 | juice-github-repos/lamalor.ds100/ds100-master/hw3/hw3.task_id.ipynb
182 | juice-github-repos/lamalor.ds100/ds100-master/lab09/lab09.task_id.ipynb
183 | juice-github-repos/jmuresan.DATA_301_Projects/DATA_301_Projects-master/assignment04/WienerProcess.task_id.ipynb
184 | juice-github-repos/jmuresan.DATA_301_Projects/DATA_301_Projects-master/assignment06/LogNormalSimulation.task_id.ipynb
185 | juice-github-repos/jmuresan.DATA_301_Projects/DATA_301_Projects-master/assignment06/BootstrapLineFitting.task_id.ipynb
186 | juice-github-repos/jmuresan.DATA_301_Projects/DATA_301_Projects-master/midterm/PeakFinding.task_id.ipynb
187 | juice-github-repos/jmuresan.DATA_301_Projects/DATA_301_Projects-master/midterm/CentralLimitTheorem.task_id.ipynb
188 | juice-github-repos/JeffreyZLee.COGS108_Repo/COGS108_Repo-master/A1/A1_A12615927.task_id.ipynb
189 | juice-github-repos/cinematica-robot-master.practica1/practica1-master/Problemas.task_id.ipynb
190 | juice-github-repos/brianlyu.dataScience108/dataScience108-master/A1/A1_L5359.task_id.ipynb
191 | juice-github-repos/brianlyu.dataScience108/dataScience108-master/Demo/Demo_solutions.task_id.ipynb
192 | juice-github-repos/germann12346.Assignments/Assignments-master/Demo/Demo_solutions.task_id.ipynb
193 | juice-github-repos/germann12346.Assignments/Assignments-master/A1/A1_P1973.task_id.ipynb
194 | juice-github-repos/germann12346.Assignments/Assignments-master/A1/A1_SettingUp_solutions.task_id.ipynb
195 | juice-github-repos/Broch76.CPN321/CPN321-master/C2_handout.task_id.ipynb
196 | juice-github-repos/mariaNikolova.Data-Science/Data-Science-master/Data Tidying and Cleaning/Data Tidying and Cleaning Lab.task_id.ipynb
197 | juice-github-repos/mariaNikolova.Data-Science/Data-Science-master/Working with Images and Text/Working with Images and Text Lab.task_id.ipynb
198 | juice-github-repos/mariaNikolova.Data-Science/Data-Science-master/Regression Models Lab/Regression Models Lab.task_id.ipynb
199 | juice-github-repos/mariaNikolova.Data-Science/Data-Science-master/Data Visualization. Exploratory Data Analysis/Data Visualization and EDA Lab.task_id.ipynb
200 | juice-github-repos/mariaNikolova.Data-Science/Data-Science-master/Data Science Project Architecture/DS Project Architecture Lab.task_id.ipynb
201 | juice-github-repos/chenjiunhan.UvA_AI_record/UvA_AI_record-master/ml1/11649712_11844523_lab1.task_id.ipynb
202 | juice-github-repos/karlaeavila.COGS108_RepoA3/COGS108_RepoA3-master/A3/Assignments/Demo/Demo_solutions.task_id.ipynb
203 | juice-github-repos/ardntshop.MyAssignments/MyAssignments-master/A1/A1_R9050.task_id.ipynb
204 | juice-github-repos/rcw712.rcw712/rcw712-master/m12_oop/m12_oop_color.task_id.ipynb
205 | juice-github-repos/rcw712.rcw712/rcw712-master/m9_calculus_review/m9_calculus.task_id.ipynb
206 | juice-github-repos/rcw712.rcw712/rcw712-master/m11_files/m11_files.task_id.ipynb
207 | juice-github-repos/rcw712.rcw712/rcw712-master/m3_numerics/m3_numerics_hw.task_id.ipynb
208 | juice-github-repos/rcw712.rcw712/rcw712-master/m4_collections/m4_collections_hw.task_id.ipynb
209 | juice-github-repos/biom262.biom262-2016/biom262-2016-master/weeks/week04/0_alignment_expression_quantification.task_id.ipynb
210 | juice-github-repos/cewalden.data301classwork/data301classwork-master/ps10/MatplotlibExoplanets.task_id.ipynb
211 | juice-github-repos/cewalden.data301classwork/data301classwork-master/ps3/ProjectEuler17.task_id.ipynb
212 | juice-github-repos/cewalden.data301classwork/data301classwork-master/ps3/ProjectEuler52.task_id.ipynb
213 | juice-github-repos/cewalden.data301classwork/data301classwork-master/.local/share/jupyter/nbgrader_cache/data301/cewalden+ps11+2018-02-27 16:12:29.112101 UTC/SoccerMatches.task_id.ipynb
214 | juice-github-repos/cewalden.data301classwork/data301classwork-master/.local/share/jupyter/nbgrader_cache/data301/cewalden+ps11+2018-02-27 16:12:29.112101 UTC/CentralLimitTheorem.task_id.ipynb
215 | juice-github-repos/cewalden.data301classwork/data301classwork-master/.local/share/jupyter/nbgrader_cache/data301/cewalden+ps12+2018-03-01 16:18:02.209521 UTC/MLEPoisson.task_id.ipynb
216 | juice-github-repos/cewalden.data301classwork/data301classwork-master/.local/share/jupyter/nbgrader_cache/data301/cewalden+ps12+2018-03-01 16:18:02.209521 UTC/HatPotential.task_id.ipynb
217 | juice-github-repos/cewalden.data301classwork/data301classwork-master/.local/share/jupyter/nbgrader_cache/data301/cewalden+ps3+2018-01-23 16:32:57.380188 UTC/ProjectEuler17.task_id.ipynb
218 | juice-github-repos/cewalden.data301classwork/data301classwork-master/.local/share/jupyter/nbgrader_cache/data301/cewalden+ps3+2018-01-23 16:32:57.380188 UTC/ProjectEuler52.task_id.ipynb
219 | juice-github-repos/cewalden.data301classwork/data301classwork-master/.local/share/jupyter/nbgrader_cache/data301/cewalden+ps6+2018-02-01 16:09:55.510811 UTC/CompleteGraphLaplacian.task_id.ipynb
220 | juice-github-repos/cewalden.data301classwork/data301classwork-master/.local/share/jupyter/nbgrader_cache/data301/cewalden+ps1+2018-01-16 16:32:52.695135 UTC/BasicVectorOps.task_id.ipynb
221 | juice-github-repos/cewalden.data301classwork/data301classwork-master/.local/share/jupyter/nbgrader_cache/data301/cewalden+ps1+2018-01-16 16:32:52.695135 UTC/ProjectEuler1.task_id.ipynb
222 | juice-github-repos/cewalden.data301classwork/data301classwork-master/.local/share/jupyter/nbgrader_cache/data301/cewalden+ps8+2018-02-13 16:12:17.430982 UTC/PandasObjects.task_id.ipynb
223 | juice-github-repos/cewalden.data301classwork/data301classwork-master/.local/share/jupyter/nbgrader_cache/data301/cewalden+ps3+2018-01-23 17:00:04.791058 UTC/ProjectEuler17.task_id.ipynb
224 | juice-github-repos/cewalden.data301classwork/data301classwork-master/.local/share/jupyter/nbgrader_cache/data301/cewalden+ps3+2018-01-23 17:00:04.791058 UTC/ProjectEuler52.task_id.ipynb
225 | juice-github-repos/cewalden.data301classwork/data301classwork-master/.local/share/jupyter/nbgrader_cache/data301/cewalden+ps8+2018-02-13 16:21:19.604942 UTC/PandasObjects.task_id.ipynb
226 | juice-github-repos/cewalden.data301classwork/data301classwork-master/.local/share/jupyter/nbgrader_cache/data301/cewalden+ps3+2018-01-23 09:34:11.542668 UTC/ProjectEuler52.task_id.ipynb
227 | juice-github-repos/cewalden.data301classwork/data301classwork-master/.local/share/jupyter/nbgrader_cache/data301/cewalden+ps3+2018-01-23 09:34:11.542668 UTC/ProjectEuler17.task_id.ipynb
228 | juice-github-repos/cewalden.data301classwork/data301classwork-master/.local/share/jupyter/nbgrader_cache/data301/cewalden+ps10+2018-02-22 16:11:45.959229 UTC/MatplotlibExoplanets.task_id.ipynb
229 | juice-github-repos/cewalden.data301classwork/data301classwork-master/ps8/PandasObjects.task_id.ipynb
230 | juice-github-repos/cewalden.data301classwork/data301classwork-master/ps11/SoccerMatches.task_id.ipynb
231 | juice-github-repos/cewalden.data301classwork/data301classwork-master/ps11/CentralLimitTheorem.task_id.ipynb
232 | juice-github-repos/cewalden.data301classwork/data301classwork-master/ps1/ProjectEuler1.task_id.ipynb
233 | juice-github-repos/cewalden.data301classwork/data301classwork-master/ps1/BasicVectorOps.task_id.ipynb
234 | juice-github-repos/cewalden.data301classwork/data301classwork-master/ps12/HatPotential.task_id.ipynb
235 | juice-github-repos/cewalden.data301classwork/data301classwork-master/ps12/MLEPoisson.task_id.ipynb
236 | juice-github-repos/cewalden.data301classwork/data301classwork-master/ps6/CompleteGraphLaplacian.task_id.ipynb
237 | juice-github-repos/ecpierce.biom262-2016/biom262-2016-master/weeks/week04/0_alignment_expression_quantification.task_id.ipynb
238 | juice-github-repos/kchan23.2018-spr-data-science/2018-spr-data-science-master/01-iris-02-numerical-python/03-the-python-numerical-stack.task_id.ipynb
239 | juice-github-repos/dorfman.data301-work/data301-work-master/assignment04/CompleteGraphLaplacian.task_id.ipynb
240 | juice-github-repos/dorfman.data301-work/data301-work-master/assignment04/WienerProcess.task_id.ipynb
241 | juice-github-repos/dorfman.data301-work/data301-work-master/midterm/PeakFinding.task_id.ipynb
242 | juice-github-repos/dorfman.data301-work/data301-work-master/midterm/CentralLimitTheorem.task_id.ipynb
243 | juice-github-repos/dorfman.data301-work/data301-work-master/assignment03/ProjectEuler52.task_id.ipynb
244 | juice-github-repos/dorfman.data301-work/data301-work-master/assignment01/BasicVectorOps.task_id.ipynb
245 | juice-github-repos/dorfman.data301-work/data301-work-master/assignment06/MatplotlibEx02.task_id.ipynb
246 | juice-github-repos/dorfman.data301-work/data301-work-master/assignment06/MatplotlibEx01.task_id.ipynb
247 | juice-github-repos/dorfman.data301-work/data301-work-master/assignment06/LogNormalSimulation.task_id.ipynb
248 | juice-github-repos/dorfman.data301-work/data301-work-master/assignment06/BootstrapLineFitting.task_id.ipynb
249 | juice-github-repos/dorfman.data301-work/data301-work-master/assignment06/BootstrapCoinToss.task_id.ipynb
250 | juice-github-repos/dorfman.data301-work/data301-work-master/assignment05/SoccerMatches.task_id.ipynb
251 | juice-github-repos/saverymax.machine_learning/machine_learning-master/week_7/Week7-assignment.task_id.ipynb
252 | juice-github-repos/saverymax.machine_learning/machine_learning-master/week_2/max-savery_week2-assignment.task_id.ipynb
253 | juice-github-repos/swiftwind501.COGS108_Repo/COGS108_Repo-master/A1_A11970626.task_id.ipynb
254 | juice-github-repos/marcinofulus.jupyter4edu/jupyter4edu-master/augsburg/exercises/Pi/Pi.task_id.ipynb
255 | juice-github-repos/marcinofulus.jupyter4edu/jupyter4edu-master/katowice/introAI/kNN/kNN.task_id.ipynb
256 | juice-github-repos/marcinofulus.jupyter4edu/jupyter4edu-master/katowice/introAI/kNN/min_distance.task_id.ipynb
257 | juice-github-repos/marcinofulus.jupyter4edu/jupyter4edu-master/katowice/introAI/Bayes/Naive_Bayes_5steps.task_id.ipynb
258 | juice-github-repos/marcinofulus.jupyter4edu/jupyter4edu-master/katowice/introAI/Numpy/basic_numpy_nbgrader.task_id.ipynb
259 | juice-github-repos/marcinofulus.jupyter4edu/jupyter4edu-master/katowice/introAI/Numpy/numpy_linalg_nbgrader.task_id.ipynb
260 | juice-github-repos/marcinofulus.jupyter4edu/jupyter4edu-master/katowice/introAI/Logistic_regression/Logistic_regression.task_id.ipynb
261 | juice-github-repos/marcinofulus.jupyter4edu/jupyter4edu-master/katowice/introAI/PCA/PCA.task_id.ipynb
262 | juice-github-repos/marcinofulus.jupyter4edu/jupyter4edu-master/katowice/introAI/network/02_network.task_id.ipynb
263 | juice-github-repos/marcinofulus.jupyter4edu/jupyter4edu-master/katowice/introAI_Quizzes/Quiz7/Quiz_SVM_example_numerical_sklearn.task_id.ipynb
264 | juice-github-repos/marcinofulus.jupyter4edu/jupyter4edu-master/katowice/introAI_Quizzes/Quiz11/Quiz_k-means.task_id.ipynb
265 | juice-github-repos/marcinofulus.jupyter4edu/jupyter4edu-master/katowice/introAI_Quizzes/Quiz4/Quiz_reglin-sgd.task_id.ipynb
266 | juice-github-repos/marcinofulus.jupyter4edu/jupyter4edu-master/katowice/introAI_Quizzes/Quiz4/Quiz_reglin-sgd_multi.task_id.ipynb
267 | juice-github-repos/marcinofulus.jupyter4edu/jupyter4edu-master/katowice/introAI_Quizzes/Quiz12/Quiz_backpropagation.task_id.ipynb
268 | juice-github-repos/marcinofulus.jupyter4edu/jupyter4edu-master/katowice/introAI_Quizzes/Quiz9/Quiz_Gini.task_id.ipynb
269 | juice-github-repos/marcinofulus.jupyter4edu/jupyter4edu-master/katowice/introAI_Quizzes/Quiz10/Quiz_LDA.task_id.ipynb
270 | juice-github-repos/marcinofulus.jupyter4edu/jupyter4edu-master/katowice/introAI_Quizzes/Quiz1/Quiz_norm.task_id.ipynb
271 | juice-github-repos/marcinofulus.jupyter4edu/jupyter4edu-master/katowice/introAI_Quizzes/Quiz6/Quiz_Perceptron.task_id.ipynb
272 | juice-github-repos/marcinofulus.jupyter4edu/jupyter4edu-master/katowice/introAI_Quizzes/Quiz13/Quiz_nn_forward_pass.task_id.ipynb
273 | juice-github-repos/marcinofulus.jupyter4edu/jupyter4edu-master/katowice/introAI_Quizzes/Quiz8/Quiz_minimum_distance_as_linear.task_id.ipynb
274 | juice-github-repos/marcinofulus.jupyter4edu/jupyter4edu-master/katowice/introAI_Quizzes/Quiz2/Quiz_Gauss.task_id.ipynb
275 | juice-github-repos/marcinofulus.jupyter4edu/jupyter4edu-master/katowice/introAI_Quizzes/Quiz5/Quiz_confusion_matrix.task_id.ipynb
276 | juice-github-repos/Yiming-Gao.UIUC-Spring-2017/UIUC-Spring-2017-master/INFO-490/Week3/Assignments/w3p3.task_id.ipynb
277 | juice-github-repos/zdhali.ZZD/ZZD-master/COGS 108/A1_SettingUp.task_id.ipynb
278 | juice-github-repos/CVaranese.Data301/Data301-master/ps10/MatplotlibExoplanets.task_id.ipynb
279 | juice-github-repos/CVaranese.Data301/Data301-master/ps3/ProjectEuler52.task_id.ipynb
280 | juice-github-repos/CVaranese.Data301/Data301-master/ps3/ProjectEuler17.task_id.ipynb
281 | juice-github-repos/CVaranese.Data301/Data301-master/ps13/BootstrapLineFitting.task_id.ipynb
282 | juice-github-repos/CVaranese.Data301/Data301-master/ps11/SoccerMatches.task_id.ipynb
283 | juice-github-repos/CVaranese.Data301/Data301-master/ps11/CentralLimitTheorem.task_id.ipynb
284 | juice-github-repos/CVaranese.Data301/Data301-master/ps11/SpeedOfLight.task_id.ipynb
285 | juice-github-repos/CVaranese.Data301/Data301-master/ps6/CompleteGraphLaplacian.task_id.ipynb
286 | juice-github-repos/CVaranese.Data301/Data301-master/ps1/BasicVectorOps.task_id.ipynb
287 | juice-github-repos/CVaranese.Data301/Data301-master/ps12/MLEPoisson.task_id.ipynb
288 | juice-github-repos/CVaranese.Data301/Data301-master/ps12/HatPotential.task_id.ipynb
289 | juice-github-repos/ruan-andy.COGS108_Repo/COGS108_Repo-master/A1/A1_A13952215.task_id.ipynb
290 | juice-github-repos/georgivelikov.SoftUni/SoftUni-master/Data Science/Lecture 4/Data Tidying and Cleaning Lab.task_id.ipynb
291 | juice-github-repos/georgivelikov.SoftUni/SoftUni-master/Data Science/Lecture 6/Working with Images and Text Lab.task_id.ipynb
292 | juice-github-repos/georgivelikov.SoftUni/SoftUni-master/Data Science/Lecture 5/Data Visualization and EDA Lab.task_id.ipynb
293 | juice-github-repos/georgivelikov.SoftUni/SoftUni-master/Data Science/Lecture 8/06. Regression-Models-Lab/Regression Models Lab.task_id.ipynb
294 | juice-github-repos/mcx2576.python_project/python_project-master/Machine_Learning/11813105_11853018_10325948_Lab#1.task_id.ipynb
295 | juice-github-repos/adityassrana.BITS-F312---Neural-Networks-and-Fuzzy-Logic/BITS-F312-Neural-Networks-and-Fuzzy-Logic-master/2019/ZomatoRecommendationSystem/Assignment2.task_id.ipynb
296 | juice-github-repos/adityassrana.BITS-F312---Neural-Networks-and-Fuzzy-Logic/BITS-F312-Neural-Networks-and-Fuzzy-Logic-master/2018/Online Test 1/1. Neural Activation.task_id.ipynb
297 | juice-github-repos/adityassrana.BITS-F312---Neural-Networks-and-Fuzzy-Logic/BITS-F312-Neural-Networks-and-Fuzzy-Logic-master/2018/Online Test 1/3. Nearest Redi Problem.task_id.ipynb
298 | juice-github-repos/adityassrana.BITS-F312---Neural-Networks-and-Fuzzy-Logic/BITS-F312-Neural-Networks-and-Fuzzy-Logic-master/2018/Solutions/Mock Test 1/Softmax.task_id.ipynb
299 | juice-github-repos/adityassrana.BITS-F312---Neural-Networks-and-Fuzzy-Logic/BITS-F312-Neural-Networks-and-Fuzzy-Logic-master/2018/Solutions/Mock Test 1/Convolution Conundrum.task_id.ipynb
300 | juice-github-repos/adityassrana.BITS-F312---Neural-Networks-and-Fuzzy-Logic/BITS-F312-Neural-Networks-and-Fuzzy-Logic-master/2018/Solutions/Online Test 1/3. Nearest Redi Problem.task_id.ipynb
301 | juice-github-repos/graskovi.COGS108_Repo/COGS108_Repo-master/A2/A2_R6931.task_id.ipynb
302 | juice-github-repos/graskovi.COGS108_Repo/COGS108_Repo-master/A1/A1_R6931.task_id.ipynb
303 | juice-github-repos/graskovi.COGS108_Repo/COGS108_Repo-master/Demo/Demo_solutions.task_id.ipynb
304 | juice-github-repos/apauliuc.MachineLearning2_Labs/MachineLearning2_Labs-master/lab3/lab3.task_id.ipynb
305 | juice-github-repos/Khangle92.datascienceUCLA/datascienceUCLA-master/uclax-data-science/01-iris-02-numerical-python/03-the-python-numerical-stack.task_id.ipynb
306 | 


--------------------------------------------------------------------------------