├── .gitignore ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── SECURITY.md └── datasets ├── PandasEval1.json └── PandasEval2.json /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Microsoft Open Source Code of Conduct 2 | 3 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). 4 | 5 | Resources: 6 | 7 | - [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/) 8 | - [Microsoft Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) 9 | - Contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with questions or concerns 10 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | This project welcomes contributions and suggestions. Most contributions require you to 4 | agree to a Contributor License Agreement (CLA) declaring that you have the right to, 5 | and actually do, grant us the rights to use your contribution. 
For details, visit 6 | https://cla.microsoft.com. 7 | 8 | ## Code of conduct 9 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). 10 | For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) 11 | or contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments. 12 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) Microsoft Corporation. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Jigsaw Datasets 2 | The datasets folder contains the two datasets described in our [paper](https://arxiv.org/abs/2112.02969) - 3 | 4 | 1.) PandasEval1 - This dataset was collected by the authors of the paper and consists of 68 entries 5 | 6 | 2.) PandasEval2 - This dataset was collected in the form of a hackathon user study across two sessions with differing tasks. Each task contains multiple sets with minor variations such as scalar/constant differences. Some tasks might have semantically different sets. It comprises 21 unique tasks, and for every task at most 5 variations/sets. For each set there are multiple natural language variations leading to a total of 725 entries. 7 | 8 | Both of these JSON files follow the structure described below. 9 | 10 | * The outermost level contains key-value pairs with the unique task id. 11 | * For each task, we have key-value pairs for the various sets in the task. 12 | * For each set, we have 13 | - a list of queries along with user-ids who wrote those queries 14 | - one or more io examples.
Each io example is a dict containing 15 | + code snippet for inputs 16 | + code snippet for output 17 | + corresponding names for inputs and outputs 18 | - one or more correct solutions 19 | 20 | In case you find this work useful, please cite it as 21 | ``` 22 | @inproceedings{Jigsaw, 23 | author = {Jain, Naman and Vaidyanath, Skanda and Iyer, Arun and Natarajan, Nagarajan and Parthasarathy, Suresh and Rajamani, Sriram and Sharma, Rahul}, 24 | title = {Jigsaw: Large Language Models meet Program Synthesis}, 25 | booktitle = {ICSE 2022}, 26 | location = {Pittsburgh, Pennsylvania}, 27 | } 28 | ``` -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | ## Security 4 | 5 | Microsoft takes the security of our software products and services seriously, which includes all source code repositories managed through our GitHub organizations, which include [Microsoft](https://github.com/Microsoft), [Azure](https://github.com/Azure), [DotNet](https://github.com/dotnet), [AspNet](https://github.com/aspnet), [Xamarin](https://github.com/xamarin), and [our GitHub organizations](https://opensource.microsoft.com/). 6 | 7 | If you believe you have found a security vulnerability in any Microsoft-owned repository that meets [Microsoft's definition of a security vulnerability](https://aka.ms/opensource/security/definition), please report it to us as described below. 8 | 9 | ## Reporting Security Issues 10 | 11 | **Please do not report security vulnerabilities through public GitHub issues.** 12 | 13 | Instead, please report them to the Microsoft Security Response Center (MSRC) at [https://msrc.microsoft.com/create-report](https://aka.ms/opensource/security/create-report). 14 | 15 | If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com). 
If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://aka.ms/opensource/security/pgpkey). 16 | 17 | You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://aka.ms/opensource/security/msrc). 18 | 19 | Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue: 20 | 21 | * Type of issue (e.g. buffer overflow, SQL injection, cross-site scripting, etc.) 22 | * Full paths of source file(s) related to the manifestation of the issue 23 | * The location of the affected source code (tag/branch/commit or direct URL) 24 | * Any special configuration required to reproduce the issue 25 | * Step-by-step instructions to reproduce the issue 26 | * Proof-of-concept or exploit code (if possible) 27 | * Impact of the issue, including how an attacker might exploit the issue 28 | 29 | This information will help us triage your report more quickly. 30 | 31 | If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. Please visit our [Microsoft Bug Bounty Program](https://aka.ms/opensource/security/bounty) page for more details about our active programs. 32 | 33 | ## Preferred Languages 34 | 35 | We prefer all communications to be in English. 36 | 37 | ## Policy 38 | 39 | Microsoft follows the principle of [Coordinated Vulnerability Disclosure](https://aka.ms/opensource/security/cvd). 
40 | 41 | 42 | -------------------------------------------------------------------------------- /datasets/PandasEval1.json: -------------------------------------------------------------------------------- 1 | { 2 | "0": { 3 | "sets": { 4 | "A": { 5 | "queries": [ 6 | { 7 | "query": "Retain rows from dataframe df1 where value of EPS is not equal to 89", 8 | "user": "u0" 9 | } 10 | ], 11 | "ios": [ 12 | { 13 | "inputs": [ 14 | "pd.DataFrame({'STK_ID': {0: 601166.0, 1: 600036.0, 2: 600016.0, 3: 601009.0, 4: 601939.0, 5: 1.0}, 'RPT_Date': {0: 20111231.0, 1: 20111231.0, 2: 20111231.0, 3: 20111231.0, 4: 20111231.0, 5: 20111231.0}, 'STK_ID1': {0: 601166.0, 1: 600036.0, 2: 600016.0, 3: 601009.0, 4: 601939.0, 5: 1.0}, 'EPS': {0: 89.0, 1: 89.0, 2: 4.3, 3: 89.0, 4: 2.5, 5: 89.0}, 'cash': {0: 0.0, 1: 12.0, 2: 0.0, 3: 0.0, 4: 0.0, 5: 0.0}})" 15 | ], 16 | "output": "pd.DataFrame({'STK_ID': {2: 600016.0, 4: 601939.0}, 'RPT_Date': {2: 20111231.0, 4: 20111231.0}, 'STK_ID1': {2: 600016.0, 4: 601939.0}, 'EPS': {2: 4.3, 4: 2.5}, 'cash': {2: 0.0, 4: 0.0}})", 17 | "invars": [ 18 | "df1" 19 | ], 20 | "outvar": "dfout" 21 | } 22 | ], 23 | "solutions": [ 24 | [ 25 | "dfout = df1[(df1['EPS'] != 89)]" 26 | ] 27 | ] 28 | } 29 | } 30 | }, 31 | "1": { 32 | "sets": { 33 | "A": { 34 | "queries": [ 35 | { 36 | "query": "take first three rows in df1 and assign it to dfout", 37 | "user": "u0" 38 | } 39 | ], 40 | "ios": [ 41 | { 42 | "inputs": [ 43 | "pd.DataFrame({'A': {0: 5, 1: 0, 2: 2, 3: 1, 4: 9, 5: -5, 6: 3}, 'B': {0: 2, 1: 4, 2: 6, 3: 1, 4: 1, 5: 7, 6: 3}})" 44 | ], 45 | "output": "pd.DataFrame({'A': {0: 5, 1: 0, 2: 2}, 'B': {0: 2, 1: 4, 2: 6}})", 46 | "invars": [ 47 | "df1" 48 | ], 49 | "outvar": "dfout" 50 | } 51 | ], 52 | "solutions": [ 53 | [ 54 | "dfout = df1[:3]" 55 | ] 56 | ] 57 | } 58 | } 59 | }, 60 | "2": { 61 | "sets": { 62 | "A": { 63 | "queries": [ 64 | { 65 | "query": "Delete the rows of data frame 'df2' from 'df1' and assign the result back to 'df1'", 66 | "user": "u0" 67 | } 68 | 
], 69 | "ios": [ 70 | { 71 | "inputs": [ 72 | "pd.DataFrame({'C1': {0: 'E1', 1: 'E3'}, 'C2': {0: 'E2', 1: 'E4'}})", 73 | "pd.DataFrame({'C1': {0: 'E1'}, 'C2': {0: 'E2'}})" 74 | ], 75 | "output": "pd.DataFrame({'C1': {1: 'E3'}, 'C2': {1: 'E4'}})", 76 | "invars": [ 77 | "df1", 78 | "df2" 79 | ], 80 | "outvar": "df1" 81 | }, 82 | { 83 | "inputs": [ 84 | "pd.DataFrame({'one': {0: 1, 1: 4, 2: 7, 3: 10}, 'two': {0: 2, 1: 5, 2: 8, 3: 11}, 'three': {0: 3, 1: 6, 2: 9, 3: 12}})", 85 | "pd.DataFrame({'one': {0: 1, 1: 7}, 'two': {0: 2, 1: 8}, 'three': {0: 3, 1: 9}})" 86 | ], 87 | "output": "pd.DataFrame({'one': {0: 4, 1: 10}, 'two': {0: 5, 1: 11}, 'three': {0: 6, 1: 12}})", 88 | "invars": [ 89 | "df1", 90 | "df2" 91 | ], 92 | "outvar": "df1" 93 | } 94 | ], 95 | "solutions": [ 96 | [ 97 | "df1 = df1.drop(df2.index)" 98 | ] 99 | ] 100 | } 101 | } 102 | }, 103 | "3": { 104 | "sets": { 105 | "A": { 106 | "queries": [ 107 | { 108 | "query": "Given the dataframe df1 and replace all instances of 5 in column 'id' with 1005, column 'idnew' with -1005 and assign it to dfout", 109 | "user": "u0" 110 | } 111 | ], 112 | "ios": [ 113 | { 114 | "inputs": [ 115 | "pd.DataFrame({'id': {0: 5, 1: -45, 2: 16, 3: -1, 4: 111}, 'idnew': {0: 15, 1: 18, 2: 16, 3: -111, 4: 5}, 'A': {0: 'a', 1: 'x', 2: 'r', 3: 'uprime', 4: 'None'}, 'B': {0: 'z', 1: 'None', 2: 'x', 3: 'wwww', 4: 'xy'}})" 116 | ], 117 | "output": "pd.DataFrame({'id': {0: 1005, 1: -45, 2: 16, 3: -1, 4: 111}, 'idnew': {0: 15, 1: 18, 2: 16, 3: -111, 4: -1005}, 'A': {0: 'a', 1: 'x', 2: 'r', 3: 'uprime', 4: 'None'}, 'B': {0: 'z', 1: 'None', 2: 'x', 3: 'wwww', 4: 'xy'}})", 118 | "invars": [ 119 | "df1" 120 | ], 121 | "outvar": "dfout" 122 | } 123 | ], 124 | "solutions": [ 125 | [ 126 | "dfout = df1.replace(to_replace={\n 'id': {\n 5: 1005,\n },\n 'idnew': {\n 5: (- 1005),\n },\n}, method='ffill')", 127 | "dfout = df1.replace(to_replace={\n 'id': {\n 5: 1005,\n },\n 'idnew': {\n 5: (- 1005),\n },\n})", 128 | "dfout = df1.replace(to_replace={\n 
'id': {\n 5: 1005,\n },\n 'idnew': {\n 5: (- 1005),\n },\n}, method='pad')", 129 | "dfout = df1.replace(to_replace={\n 'id': {\n 5: 1005,\n },\n 'idnew': {\n 5: (- 1005),\n },\n}, method='bfill')" 130 | ] 131 | ] 132 | } 133 | } 134 | }, 135 | "4": { 136 | "sets": { 137 | "A": { 138 | "queries": [ 139 | { 140 | "query": "Delete the rows of data frame 'df2' from 'df1' and assign the result back to 'df2'", 141 | "user": "u0" 142 | } 143 | ], 144 | "ios": [ 145 | { 146 | "inputs": [ 147 | "pd.DataFrame({'C1': {0: 'E1', 1: 'E3'}, 'C2': {0: 'E2', 1: 'E4'}})", 148 | "pd.DataFrame({'C1': {0: 'E1'}, 'C2': {0: 'E2'}})" 149 | ], 150 | "output": "pd.DataFrame({'C1': {1: 'E3'}, 'C2': {1: 'E4'}})", 151 | "invars": [ 152 | "df1", 153 | "df2" 154 | ], 155 | "outvar": "df2" 156 | }, 157 | { 158 | "inputs": [ 159 | "pd.DataFrame({'one': {0: 1, 1: 4, 2: 7, 3: 10}, 'two': {0: 2, 1: 5, 2: 8, 3: 11}, 'three': {0: 3, 1: 6, 2: 9, 3: 12}})", 160 | "pd.DataFrame({'one': {0: 1, 1: 7}, 'two': {0: 2, 1: 8}, 'three': {0: 3, 1: 9}})" 161 | ], 162 | "output": "pd.DataFrame({'one': {0: 4, 1: 10}, 'two': {0: 5, 1: 11}, 'three': {0: 6, 1: 12}})", 163 | "invars": [ 164 | "df1", 165 | "df2" 166 | ], 167 | "outvar": "df2" 168 | } 169 | ], 170 | "solutions": [ 171 | [ 172 | "df2 = df1.drop(labels=[0], errors='ignore')", 173 | "df2 = df1.drop(df2.index)", 174 | "df2 = df1.drop(labels=[0])" 175 | ] 176 | ] 177 | } 178 | } 179 | }, 180 | "5": { 181 | "sets": { 182 | "A": { 183 | "queries": [ 184 | { 185 | "query": "Drop rows from dataframe df1 where value of EPS is equal to 89", 186 | "user": "u0" 187 | } 188 | ], 189 | "ios": [ 190 | { 191 | "inputs": [ 192 | "pd.DataFrame({'STK_ID': {0: 601166.0, 1: 600036.0, 2: 600016.0, 3: 601009.0, 4: 601939.0, 5: 1.0}, 'RPT_Date': {0: 20111231.0, 1: 20111231.0, 2: 20111231.0, 3: 20111231.0, 4: 20111231.0, 5: 20111231.0}, 'STK_ID1': {0: 601166.0, 1: 600036.0, 2: 600016.0, 3: 601009.0, 4: 601939.0, 5: 1.0}, 'EPS': {0: 89.0, 1: 89.0, 2: 4.3, 3: 89.0, 4: 2.5, 5: 89.0}, 
'cash': {0: 0.0, 1: 12.0, 2: 0.0, 3: 0.0, 4: 0.0, 5: 0.0}})" 193 | ], 194 | "output": "pd.DataFrame({'STK_ID': {2: 600016.0, 4: 601939.0}, 'RPT_Date': {2: 20111231.0, 4: 20111231.0}, 'STK_ID1': {2: 600016.0, 4: 601939.0}, 'EPS': {2: 4.3, 4: 2.5}, 'cash': {2: 0.0, 4: 0.0}})", 195 | "invars": [ 196 | "df1" 197 | ], 198 | "outvar": "dfout" 199 | }, 200 | { 201 | "inputs": [ 202 | "pd.DataFrame({'a': {0: 1.0, 1: 3.0, 2: 89.0, 3: -1.0, 4: -2.0}, 'EPS': {0: 2.0, 1: 89.0, 2: 5.0, 3: -1.0, 4: -2.0}})" 203 | ], 204 | "output": "pd.DataFrame({'a': {0: 1.0, 1: 89.0, 2: -1.0, 3: -2.0}, 'EPS': {0: 2.0, 1: 5.0, 2: -1.0, 3: -2.0}})", 205 | "invars": [ 206 | "df1" 207 | ], 208 | "outvar": "dfout" 209 | } 210 | ], 211 | "solutions": [ 212 | [ 213 | "dfout = df1[(df1['EPS'] != 89)]" 214 | ] 215 | ] 216 | } 217 | } 218 | }, 219 | "6": { 220 | "sets": { 221 | "A": { 222 | "queries": [ 223 | { 224 | "query": "Apply min-max normalization on df1 and assign the result to dfout", 225 | "user": "u0" 226 | } 227 | ], 228 | "ios": [ 229 | { 230 | "inputs": [ 231 | "pd.DataFrame({'C1': {0: 10, 1: 10, 2: 20, 3: 20}, 'C2': {0: 15, 1: 15, 2: 20, 3: 20}})" 232 | ], 233 | "output": "pd.DataFrame({'C1': {0: 0.0, 1: 0.0, 2: 1.0, 3: 1.0}, 'C2': {0: 0.0, 1: 0.0, 2: 1.0, 3: 1.0}})", 234 | "invars": [ 235 | "df1" 236 | ], 237 | "outvar": "dfout" 238 | } 239 | ], 240 | "solutions": [ 241 | [ 242 | "dfout = df1.apply((lambda x: ((x - min(x)) / (max(x) - min(x)))))" 243 | ] 244 | ] 245 | } 246 | } 247 | }, 248 | "7": { 249 | "sets": { 250 | "A": { 251 | "queries": [ 252 | { 253 | "query": "Apply mean normalization on the columns of df1 and store the result in dfout", 254 | "user": "u0" 255 | } 256 | ], 257 | "ios": [ 258 | { 259 | "inputs": [ 260 | "pd.DataFrame({'C1': {0: 10, 1: 20}, 'C2': {0: 15, 1: 25}})" 261 | ], 262 | "output": "pd.DataFrame({'C1': {0: -0.707107, 1: 0.707107}, 'C2': {0: -0.707107, 1: 0.707107}})", 263 | "invars": [ 264 | "df1" 265 | ], 266 | "outvar": "dfout" 267 | } 268 | ], 269 | 
"solutions": [ 270 | [ 271 | "dfout = ((df1 - df1.mean()) / df1.std())" 272 | ] 273 | ] 274 | } 275 | } 276 | }, 277 | "8": { 278 | "sets": { 279 | "A": { 280 | "queries": [ 281 | { 282 | "query": "Select the third value in the STID column of mydata and store it in val", 283 | "user": "u0" 284 | } 285 | ], 286 | "ios": [ 287 | { 288 | "inputs": [ 289 | "pd.DataFrame({'STID': {'a': 1.0, 'b': 2.0, 'c': 3.0, 'd': None}, 'key': {'a': 1, 'b': 3, 'c': 4, 'd': 7}})" 290 | ], 291 | "output": "3.0", 292 | "invars": [ 293 | "mydata" 294 | ], 295 | "outvar": "val" 296 | } 297 | ], 298 | "solutions": [ 299 | [ 300 | "val = mydata['STID'][2]", 301 | "val = mydata.iloc[(2, 0)]" 302 | ] 303 | ] 304 | } 305 | } 306 | }, 307 | "9": { 308 | "sets": { 309 | "A": { 310 | "queries": [ 311 | { 312 | "query": "Create a new column 'C' in df1 as the sum of the columns 'A' and 'B'", 313 | "user": "u0" 314 | } 315 | ], 316 | "ios": [ 317 | { 318 | "inputs": [ 319 | "pd.DataFrame({'A': {0: 5.0, 1: 0.0, 2: 2.0, 3: 1.0, 4: 9.0, 5: -5.0, 6: 3.0}, 'B': {0: 2.0, 1: 4.0, 2: 6.0, 3: 1.0, 4: 1.0, 5: 7.0, 6: 3.0}})" 320 | ], 321 | "output": "pd.DataFrame({'A': {0: 5.0, 1: 0.0, 2: 2.0, 3: 1.0, 4: 9.0, 5: -5.0, 6: 3.0}, 'B': {0: 2.0, 1: 4.0, 2: 6.0, 3: 1.0, 4: 1.0, 5: 7.0, 6: 3.0}, 'C': {0: 7.0, 1: 4.0, 2: 8.0, 3: 2.0, 4: 10.0, 5: 2.0, 6: 6.0}})", 322 | "invars": [ 323 | "df1" 324 | ], 325 | "outvar": "df1" 326 | } 327 | ], 328 | "solutions": [ 329 | [ 330 | "df1['C'] = (df1['A'] + df1['B'])" 331 | ] 332 | ] 333 | } 334 | } 335 | }, 336 | "10": { 337 | "sets": { 338 | "A": { 339 | "queries": [ 340 | { 341 | "query": "Select the first three rows of the second column of the mydata and store it in out", 342 | "user": "u0" 343 | } 344 | ], 345 | "ios": [ 346 | { 347 | "inputs": [ 348 | "pd.DataFrame({'A': {0: 5, 1: 0, 2: 2, 3: 1, 4: 9, 5: -5, 6: 3}, 'B': {0: 2, 1: 4, 2: 6, 3: 1, 4: 1, 5: 7, 6: 3}})" 349 | ], 350 | "output": "_s = pd.Series([2, 4, 6])\n_s.name = \"B\"\n_s", 351 | "invars": [ 352 | "mydata" 
353 | ], 354 | "outvar": "out" 355 | }, 356 | { 357 | "inputs": [ 358 | "pd.DataFrame({'a': {0: 1.0, 1: 3.0, 2: 89.0, 3: -1.0, 4: -2.0}, 'EPS': {0: 2.0, 1: 89.0, 2: 5.0, 3: -1.0, 4: -2.0}})" 359 | ], 360 | "output": "pd.DataFrame({'EPS': {0: 2.0, 1: 89.0, 2: 5.0}})", 361 | "invars": [ 362 | "mydata" 363 | ], 364 | "outvar": "out" 365 | } 366 | ], 367 | "solutions": [ 368 | [ 369 | "out = mydata.iloc[:3, 1]" 370 | ] 371 | ] 372 | } 373 | } 374 | }, 375 | "11": { 376 | "sets": { 377 | "A": { 378 | "queries": [ 379 | { 380 | "query": "Compute the fraction of non-zeros in the score column of the dataframe df1", 381 | "user": "u0" 382 | } 383 | ], 384 | "ios": [ 385 | { 386 | "inputs": [ 387 | "pd.DataFrame({'score': {0: 1.0, 1: 2.5, 2: 3.0, 3: 0.0, 4: 0.0, 5: 0.1}, 'C2': {0: 'E2', 1: 'E4', 2: 'E1', 3: 'E5', 4: 'E6', 5: 'E7'}})" 388 | ], 389 | "output": "0.6666666666666666", 390 | "invars": [ 391 | "df1" 392 | ], 393 | "outvar": "dfout" 394 | } 395 | ], 396 | "solutions": [ 397 | [ 398 | "dfout = sum(df1.score != 0)/len(df1.score)" 399 | ] 400 | ] 401 | } 402 | } 403 | }, 404 | "12": { 405 | "sets": { 406 | "A": { 407 | "queries": [ 408 | { 409 | "query": "List the unique values of 'C2'", 410 | "user": "u0" 411 | } 412 | ], 413 | "ios": [ 414 | { 415 | "inputs": [ 416 | "pd.DataFrame({'score': {0: 1.0, 1: 2.5, 2: 44.0, 3: 3.0, 4: 0.0, 5: 1.3, 6: 7.0}, 'C2': {0: 'E2', 1: 'E4', 2: 'E1', 3: 'E1', 4: 'E5', 5: 'E6', 6: 'E2'}})" 417 | ], 418 | "output": "np.array(['E2', 'E4', 'E1', 'E5', 'E6'])", 419 | "invars": [ 420 | "df1" 421 | ], 422 | "outvar": "dfout" 423 | } 424 | ], 425 | "solutions": [ 426 | [ 427 | "dfout = df1.C2.unique()" 428 | ] 429 | ] 430 | } 431 | } 432 | }, 433 | "13": { 434 | "sets": { 435 | "A": { 436 | "queries": [ 437 | { 438 | "query": "For each quarter find the subsidiary with top earnings value", 439 | "user": "u0" 440 | } 441 | ], 442 | "ios": [ 443 | { 444 | "inputs": [ 445 | "pd.DataFrame({'Quarter': {1: 'Q1', 2: 'Q1', 3: 'Q2', 4: 'Q2'}, 
'Subsidiary': {1: 'US', 2: 'Kenya', 3: 'US', 4: 'India'}, 'Earnings': {1: 600, 2: 200, 3: 150, 4: 200}})" 446 | ], 447 | "output": "pd.DataFrame({'Quarter': {1: 'Q1', 4: 'Q2'}, 'Subsidiary': {1: 'US', 4: 'India'}, 'Earnings': {1: 600, 4: 200}})", 448 | "invars": [ 449 | "df1" 450 | ], 451 | "outvar": "dfout" 452 | } 453 | ], 454 | "solutions": [ 455 | [ 456 | "dfout = df1.loc[df1.groupby('Quarter')['Earnings'].idxmax()]" 457 | ] 458 | ] 459 | } 460 | } 461 | }, 462 | "14": { 463 | "sets": { 464 | "A": { 465 | "queries": [ 466 | { 467 | "query": "Set the column 'industry' of dataframe df1 to a value 5", 468 | "user": "u0" 469 | } 470 | ], 471 | "ios": [ 472 | { 473 | "inputs": [ 474 | "pd.DataFrame({'issueid': {0: 1, 1: 2, 2: 3, 3: 4, 4: 5}, 'industry': {0: 1, 1: 2, 2: 3, 3: 4, 4: 5}})" 475 | ], 476 | "output": "pd.DataFrame({'issueid': {0: 1, 1: 2, 2: 3, 3: 4, 4: 5}, 'industry': {0: 5, 1: 5, 2: 5, 3: 5, 4: 5}})", 477 | "invars": [ 478 | "df1" 479 | ], 480 | "outvar": "df1" 481 | } 482 | ], 483 | "solutions": [ 484 | [ 485 | "df1['industry'] = 5" 486 | ] 487 | ] 488 | } 489 | } 490 | }, 491 | "15": { 492 | "sets": { 493 | "A": { 494 | "queries": [ 495 | { 496 | "query": "Sort the rows by column 'B' in descending order in df1 and assign it to dfout", 497 | "user": "u0" 498 | } 499 | ], 500 | "ios": [ 501 | { 502 | "inputs": [ 503 | "pd.DataFrame({'A': {0: 5, 1: 0, 2: 2, 3: 1, 4: 9, 5: -5, 6: 3}, 'B': {0: 2, 1: 4, 2: 6, 3: 1, 4: 1, 5: 7, 6: 3}})" 504 | ], 505 | "output": "pd.DataFrame({'A': {5: -5, 2: 2, 1: 0, 6: 3, 0: 5, 3: 1, 4: 9}, 'B': {5: 7, 2: 6, 1: 4, 6: 3, 0: 2, 3: 1, 4: 1}})", 506 | "invars": [ 507 | "df1" 508 | ], 509 | "outvar": "dfout" 510 | } 511 | ], 512 | "solutions": [ 513 | [ 514 | "dfout = df1.sort_values(by='B', ascending=False).copy()" 515 | ] 516 | ] 517 | } 518 | } 519 | }, 520 | "16": { 521 | "sets": { 522 | "A": { 523 | "queries": [ 524 | { 525 | "query": "Rename the 'key' column of the dataframe df1 to 'KEY' and return it to dfout", 526 | 
"user": "u0" 527 | } 528 | ], 529 | "ios": [ 530 | { 531 | "inputs": [ 532 | "pd.DataFrame({'STID': {'a': 1.0, 'b': 2.0, 'c': 3.0, 'd': None}, 'key': {'a': 1, 'b': 3, 'c': 4, 'd': 7}})" 533 | ], 534 | "output": "pd.DataFrame({'STID': {'a': 1.0, 'b': 2.0, 'c': 3.0, 'd': None}, 'KEY': {'a': 1, 'b': 3, 'c': 4, 'd': 7}})", 535 | "invars": [ 536 | "df1" 537 | ], 538 | "outvar": "dfout" 539 | } 540 | ], 541 | "solutions": [ 542 | [ 543 | "dfout = df1.rename(columns={\n 'key': 'KEY',\n})" 544 | ] 545 | ] 546 | } 547 | } 548 | }, 549 | "17": { 550 | "sets": { 551 | "A": { 552 | "queries": [ 553 | { 554 | "query": "put first two row of df1 in dfout", 555 | "user": "u0" 556 | } 557 | ], 558 | "ios": [ 559 | { 560 | "inputs": [ 561 | "pd.DataFrame({'STID': {'a': 1.0, 'b': 2.0, 'c': 3.0, 'd': None}, 'key': {'a': 1, 'b': 3, 'c': 4, 'd': 7}})" 562 | ], 563 | "output": "pd.DataFrame({'STID': {'a': 1.0, 'b': 2.0}, 'key': {'a': 1, 'b': 3}})", 564 | "invars": [ 565 | "df1" 566 | ], 567 | "outvar": "dfout" 568 | } 569 | ], 570 | "solutions": [ 571 | [ 572 | "dfout = df1[:2]" 573 | ] 574 | ] 575 | } 576 | } 577 | }, 578 | "18": { 579 | "sets": { 580 | "A": { 581 | "queries": [ 582 | { 583 | "query": "put third row of df1 in dfout", 584 | "user": "u0" 585 | } 586 | ], 587 | "ios": [ 588 | { 589 | "inputs": [ 590 | "pd.DataFrame({'STID': {'a': 1.0, 'b': 2.0, 'c': 3.0, 'd': None}, 'key': {'a': 1, 'b': 3, 'c': 4, 'd': 7}})" 591 | ], 592 | "output": "pd.DataFrame({'STID': {'c': 3.0}, 'key': {'c': 4}})", 593 | "invars": [ 594 | "df1" 595 | ], 596 | "outvar": "dfout" 597 | } 598 | ], 599 | "solutions": [ 600 | [ 601 | "dfout = df1[2:3]" 602 | ] 603 | ] 604 | } 605 | } 606 | }, 607 | "19": { 608 | "sets": { 609 | "A": { 610 | "queries": [ 611 | { 612 | "query": "put first and third row of df1 in dfout", 613 | "user": "u0" 614 | } 615 | ], 616 | "ios": [ 617 | { 618 | "inputs": [ 619 | "pd.DataFrame({'STID': {'a': 1.0, 'b': 2.0, 'c': 3.0, 'd': None}, 'key': {'a': 1, 'b': 3, 'c': 4, 'd': 7}})" 
620 | ], 621 | "output": "pd.DataFrame({'STID': {'a': 1.0, 'c': 3.0}, 'key': {'a': 1, 'c': 4}})", 622 | "invars": [ 623 | "df1" 624 | ], 625 | "outvar": "dfout" 626 | }, 627 | { 628 | "inputs": [ 629 | "pd.DataFrame({'a': {0: 1.0, 1: 3.0, 2: 89.0, 3: -1.0, 4: -2.0}, 'EPS': {0: 2.0, 1: 89.0, 2: 5.0, 3: -1.0, 4: -2.0}})" 630 | ], 631 | "output": "pd.DataFrame({'a': {0: 1.0, 1: 89.0}, 'EPS': {0: 2.0, 1: 5.0}})", 632 | "invars": [ 633 | "df1" 634 | ], 635 | "outvar": "dfout" 636 | } 637 | ], 638 | "solutions": [ 639 | [ 640 | "dfout = df1.loc[['a','c']]" 641 | ] 642 | ] 643 | } 644 | } 645 | }, 646 | "20": { 647 | "sets": { 648 | "A": { 649 | "queries": [ 650 | { 651 | "query": "Remove last three rows from df1 and assign it to dfout", 652 | "user": "u0" 653 | } 654 | ], 655 | "ios": [ 656 | { 657 | "inputs": [ 658 | "pd.DataFrame({'A': {0: 5, 1: 0, 2: 2, 3: 1, 4: 9, 5: -5, 6: 3}, 'B': {0: 2, 1: 4, 2: 6, 3: 1, 4: 1, 5: 7, 6: 3}})" 659 | ], 660 | "output": "pd.DataFrame({'A': {0: 5, 1: 0, 2: 2, 3: 1}, 'B': {0: 2, 1: 4, 2: 6, 3: 1}})", 661 | "invars": [ 662 | "df1" 663 | ], 664 | "outvar": "dfout" 665 | }, 666 | { 667 | "inputs": [ 668 | "pd.DataFrame({'a': {0: 1.0, 1: 1.0, 2: 3.0, 3: 3.0, 4: 89.0, 5: -1.0, 6: -2.0, 7: -1.0, 8: -2.0}, 'EPS': {0: 1.0, 1: 2.0, 2: 89.0, 3: 90.0, 4: 5.0, 5: -1.0, 6: -2.0, 7: -1.0, 8: -2.0}})" 669 | ], 670 | "output": "pd.DataFrame({'a': {0: 1.0, 1: 1.0, 2: 3.0, 3: 3.0, 4: 89.0, 5: -1.0}, 'EPS': {0: 1.0, 1: 2.0, 2: 89.0, 3: 90.0, 4: 5.0, 5: -1.0}})", 671 | "invars": [ 672 | "df1" 673 | ], 674 | "outvar": "dfout" 675 | } 676 | ], 677 | "solutions": [ 678 | [ 679 | "dfout = df1.drop(labels=[4, 5, 6], errors='ignore')", 680 | "dfout = df1.head(len(df1)-3)", 681 | "dfout = df1.drop(labels=[4, 5, 6])" 682 | ] 683 | ] 684 | } 685 | } 686 | }, 687 | "21": { 688 | "sets": { 689 | "A": { 690 | "queries": [ 691 | { 692 | "query": "take last three rows in df1 and assign it to dfout", 693 | "user": "u0" 694 | } 695 | ], 696 | "ios": [ 697 | { 698 | 
"inputs": [ 699 | "pd.DataFrame({'A': {0: 5, 1: 0, 2: 2, 3: 1, 4: 9, 5: -5, 6: 3}, 'B': {0: 2, 1: 4, 2: 6, 3: 1, 4: 1, 5: 7, 6: 3}})" 700 | ], 701 | "output": "pd.DataFrame({'A': {4: 9, 5: -5, 6: 3}, 'B': {4: 1, 5: 7, 6: 3}})", 702 | "invars": [ 703 | "df1" 704 | ], 705 | "outvar": "dfout" 706 | } 707 | ], 708 | "solutions": [ 709 | [ 710 | "dfout = df1[(- 3):]" 711 | ] 712 | ] 713 | } 714 | } 715 | }, 716 | "22": { 717 | "sets": { 718 | "A": { 719 | "queries": [ 720 | { 721 | "query": "count the number of null values in df1 and return it to dfout", 722 | "user": "u0" 723 | } 724 | ], 725 | "ios": [ 726 | { 727 | "inputs": [ 728 | "pd.DataFrame({'STID': {'a': 1.0, 'b': 2.0, 'c': 3.0, 'd': None}, 'key': {'a': 1, 'b': 3, 'c': 4, 'd': 7}})" 729 | ], 730 | "output": "1", 731 | "invars": [ 732 | "df1" 733 | ], 734 | "outvar": "dfout" 735 | } 736 | ], 737 | "solutions": [ 738 | [ 739 | "dfout = df1.isnull().sum().sum()" 740 | ] 741 | ] 742 | } 743 | } 744 | }, 745 | "23": { 746 | "sets": { 747 | "A": { 748 | "queries": [ 749 | { 750 | "query": "drop rows with null values in df1 and return it to dfout", 751 | "user": "u0" 752 | } 753 | ], 754 | "ios": [ 755 | { 756 | "inputs": [ 757 | "pd.DataFrame({'STID': {'a': 1.0, 'b': 2.0, 'c': 3.0, 'd': None}, 'key': {'a': 1, 'b': 3, 'c': 4, 'd': 7}})" 758 | ], 759 | "output": "pd.DataFrame({'STID': {'a': 1.0, 'b': 2.0, 'c': 3.0}, 'key': {'a': 1, 'b': 3, 'c': 4}})", 760 | "invars": [ 761 | "df1" 762 | ], 763 | "outvar": "dfout" 764 | } 765 | ], 766 | "solutions": [ 767 | [ 768 | "dfout = df1.dropna()" 769 | ] 770 | ] 771 | } 772 | } 773 | }, 774 | "24": { 775 | "sets": { 776 | "A": { 777 | "queries": [ 778 | { 779 | "query": "replace null values in df1 with 4.0 and return it to dfout", 780 | "user": "u0" 781 | } 782 | ], 783 | "ios": [ 784 | { 785 | "inputs": [ 786 | "pd.DataFrame({'STID': {'a': 1.0, 'b': 2.0, 'c': 3.0, 'd': None}, 'key': {'a': 1, 'b': 3, 'c': 4, 'd': 7}})" 787 | ], 788 | "output": "pd.DataFrame({'STID': {'a': 1.0, 
'b': 2.0, 'c': 3.0, 'd': 4.0}, 'key': {'a': 1, 'b': 3, 'c': 4, 'd': 7}})", 789 | "invars": [ 790 | "df1" 791 | ], 792 | "outvar": "dfout" 793 | } 794 | ], 795 | "solutions": [ 796 | [ 797 | "dfout = df1.fillna(4.0)", 798 | "dfout = df1.replace(np.nan, 4.0)" 799 | ] 800 | ] 801 | } 802 | } 803 | }, 804 | "25": { 805 | "sets": { 806 | "A": { 807 | "queries": [ 808 | { 809 | "query": "replace null values in df1 with mean of the column and return it to dfout", 810 | "user": "u0" 811 | } 812 | ], 813 | "ios": [ 814 | { 815 | "inputs": [ 816 | "pd.DataFrame({'STID': {'a': 1.0, 'b': 2.0, 'c': 3.0, 'd': None}, 'key': {'a': 1.0, 'b': None, 'c': 4.0, 'd': 7.0}})" 817 | ], 818 | "output": "pd.DataFrame({'STID': {'a': 1.0, 'b': 2.0, 'c': 3.0, 'd': 2.0}, 'key': {'a': 1.0, 'b': 4.0, 'c': 4.0, 'd': 7.0}})", 819 | "invars": [ 820 | "df1" 821 | ], 822 | "outvar": "dfout" 823 | } 824 | ], 825 | "solutions": [ 826 | [ 827 | "dfout = df1.replace(np.nan, df1.mean())", 828 | "dfout = df1.fillna(df1.mean())" 829 | ] 830 | ] 831 | } 832 | } 833 | }, 834 | "26": { 835 | "sets": { 836 | "A": { 837 | "queries": [ 838 | { 839 | "query": "find mean and median of columns in df1 and save it to dfout", 840 | "user": "u0" 841 | } 842 | ], 843 | "ios": [ 844 | { 845 | "inputs": [ 846 | "pd.DataFrame({'A': {0: 5, 1: 0, 2: 2, 3: 1, 4: 9, 5: -5, 6: 3}, 'B': {0: 2, 1: 4, 2: 6, 3: 1, 4: 1, 5: 7, 6: 3}})" 847 | ], 848 | "output": "pd.DataFrame({'A': {0: 2.142857142857143, 1: 2.0}, 'B': {0: 3.4285714285714284, 1: 3.0}})", 849 | "invars": [ 850 | "df1" 851 | ], 852 | "outvar": "dfout" 853 | } 854 | ], 855 | "solutions": [ 856 | [ 857 | "dfout = pd.DataFrame([df1.mean(), df1.median()])" 858 | ] 859 | ] 860 | } 861 | } 862 | }, 863 | "27": { 864 | "sets": { 865 | "A": { 866 | "queries": [ 867 | { 868 | "query": "Filter rows where there is more than one NaN.", 869 | "user": "u0" 870 | } 871 | ], 872 | "ios": [ 873 | { 874 | "inputs": [ 875 | "pd.DataFrame({'Score1': {0: 100.0, 1: 90.0, 2: None, 3: 95.0}, 
'Score2': {0: 30.0, 1: None, 2: 45.0, 3: 56.0}, 'Score3': {0: 52, 1: 40, 2: 80, 3: 98}, 'Score4': {0: None, 1: None, 2: None, 3: 65.0}})" 876 | ], 877 | "output": "pd.DataFrame({'Score1': {1: 90.0, 2: None}, 'Score2': {1: None, 2: 45.0}, 'Score3': {1: 40, 2: 80}, 'Score4': {1: None, 2: None}})", 878 | "invars": [ 879 | "df1" 880 | ], 881 | "outvar": "dfout" 882 | }, 883 | { 884 | "inputs": [ 885 | "pd.DataFrame({'a': {0: None, 1: 3.0, 2: 89.0, 3: None, 4: -2.0, 5: -1.0, 6: None}, 'EPS': {0: 2.0, 1: 89.0, 2: 5.0, 3: None, 4: -2.0, 5: -1.0, 6: None}})" 886 | ], 887 | "output": "pd.DataFrame({'a': {0: None, 1: None}, 'EPS': {0: None, 1: None}})", 888 | "invars": [ 889 | "df1" 890 | ], 891 | "outvar": "dfout" 892 | } 893 | ], 894 | "solutions": [ 895 | [ 896 | "dfout = df1.loc[(df1.isnull().sum(axis=1) > 1), :]" 897 | ] 898 | ] 899 | } 900 | } 901 | }, 902 | "28": { 903 | "sets": { 904 | "A": { 905 | "queries": [ 906 | { 907 | "query": "Set 'A' as 5 in 6th row of df1", 908 | "user": "u0" 909 | } 910 | ], 911 | "ios": [ 912 | { 913 | "inputs": [ 914 | "pd.DataFrame({'A': {0: 5, 1: 0, 2: 2, 3: 1, 4: 9, 5: -5, 6: 3}, 'B': {0: 2, 1: 4, 2: 6, 3: 1, 4: 1, 5: 7, 6: 3}})" 915 | ], 916 | "output": "pd.DataFrame({'A': {0: 5, 1: 0, 2: 2, 3: 1, 4: 9, 5: 5, 6: 3}, 'B': {0: 2, 1: 4, 2: 6, 3: 1, 4: 1, 5: 7, 6: 3}})", 917 | "invars": [ 918 | "df1" 919 | ], 920 | "outvar": "dfout" 921 | } 922 | ], 923 | "solutions": [ 924 | "df1.loc[5, 'A'] = 5" 925 | ] 926 | } 927 | } 928 | }, 929 | "29": { 930 | "sets": { 931 | "A": { 932 | "queries": [ 933 | { 934 | "query": "Remove rows with null values from df1 and return it to dfout", 935 | "user": "u0" 936 | } 937 | ], 938 | "ios": [ 939 | { 940 | "inputs": [ 941 | "pd.DataFrame({'STID': {'a': 1.0, 'b': 2.0, 'c': 3.0, 'd': None}, 'key': {'a': 1, 'b': 3, 'c': 4, 'd': 7}})" 942 | ], 943 | "output": "pd.DataFrame({'STID': {'a': 1.0, 'b': 2.0, 'c': 3.0}, 'key': {'a': 1, 'b': 3, 'c': 4}})", 944 | "invars": [ 945 | "df1" 946 | ], 947 | "outvar": 
"dfout" 948 | } 949 | ], 950 | "solutions": [ 951 | [ 952 | "dfout = df1.dropna()" 953 | ] 954 | ] 955 | } 956 | } 957 | }, 958 | "30": { 959 | "sets": { 960 | "A": { 961 | "queries": [ 962 | { 963 | "query": "Concatenate two data frames and drop duplicates", 964 | "user": "u0" 965 | } 966 | ], 967 | "ios": [ 968 | { 969 | "inputs": [ 970 | "pd.DataFrame({'A': {0: 1, 1: 3}, 'B': {0: 2, 1: 1}})", 971 | "pd.DataFrame({'A': {0: 5, 1: 3}, 'B': {0: 6, 1: 1}})" 972 | ], 973 | "output": "pd.DataFrame({'A': {0: 1, 1: 3, 2: 5}, 'B': {0: 2, 1: 1, 2: 6}})", 974 | "invars": [ 975 | "df1", 976 | "df2" 977 | ], 978 | "outvar": "dfout" 979 | } 980 | ], 981 | "solutions": [ 982 | [ 983 | "dfout = pd.concat([df1, df2]).drop_duplicates().reset_index(drop=True)" 984 | ] 985 | ] 986 | } 987 | } 988 | }, 989 | "31": { 990 | "sets": { 991 | "A": { 992 | "queries": [ 993 | { 994 | "query": "Drop duplicates from dataframe and reindex.", 995 | "user": "u0" 996 | } 997 | ], 998 | "ios": [ 999 | { 1000 | "inputs": [ 1001 | "pd.DataFrame({'A': {0: 1, 1: 3, 2: 5, 3: 3}, 'B': {0: 2, 1: 1, 2: 6, 3: 1}})" 1002 | ], 1003 | "output": "pd.DataFrame({'A': {0: 1, 1: 3, 2: 5}, 'B': {0: 2, 1: 1, 2: 6}})", 1004 | "invars": [ 1005 | "df1" 1006 | ], 1007 | "outvar": "dfout" 1008 | } 1009 | ], 1010 | "solutions": [ 1011 | [ 1012 | "dfout = df1.drop_duplicates(keep='first')", 1013 | "dfout = df1.drop_duplicates(keep='first').reindex(columns=['A', 'B'])" 1014 | ] 1015 | ] 1016 | } 1017 | } 1018 | }, 1019 | "32": { 1020 | "sets": { 1021 | "A": { 1022 | "queries": [ 1023 | { 1024 | "query": "Transpose the dataframe", 1025 | "user": "u0" 1026 | } 1027 | ], 1028 | "ios": [ 1029 | { 1030 | "inputs": [ 1031 | "pd.DataFrame({'A': {'a': 1, 'b': 2, 'c': 3}, 'B': {'a': 4, 'b': 5, 'c': 6}, 'C': {'a': 7, 'b': 8, 'c': 9}})" 1032 | ], 1033 | "output": "pd.DataFrame({'a': {'A': 1, 'B': 4, 'C': 7}, 'b': {'A': 2, 'B': 5, 'C': 8}, 'c': {'A': 3, 'B': 6, 'C': 9}})", 1034 | "invars": [ 1035 | "df1" 1036 | ], 1037 | "outvar": 
"dfout" 1038 | } 1039 | ], 1040 | "solutions": [ 1041 | [ 1042 | "dfout = df1.T" 1043 | ] 1044 | ] 1045 | } 1046 | } 1047 | }, 1048 | "33": { 1049 | "sets": { 1050 | "A": { 1051 | "queries": [ 1052 | { 1053 | "query": "Select rows where location is 'a'", 1054 | "user": "u0" 1055 | } 1056 | ], 1057 | "ios": [ 1058 | { 1059 | "inputs": [ 1060 | "pd.DataFrame({'date': {0: 20130101, 1: 20130101, 2: 20130102}, 'location': {0: 'a', 1: 'a', 2: 'c'}})" 1061 | ], 1062 | "output": "pd.DataFrame({'date': {0: 20130101, 1: 20130101}, 'location': {0: 'a', 1: 'a'}})", 1063 | "invars": [ 1064 | "df1" 1065 | ], 1066 | "outvar": "dfout" 1067 | } 1068 | ], 1069 | "solutions": [ 1070 | [ 1071 | "dfout = df1[(df1['location'] == 'a')]" 1072 | ] 1073 | ] 1074 | } 1075 | } 1076 | }, 1077 | "34": { 1078 | "sets": { 1079 | "A": { 1080 | "queries": [ 1081 | { 1082 | "query": "Filter the columns from dataframe where type equal to 'float64'", 1083 | "user": "u0" 1084 | } 1085 | ], 1086 | "ios": [ 1087 | { 1088 | "inputs": [ 1089 | "pd.DataFrame({0: {0: 1}, 1: {0: 'a'}, 2: {0: 2.0}})" 1090 | ], 1091 | "output": "pd.DataFrame({0: {0: 1}, 1: {0: 'a'}})", 1092 | "invars": [ 1093 | "df1" 1094 | ], 1095 | "outvar": "dfout" 1096 | }, 1097 | { 1098 | "inputs": [ 1099 | "pd.DataFrame({'A': {0: 1.1, 1: 1.1, 2: 1.1}, 'B': {0: 2.3, 1: 5.0, 2: 5.0}, 'C': {0: 5.7, 1: 5.7, 2: 5.7}, 'D': {0: 0, 1: 0, 2: 100}, 'E': {0: 21, 1: 21, 2: 99}, 'F': {0: 55.0, 1: 55.0, 2: 105.5}, 'G': {0: '333', 1: '444', 2: '444'}})" 1100 | ], 1101 | "output": "pd.DataFrame({'D': {0: 0, 1: 0, 2: 100}, 'E': {0: 21, 1: 21, 2: 99}, 'G': {0: '333', 1: '444', 2: '444'}})", 1102 | "invars": [ 1103 | "df1" 1104 | ], 1105 | "outvar": "dfout" 1106 | } 1107 | ], 1108 | "solutions": [ 1109 | [ 1110 | "dfout = df1.drop(labels=[2], axis=1)", 1111 | "dfout = df1.loc[:, df1.dtypes != 'float64']" 1112 | ] 1113 | ] 1114 | } 1115 | } 1116 | }, 1117 | "35": { 1118 | "sets": { 1119 | "A": { 1120 | "queries": [ 1121 | { 1122 | "query": "Check if all the 
values of column 'C1' in the dataframe contains values 1, 2 or 3.", 1123 | "user": "u0" 1124 | } 1125 | ], 1126 | "ios": [ 1127 | { 1128 | "inputs": [ 1129 | "pd.DataFrame({'C1': {0: 1, 1: 2, 2: 2, 3: 1, 4: 3}, 'C2': {0: 'E2', 1: 'E4', 2: 'E2', 3: 'E4', 4: 'E6'}})" 1130 | ], 1131 | "output": "_s = pd.Series([True, True, True, True, True])\n_s.name = \"C1\"\n_s", 1132 | "invars": [ 1133 | "df1" 1134 | ], 1135 | "outvar": "dfout" 1136 | } 1137 | ], 1138 | "solutions": [ 1139 | [ 1140 | "dfout = df1.C1.isin([1, 2, 3])" 1141 | ] 1142 | ] 1143 | } 1144 | } 1145 | }, 1146 | "36": { 1147 | "sets": { 1148 | "A": { 1149 | "queries": [ 1150 | { 1151 | "query": "Convert the 'foo' column in the dataframe to numeric ignoring the errors.", 1152 | "user": "u0" 1153 | } 1154 | ], 1155 | "ios": [ 1156 | { 1157 | "inputs": [ 1158 | "pd.DataFrame({'foo': {0: '1', 1: '2.0', 2: '-', 3: '-', 4: '3.447'}})" 1159 | ], 1160 | "output": "pd.DataFrame({'foo': {0: 1.0, 1: 2.0, 2: None, 3: None, 4: 3.447}})", 1161 | "invars": [ 1162 | "df1" 1163 | ], 1164 | "outvar": "df1" 1165 | } 1166 | ], 1167 | "solutions": [ 1168 | [ 1169 | "df1['foo'] = pd.to_numeric(df1['foo'], errors='coerce')" 1170 | ] 1171 | ] 1172 | } 1173 | } 1174 | }, 1175 | "37": { 1176 | "sets": { 1177 | "A": { 1178 | "queries": [ 1179 | { 1180 | "query": "Convert the 'foo' column in the dataframe to numeric", 1181 | "user": "u0" 1182 | } 1183 | ], 1184 | "ios": [ 1185 | { 1186 | "inputs": [ 1187 | "pd.DataFrame({'foo': {0: '1', 1: '2.0', 2: '-', 3: '-', 4: '3.447'}})" 1188 | ], 1189 | "output": "pd.DataFrame({'foo': {0: 1.0, 1: 2.0, 2: None, 3: None, 4: 3.447}})", 1190 | "invars": [ 1191 | "df1" 1192 | ], 1193 | "outvar": "df1" 1194 | } 1195 | ], 1196 | "solutions": [ 1197 | [ 1198 | "df1['foo'] = pd.to_numeric(df1['foo'], errors='coerce')" 1199 | ] 1200 | ] 1201 | } 1202 | } 1203 | }, 1204 | "38": { 1205 | "sets": { 1206 | "A": { 1207 | "queries": [ 1208 | { 1209 | "query": "Move column 'deaths' to be the first column of
df", 1210 | "user": "u0" 1211 | } 1212 | ], 1213 | "ios": [ 1214 | { 1215 | "inputs": [ 1216 | "pd.DataFrame({'name': {0: 'a', 1: 'b', 2: 'c', 3: 'd', 4: 'e'}, 'val': {0: 12, 1: 11, 2: 17, 3: 14, 4: 5}, 'deaths': {0: 45, 1: 92, 2: 22, 3: 39, 4: 79}, 'ix': {0: 2, 1: 3, 2: 2, 3: 2, 4: 4}})" 1217 | ], 1218 | "output": "pd.DataFrame({'deaths': {0: 45, 1: 92, 2: 22, 3: 39, 4: 79}, 'name': {0: 'a', 1: 'b', 2: 'c', 3: 'd', 4: 'e'}, 'val': {0: 12, 1: 11, 2: 17, 3: 14, 4: 5}, 'ix': {0: 2, 1: 3, 2: 2, 3: 2, 4: 4}})", 1219 | "invars": [ 1220 | "df", 1221 | "x" 1222 | ], 1223 | "outvar": "df" 1224 | }, 1225 | { 1226 | "inputs": [ 1227 | "pd.DataFrame({'one': {0: 1, 1: 4, 2: 7, 3: 10}, 'two': {0: 2, 1: 5, 2: 8, 3: 11}, 'deaths': {0: 3, 1: 6, 2: 9, 3: 12}})" 1228 | ], 1229 | "output": "pd.DataFrame({'deaths': {0: 3, 1: 6, 2: 9, 3: 12}, 'one': {0: 1, 1: 4, 2: 7, 3: 10}, 'two': {0: 2, 1: 5, 2: 8, 3: 11}})", 1230 | "invars": [ 1231 | "df", 1232 | "x" 1233 | ], 1234 | "outvar": "df" 1235 | } 1236 | ], 1237 | "solutions": [ 1238 | "df = df[['deaths'] + [x for x in df.columns if x!='deaths']]\n" 1239 | ] 1240 | } 1241 | } 1242 | }, 1243 | "39": { 1244 | "sets": { 1245 | "A": { 1246 | "queries": [ 1247 | { 1248 | "query": "Convert column 'time' of df to pandas datetime", 1249 | "user": "u0" 1250 | } 1251 | ], 1252 | "ios": [ 1253 | { 1254 | "inputs": [ 1255 | "pd.DataFrame({'time': {0: '04:12:40', 1: '04:17:01', 2: '04:17:01', 3: '05:12:56', 4: '05:33:12', 5: '05:38:33', 6: '05:48:52', 7: '05:50:22', 8: '04:17:01'}, 'val': {0: 30, 1: 22, 2: 23, 3: 4, 4: 5, 5: 18, 6: 13, 7: 13, 8: 18}})" 1256 | ], 1257 | "output": "pd.DataFrame({'time': {0: pd.Timestamp('2021-09-03 04:12:40'), 1: pd.Timestamp('2021-09-03 04:17:01'), 2: pd.Timestamp('2021-09-03 04:17:01'), 3: pd.Timestamp('2021-09-03 05:12:56'), 4: pd.Timestamp('2021-09-03 05:33:12'), 5: pd.Timestamp('2021-09-03 05:38:33'), 6: pd.Timestamp('2021-09-03 05:48:52'), 7: pd.Timestamp('2021-09-03 05:50:22'), 8: pd.Timestamp('2021-09-03 
04:17:01')}, 'val': {0: 30, 1: 22, 2: 23, 3: 4, 4: 5, 5: 18, 6: 13, 7: 13, 8: 18}})", 1258 | "invars": [ 1259 | "df" 1260 | ], 1261 | "outvar": "df" 1262 | } 1263 | ], 1264 | "solutions": [ 1265 | "df['time'] = pd.to_datetime(df['time'])\n" 1266 | ] 1267 | } 1268 | } 1269 | }, 1270 | "40": { 1271 | "sets": { 1272 | "A": { 1273 | "queries": [ 1274 | { 1275 | "query": "remove rows with duplicates occuring consecutively in column 'time' of dataframe df", 1276 | "user": "u0" 1277 | } 1278 | ], 1279 | "ios": [ 1280 | { 1281 | "inputs": [ 1282 | "pd.DataFrame({'time': {0: pd.Timestamp('2021-09-03 04:12:40'), 1: pd.Timestamp('2021-09-03 04:17:01'), 2: pd.Timestamp('2021-09-03 04:17:01'), 3: pd.Timestamp('2021-09-03 05:12:56'), 4: pd.Timestamp('2021-09-03 05:33:12'), 5: pd.Timestamp('2021-09-03 05:38:33'), 6: pd.Timestamp('2021-09-03 05:48:52'), 7: pd.Timestamp('2021-09-03 05:50:22'), 8: pd.Timestamp('2021-09-03 04:17:01')}, 'val': {0: 30, 1: 22, 2: 23, 3: 4, 4: 5, 5: 18, 6: 13, 7: 13, 8: 18}})" 1283 | ], 1284 | "output": "pd.DataFrame({'time': {0: pd.Timestamp('2021-09-03 04:12:40'), 1: pd.Timestamp('2021-09-03 04:17:01'), 3: pd.Timestamp('2021-09-03 05:12:56'), 4: pd.Timestamp('2021-09-03 05:33:12'), 5: pd.Timestamp('2021-09-03 05:38:33'), 6: pd.Timestamp('2021-09-03 05:48:52'), 7: pd.Timestamp('2021-09-03 05:50:22'), 8: pd.Timestamp('2021-09-03 04:17:01')}, 'val': {0: 30, 1: 22, 3: 4, 4: 5, 5: 18, 6: 13, 7: 13, 8: 18}})", 1285 | "invars": [ 1286 | "df" 1287 | ], 1288 | "outvar": "dfout" 1289 | } 1290 | ], 1291 | "solutions": [ 1292 | "dfout = df[df['time'].diff()!=pd.Timedelta(\"0\")]\n" 1293 | ] 1294 | } 1295 | } 1296 | }, 1297 | "41": { 1298 | "sets": { 1299 | "A": { 1300 | "queries": [ 1301 | { 1302 | "query": "drop rows logged at less than 10 minute 'time' interval in column df", 1303 | "user": "u0" 1304 | } 1305 | ], 1306 | "ios": [ 1307 | { 1308 | "inputs": [ 1309 | "pd.DataFrame({'time': {0: pd.Timestamp('2021-09-03 04:12:40'), 1: pd.Timestamp('2021-09-03 
04:17:01'), 2: None, 3: pd.Timestamp('2021-09-03 05:12:56'), 4: pd.Timestamp('2021-09-03 05:33:12'), 5: pd.Timestamp('2021-09-03 05:38:33'), 6: pd.Timestamp('2021-09-03 05:48:52'), 7: pd.Timestamp('2021-09-03 05:50:22'), 8: pd.Timestamp('2021-09-03 04:17:01')}, 'val': {0: 30, 1: 22, 2: 23, 3: 4, 4: 5, 5: 18, 6: 13, 7: 13, 8: 18}})" 1310 | ], 1311 | "output": "pd.DataFrame({'time': {1: pd.Timestamp('2021-09-03 04:17:01'), 5: pd.Timestamp('2021-09-03 05:38:33'), 7: pd.Timestamp('2021-09-03 05:50:22'), 8: pd.Timestamp('2021-09-03 04:17:01')}, 'val': {1: 22, 5: 18, 7: 13, 8: 18}})", 1312 | "invars": [ 1313 | "df" 1314 | ], 1315 | "outvar": "dfout" 1316 | }, 1317 | { 1318 | "inputs": [ 1319 | "pd.DataFrame({'time': {0: pd.Timestamp('2021-09-03 04:12:40'), 1: pd.Timestamp('2021-09-03 04:17:01'), 2: pd.Timestamp('2021-09-03 04:17:01'), 3: pd.Timestamp('2021-09-03 05:12:56'), 4: pd.Timestamp('2021-09-03 05:33:12'), 5: pd.Timestamp('2021-09-03 05:38:33'), 6: pd.Timestamp('2021-09-03 05:48:52'), 7: pd.Timestamp('2021-09-03 05:50:22'), 8: pd.Timestamp('2021-09-03 05:12:56'), 9: pd.Timestamp('2021-09-03 05:33:12'), 10: pd.Timestamp('2021-09-03 05:38:33'), 11: pd.Timestamp('2021-09-03 05:48:52'), 12: pd.Timestamp('2021-09-03 05:50:22'), 13: pd.Timestamp('2021-09-03 04:17:01')}, 'val': {0: 30, 1: 22, 2: 23, 3: 4, 4: 5, 5: 18, 6: 13, 7: 13, 8: 4, 9: 5, 10: 18, 11: 13, 12: 13, 13: 18}})" 1320 | ], 1321 | "output": "pd.DataFrame({'time': {1: pd.Timestamp('2021-09-03 04:17:01'), 2: pd.Timestamp('2021-09-03 04:17:01'), 5: pd.Timestamp('2021-09-03 05:38:33'), 7: pd.Timestamp('2021-09-03 05:50:22'), 8: pd.Timestamp('2021-09-03 05:12:56'), 10: pd.Timestamp('2021-09-03 05:38:33'), 12: pd.Timestamp('2021-09-03 05:50:22'), 13: pd.Timestamp('2021-09-03 04:17:01')}, 'val': {1: 22, 2: 23, 5: 18, 7: 13, 8: 4, 10: 18, 12: 13, 13: 18}})", 1322 | "invars": [ 1323 | "df" 1324 | ], 1325 | "outvar": "dfout" 1326 | } 1327 | ], 1328 | "solutions": [ 1329 | "dfout = df[df['time'].diff()3)]\n" 1786 | ] 
1787 | } 1788 | } 1789 | }, 1790 | "58": { 1791 | "sets": { 1792 | "A": { 1793 | "queries": [ 1794 | { 1795 | "query": "Update column 'present' to True in dataframe df where string 'dog' occurs inside column 'pet'", 1796 | "user": "u0" 1797 | } 1798 | ], 1799 | "ios": [ 1800 | { 1801 | "inputs": [ 1802 | "pd.DataFrame({'pet': {0: 'dog-a', 1: 'dog-b', 2: 'cat', 3: 'goldfish', 4: 'c-dog', 5: 'hamster'}, 'present': {0: False, 1: False, 2: False, 3: False, 4: False, 5: False}})" 1803 | ], 1804 | "output": "pd.DataFrame({'pet': {0: 'dog-a', 1: 'dog-b', 2: 'cat', 3: 'goldfish', 4: 'c-dog', 5: 'hamster'}, 'present': {0: True, 1: True, 2: False, 3: False, 4: True, 5: False}})", 1805 | "invars": [ 1806 | "df" 1807 | ], 1808 | "outvar": "df" 1809 | } 1810 | ], 1811 | "solutions": [ 1812 | "df.loc[df.pet.str.contains('dog'),'present'] = True\n" 1813 | ] 1814 | } 1815 | } 1816 | }, 1817 | "59": { 1818 | "sets": { 1819 | "A": { 1820 | "queries": [ 1821 | { 1822 | "query": "Increase 'count' column by 1 when column 'pet' contains substring 'dog' or 'cat'", 1823 | "user": "u0" 1824 | } 1825 | ], 1826 | "ios": [ 1827 | { 1828 | "inputs": [ 1829 | "pd.DataFrame({'pet': {0: 'dog-a', 1: 'dog-b', 2: 'cat', 3: 'goldfish', 4: 'c-dog', 5: 'hamster'}, 'count': {0: 12, 1: 15, 2: 4, 3: 4, 4: 15, 5: 2}})" 1830 | ], 1831 | "output": "pd.DataFrame({'pet': {0: 'dog-a', 1: 'dog-b', 2: 'cat', 3: 'goldfish', 4: 'c-dog', 5: 'hamster'}, 'count': {0: 13, 1: 16, 2: 5, 3: 4, 4: 16, 5: 2}})", 1832 | "invars": [ 1833 | "df" 1834 | ], 1835 | "outvar": "df" 1836 | } 1837 | ], 1838 | "solutions": [ 1839 | "df.loc[df.pet.str.contains('dog|cat'),'count'] += 1\n" 1840 | ] 1841 | } 1842 | } 1843 | }, 1844 | "60": { 1845 | "sets": { 1846 | "A": { 1847 | "queries": [ 1848 | { 1849 | "query": "For the series data with multiple indices, create dataframe df with columns as the outermost level of index", 1850 | "user": "u0" 1851 | } 1852 | ], 1853 | "ios": [ 1854 | { 1855 | "inputs": [ 1856 | "pd.Series([1, 2, 3, 4])" 
1857 | ], 1858 | "output": "pd.DataFrame({'one': {'a': 1, 'b': 2}, 'two': {'a': 3, 'b': 4}})", 1859 | "invars": [ 1860 | "data" 1861 | ], 1862 | "outvar": "out" 1863 | } 1864 | ], 1865 | "solutions": [ 1866 | "out = data.unstack(level=0)\n" 1867 | ] 1868 | } 1869 | } 1870 | }, 1871 | "61": { 1872 | "sets": { 1873 | "A": { 1874 | "queries": [ 1875 | { 1876 | "query": "For df with multiindex columns, remove column 'outer_col' from df", 1877 | "user": "u0" 1878 | } 1879 | ], 1880 | "ios": [ 1881 | { 1882 | "inputs": [ 1883 | "_df = pd.DataFrame({('c', 'e'): {(1, 2): 3, (5, 6): 7, (9, 10): 11}, ('d', 'f'): {(1, 2): 4, (5, 6): 8, (9, 10): 12}})\n_df.columns.names = ['outer_col', 'inner_col']\n_df.index.names = ['a', 'b']\n_df" 1884 | ], 1885 | "output": "_df = pd.DataFrame({'e': {(1, 2): 3, (5, 6): 7, (9, 10): 11}, 'f': {(1, 2): 4, (5, 6): 8, (9, 10): 12}})\n_df.columns.names = ['inner_col']\n_df.index.names = ['a', 'b']\n_df", 1886 | "invars": [ 1887 | "df" 1888 | ], 1889 | "outvar": "dfout" 1890 | } 1891 | ], 1892 | "solutions": [ 1893 | "dfout = df.droplevel('outer_col', axis=1)\n" 1894 | ] 1895 | } 1896 | } 1897 | }, 1898 | "62": { 1899 | "sets": { 1900 | "A": { 1901 | "queries": [ 1902 | { 1903 | "query": "compute number of times 'mouse' is present in dataframe df", 1904 | "user": "u0" 1905 | } 1906 | ], 1907 | "ios": [ 1908 | { 1909 | "inputs": [ 1910 | "pd.DataFrame({'pet1': {0: 'mouse', 1: 'mouse', 2: 'cat', 3: 'goldfish', 4: 'bad-mouse', 5: 'hamster', 6: 'lion', 7: 'tiger-mouse', 8: 'mouse'}, 'pet2': {0: 12, 1: 'mouse', 2: 'dog', 3: 'mouse', 4: 'snake', 5: 'mouse', 6: 'tiger', 7: 'mouse', 8: 'mouse'}})" 1911 | ], 1912 | "output": "np.int64(8)", 1913 | "invars": [ 1914 | "df" 1915 | ], 1916 | "outvar": "out" 1917 | } 1918 | ], 1919 | "solutions": [ 1920 | "out = (df=='mouse').sum().sum()\n" 1921 | ] 1922 | } 1923 | } 1924 | }, 1925 | "63": { 1926 | "sets": { 1927 | "A": { 1928 | "queries": [ 1929 | { 1930 | "query": "compute number rows in which 'mouse' is 
present in dataframe df", 1931 | "user": "u0" 1932 | } 1933 | ], 1934 | "ios": [ 1935 | { 1936 | "inputs": [ 1937 | "pd.DataFrame({'pet1': {0: 'mouse', 1: 'mouse', 2: 'cat', 3: 'goldfish', 4: 'bad-mouse', 5: 'hamster', 6: 'lion', 7: 'tiger-mouse', 8: 'mouse'}, 'pet2': {0: 12, 1: 'mouse', 2: 'dog', 3: 'mouse', 4: 'snake', 5: 'mouse', 6: 'tiger', 7: 'mouse', 8: 'mouse'}})" 1938 | ], 1939 | "output": "np.int64(6)", 1940 | "invars": [ 1941 | "df" 1942 | ], 1943 | "outvar": "out" 1944 | } 1945 | ], 1946 | "solutions": [ 1947 | "out = (df=='mouse').any(axis=1).sum()\n" 1948 | ] 1949 | } 1950 | } 1951 | }, 1952 | "64": { 1953 | "sets": { 1954 | "A": { 1955 | "queries": [ 1956 | { 1957 | "query": "find number of elements in column 'phone' of df which are comprised totally of digits", 1958 | "user": "u0" 1959 | } 1960 | ], 1961 | "ios": [ 1962 | { 1963 | "inputs": [ 1964 | "pd.DataFrame({'phone': {0: '911', 1: '888', 2: '1-2-3', 3: '+00', 4: 'abc', 5: '334', 6: '00000', 7: '9/9/9', 8: '12/12/2012'}})" 1965 | ], 1966 | "output": "np.int64(4)", 1967 | "invars": [ 1968 | "df" 1969 | ], 1970 | "outvar": "out" 1971 | } 1972 | ], 1973 | "solutions": [ 1974 | "out = df['phone'].str.isdigit().sum()\n" 1975 | ] 1976 | } 1977 | } 1978 | }, 1979 | "65": { 1980 | "sets": { 1981 | "A": { 1982 | "queries": [ 1983 | { 1984 | "query": "Select the rows of df where column 'X' lies in integers between inclusive 4-10", 1985 | "user": "u0" 1986 | } 1987 | ], 1988 | "ios": [ 1989 | { 1990 | "inputs": [ 1991 | "pd.DataFrame({'X': {0: 2.0, 1: 5.0, 2: 5.5, 3: 8.0, 4: 16.0, 5: 12.0, 6: 10.0, 7: 11.0, 8: 4.0, 9: 21.0, 10: 20.0, 11: 4.0, 12: 14.0}, 'Y': {0: 5, 1: 7, 2: 4, 3: 2, 4: 7, 5: 12, 6: 0, 7: 1, 8: 4, 9: 2, 10: 3, 11: 10, 12: 3}})" 1992 | ], 1993 | "output": "pd.DataFrame({'X': {1: 5.0, 3: 8.0, 6: 10.0, 8: 4.0, 11: 4.0}, 'Y': {1: 7, 3: 2, 6: 0, 8: 4, 11: 10}})", 1994 | "invars": [ 1995 | "df", 1996 | "list", 1997 | "range" 1998 | ], 1999 | "outvar": "out" 2000 | }, 2001 | { 2002 | "inputs": [ 2003 |
"pd.DataFrame({'X': {0: 2.0, 1: 5.0, 2: 5.5, 3: 8.0, 4: 20.0, 5: 4.0, 6: 14.0}, 'Z': {0: 5, 1: 7, 2: 4, 3: 2, 4: 3, 5: 10, 6: 3}})" 2004 | ], 2005 | "output": "pd.DataFrame({'X': {0: 5, 1: 8, 2: 4}, 'Z': {0: 7, 1: 2, 2: 10}})", 2006 | "invars": [ 2007 | "df", 2008 | "list", 2009 | "range" 2010 | ], 2011 | "outvar": "out" 2012 | } 2013 | ], 2014 | "solutions": [ 2015 | "out = df[df['X'].isin(list(range(4,11)))]\n" 2016 | ] 2017 | } 2018 | } 2019 | }, 2020 | "66": { 2021 | "sets": { 2022 | "A": { 2023 | "queries": [ 2024 | { 2025 | "query": "Select the rows of df where column 'Y' lies in integers between inclusive 4-10 or 14-20", 2026 | "user": "u0" 2027 | } 2028 | ], 2029 | "ios": [ 2030 | { 2031 | "inputs": [ 2032 | "pd.DataFrame({'Y': {0: 2.0, 1: 5.0, 2: 5.5, 3: 8.0, 4: 16.0, 5: 12.0, 6: 10.0, 7: 11.0, 8: 4.0, 9: 21.0, 10: 20.0, 11: 10.0, 12: 14.0}, 'Z': {0: 5, 1: 7, 2: 4, 3: 2, 4: 7, 5: 12, 6: 0, 7: 1, 8: 4, 9: 2, 10: 3, 11: 10, 12: 3}})" 2033 | ], 2034 | "output": "pd.DataFrame({'Y': {1: 5.0, 3: 8.0, 4: 16.0, 6: 10.0, 8: 4.0, 10: 20.0, 11: 10.0, 12: 14.0}, 'Z': {1: 7, 3: 2, 4: 7, 6: 0, 8: 4, 10: 3, 11: 10, 12: 3}})", 2035 | "invars": [ 2036 | "df", 2037 | "list", 2038 | "range" 2039 | ], 2040 | "outvar": "out" 2041 | }, 2042 | { 2043 | "inputs": [ 2044 | "pd.DataFrame({'Y': {0: 2.0, 1: 5.0, 2: 5.5, 3: 8.0, 4: 20.0, 5: 4.0, 6: 14.0}, 'X': {0: 5, 1: 7, 2: 4, 3: 2, 4: 3, 5: 10, 6: 3}})" 2045 | ], 2046 | "output": "pd.DataFrame({'Y': {0: 5, 1: 8, 2: 20, 3: 4, 4: 14}, 'X': {0: 7, 1: 2, 2: 3, 3: 10, 4: 3}})", 2047 | "invars": [ 2048 | "df", 2049 | "list", 2050 | "range" 2051 | ], 2052 | "outvar": "out" 2053 | } 2054 | ], 2055 | "solutions": [ 2056 | "out = df[df['Y'].isin(list(range(4,11))+list(range(14,21)))]\n" 2057 | ] 2058 | } 2059 | } 2060 | }, 2061 | "67": { 2062 | "sets": { 2063 | "A": { 2064 | "queries": [ 2065 | { 2066 | "query": "Change the type of column 'colA' of df to 'float32'", 2067 | "user": "u0" 2068 | } 2069 | ], 2070 | "ios": [ 2071 | { 2072 | 
"inputs": [ 2073 | "pd.DataFrame({'colA': {0: 2, 1: 5, 2: 8, 3: 16, 4: 12, 5: 10, 6: 11, 7: 4, 8: 21, 9: 20, 10: 10, 11: 14}, 'colB': {0: 5, 1: 7, 2: 2, 3: 7, 4: 12, 5: 0, 6: 1, 7: 4, 8: 2, 9: 3, 10: 10, 11: 3}})" 2074 | ], 2075 | "output": "pd.DataFrame({'colA': {0: 2.0, 1: 5.0, 2: 8.0, 3: 16.0, 4: 12.0, 5: 10.0, 6: 11.0, 7: 4.0, 8: 21.0, 9: 20.0, 10: 10.0, 11: 14.0}, 'colB': {0: 5, 1: 7, 2: 2, 3: 7, 4: 12, 5: 0, 6: 1, 7: 4, 8: 2, 9: 3, 10: 10, 11: 3}})", 2076 | "invars": [ 2077 | "df" 2078 | ], 2079 | "outvar": "dfout" 2080 | } 2081 | ], 2082 | "solutions": [ 2083 | "dfout = df.astype({'colA' : 'float32'})\n" 2084 | ] 2085 | } 2086 | } 2087 | } 2088 | } --------------------------------------------------------------------------------