├── .github └── workflows │ └── blank.yml ├── .gitignore ├── Makefile ├── README.md ├── license.txt ├── logo ├── color.jpg ├── logoBig.jpg └── logoSmall.jpg ├── poetry.lock ├── pyproject.toml ├── syntheticdb ├── __init__.py ├── db_core.py ├── distributions.py ├── main.py └── query_parser.py └── tests ├── __init__.py └── test_sql.py /.github/workflows/blank.yml: -------------------------------------------------------------------------------- 1 | # This is a basic workflow to help you get started with Actions 2 | 3 | name: CI 4 | 5 | # Controls when the workflow will run 6 | on: 7 | # Triggers the workflow on push or pull request events but only for the main branch 8 | push: 9 | branches: [ main ] 10 | 11 | # Allows you to run this workflow manually from the Actions tab 12 | workflow_dispatch: 13 | 14 | # A workflow run is made up of one or more jobs that can run sequentially or in parallel 15 | jobs: 16 | # This workflow contains a single job called "build" 17 | build: 18 | # The type of runner that the job will run on 19 | runs-on: ubuntu-latest 20 | 21 | # Steps represent a sequence of tasks that will be executed as part of the job 22 | steps: 23 | # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it 24 | - uses: actions/checkout@v2 25 | 26 | - name: install-poetry 27 | run: curl -sSL https://raw.githubusercontent.com/python-poetry/poetry/master/get-poetry.py | python - 28 | 29 | 30 | - name: Configure creds 31 | env: # Or as an environment variable 32 | PYPI_USER: ${{ secrets.PYPI_USER }} 33 | PYPI_PASSWORD: ${{ secrets.PYPI_PASSWORD }} 34 | run: source $HOME/.poetry/env && poetry config http-basic.pypi $PYPI_USER $PYPI_PASSWORD 35 | 36 | # Runs a single command using the runners shell 37 | - name: Release new version of package 38 | run: source $HOME/.poetry/env && make release 39 | 40 | - name: Commit changes 41 | uses: EndBug/add-and-commit@v7 42 | with: 43 | default_author: github_actor 44 | message: '[skip ci]' 45 | push: true 46 | 
add: '.' 47 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .idea/ 2 | dist/ 3 | *.pdf 4 | venv/ 5 | *.png 6 | __pycache__ -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | build: 2 | poetry build 3 | 4 | publish: 5 | poetry publish 6 | 7 | bump: 8 | poetry version patch 9 | 10 | release: bump build publish 11 | 12 | format: 13 | black syntheticdb/ tests/ 14 | 15 | test: 16 | pytest -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # SyntheticDB 2 | 3 | 4 | 5 | Simulate querying industry-scale ML datasets locally with real SQL statements 6 | without having to access, manage or pay for industry-scale infrastructure. 7 | 8 | We're on [Twitter](https://twitter.com/SynthAnalytics)! 9 | 10 | ## How Does It Work? 11 | 12 | SyntheticDB generates data only on an as-needed basis. 13 | That is to say, instead of generating 10000 rows and 14 | then filtering to get back 250 rows, 15 | SyntheticDB returns you 250 rows without needing 16 | to materialize the whole original dataset. 17 | 18 | Data returned from SyntheticDB queries follow statistical distributions 19 | that you specify when defining the columns for your tables. 20 | In particular, each column is defined by its data type and distribution. 21 | When you use WHERE clauses in your queries SyntheticDB 22 | samples from the corresponding conditional distributions. 23 | 24 | Check out our [demo notebook](https://colab.research.google.com/drive/1mtF_VAENjdRqodGh9kSXj_AWeg4gJxnI?usp=sharing) 25 | to see SyntheticDB in action! 
26 | 27 | ### Supported Features 28 | 29 | - Build synthetic tables by specifying distributions for each column to follow 30 | - Use SQL to query your synthetic dataset just as you would query a real DB 31 | - Supported distributions: uniform, log-uniform, normal, log-normal, gamma, exponential, beta, Weibull 32 | 33 | ### Limitations 34 | 35 | - Currently the only supported data type is Float 36 | - SQL JOINs are not yet supported 37 | - Specifying correlation between columns is not yet supported - i.e., all columns are pairwise independent random variables 38 | 39 | ### What's Next? 40 | 41 | In future releases we aim to: 42 | - support additional data types and distributions 43 | - support more complex SQL queries 44 | - support specifying correlations between columns 45 | - optimize query / sampling performance 46 | 47 | ## About The Authors 48 | 49 | [Sam Corzine](https://github.com/samcorzine) is an ML Engineer at Fetch Rewards with 5 years of experience working in data-intensive cloud computing. 50 | Sam got his BA in Mathematics. 51 | 52 | [Matt Garvin](https://github.com/mattgarvin1) is a Software Engineer at Coinbase, having previously worked in the 53 | [Center for Translational Data Science](https://ctds.uchicago.edu/) at the University of Chicago. 54 | Matt got his BA/MS in Mathematics. 55 | 56 | Send us a [message](mailto:contact.synthetic.analytics@gmail.com?subject=Hello%20SyntheticDB!) - we'd love to talk! 
57 | 58 | -------------------------------------------------------------------------------- /license.txt: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright © 2021 Sam Corzine, Matt Garvin 4 | 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the “Software”), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in 14 | all copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22 | THE SOFTWARE. 
23 | -------------------------------------------------------------------------------- /logo/color.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SyntheticAnalytics/SyntheticDB/53d0e3e57a3b51725c7f9b310e3a8c6bcdd7834b/logo/color.jpg -------------------------------------------------------------------------------- /logo/logoBig.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SyntheticAnalytics/SyntheticDB/53d0e3e57a3b51725c7f9b310e3a8c6bcdd7834b/logo/logoBig.jpg -------------------------------------------------------------------------------- /logo/logoSmall.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SyntheticAnalytics/SyntheticDB/53d0e3e57a3b51725c7f9b310e3a8c6bcdd7834b/logo/logoSmall.jpg -------------------------------------------------------------------------------- /poetry.lock: -------------------------------------------------------------------------------- 1 | [[package]] 2 | name = "atomicwrites" 3 | version = "1.4.0" 4 | description = "Atomic file writes." 
5 | category = "dev" 6 | optional = false 7 | python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" 8 | 9 | [[package]] 10 | name = "attrs" 11 | version = "21.2.0" 12 | description = "Classes Without Boilerplate" 13 | category = "dev" 14 | optional = false 15 | python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" 16 | 17 | [package.extras] 18 | dev = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "six", "mypy", "pytest-mypy-plugins", "zope.interface", "furo", "sphinx", "sphinx-notfound-page", "pre-commit"] 19 | docs = ["furo", "sphinx", "zope.interface", "sphinx-notfound-page"] 20 | tests = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "six", "mypy", "pytest-mypy-plugins", "zope.interface"] 21 | tests_no_zope = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "six", "mypy", "pytest-mypy-plugins"] 22 | 23 | [[package]] 24 | name = "colorama" 25 | version = "0.4.4" 26 | description = "Cross-platform colored terminal text." 
27 | category = "dev" 28 | optional = false 29 | python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" 30 | 31 | [[package]] 32 | name = "cycler" 33 | version = "0.10.0" 34 | description = "Composable style cycles" 35 | category = "main" 36 | optional = false 37 | python-versions = "*" 38 | 39 | [package.dependencies] 40 | six = "*" 41 | 42 | [[package]] 43 | name = "importlib-metadata" 44 | version = "4.6.3" 45 | description = "Read metadata from Python packages" 46 | category = "dev" 47 | optional = false 48 | python-versions = ">=3.6" 49 | 50 | [package.dependencies] 51 | typing-extensions = {version = ">=3.6.4", markers = "python_version < \"3.8\""} 52 | zipp = ">=0.5" 53 | 54 | [package.extras] 55 | docs = ["sphinx", "jaraco.packaging (>=8.2)", "rst.linker (>=1.9)"] 56 | perf = ["ipython"] 57 | testing = ["pytest (>=4.6)", "pytest-checkdocs (>=2.4)", "pytest-flake8", "pytest-cov", "pytest-enabler (>=1.0.1)", "packaging", "pep517", "pyfakefs", "flufl.flake8", "pytest-perf (>=0.9.2)", "pytest-black (>=0.3.7)", "pytest-mypy", "importlib-resources (>=1.3)"] 58 | 59 | [[package]] 60 | name = "iniconfig" 61 | version = "1.1.1" 62 | description = "iniconfig: brain-dead simple config-ini parsing" 63 | category = "dev" 64 | optional = false 65 | python-versions = "*" 66 | 67 | [[package]] 68 | name = "kiwisolver" 69 | version = "1.3.1" 70 | description = "A fast implementation of the Cassowary constraint solver" 71 | category = "main" 72 | optional = false 73 | python-versions = ">=3.6" 74 | 75 | [[package]] 76 | name = "matplotlib" 77 | version = "3.4.2" 78 | description = "Python plotting package" 79 | category = "main" 80 | optional = false 81 | python-versions = ">=3.7" 82 | 83 | [package.dependencies] 84 | cycler = ">=0.10" 85 | kiwisolver = ">=1.0.1" 86 | numpy = ">=1.16" 87 | pillow = ">=6.2.0" 88 | pyparsing = ">=2.2.1" 89 | python-dateutil = ">=2.7" 90 | 91 | [[package]] 92 | name = "numpy" 93 | version = "1.21.1" 94 | description = "NumPy is 
the fundamental package for array computing with Python." 95 | category = "main" 96 | optional = false 97 | python-versions = ">=3.7" 98 | 99 | [[package]] 100 | name = "packaging" 101 | version = "21.0" 102 | description = "Core utilities for Python packages" 103 | category = "dev" 104 | optional = false 105 | python-versions = ">=3.6" 106 | 107 | [package.dependencies] 108 | pyparsing = ">=2.0.2" 109 | 110 | [[package]] 111 | name = "pandas" 112 | version = "1.3.1" 113 | description = "Powerful data structures for data analysis, time series, and statistics" 114 | category = "main" 115 | optional = false 116 | python-versions = ">=3.7.1" 117 | 118 | [package.dependencies] 119 | numpy = ">=1.17.3" 120 | python-dateutil = ">=2.7.3" 121 | pytz = ">=2017.3" 122 | 123 | [package.extras] 124 | test = ["hypothesis (>=3.58)", "pytest (>=6.0)", "pytest-xdist"] 125 | 126 | [[package]] 127 | name = "pillow" 128 | version = "8.3.1" 129 | description = "Python Imaging Library (Fork)" 130 | category = "main" 131 | optional = false 132 | python-versions = ">=3.6" 133 | 134 | [[package]] 135 | name = "pluggy" 136 | version = "0.13.1" 137 | description = "plugin and hook calling mechanisms for python" 138 | category = "dev" 139 | optional = false 140 | python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" 141 | 142 | [package.dependencies] 143 | importlib-metadata = {version = ">=0.12", markers = "python_version < \"3.8\""} 144 | 145 | [package.extras] 146 | dev = ["pre-commit", "tox"] 147 | 148 | [[package]] 149 | name = "py" 150 | version = "1.10.0" 151 | description = "library with cross-python path, ini-parsing, io, code, log facilities" 152 | category = "dev" 153 | optional = false 154 | python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" 155 | 156 | [[package]] 157 | name = "pyparsing" 158 | version = "2.4.7" 159 | description = "Python parsing module" 160 | category = "main" 161 | optional = false 162 | python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*" 
163 | 164 | [[package]] 165 | name = "pytest" 166 | version = "6.2.4" 167 | description = "pytest: simple powerful testing with Python" 168 | category = "dev" 169 | optional = false 170 | python-versions = ">=3.6" 171 | 172 | [package.dependencies] 173 | atomicwrites = {version = ">=1.0", markers = "sys_platform == \"win32\""} 174 | attrs = ">=19.2.0" 175 | colorama = {version = "*", markers = "sys_platform == \"win32\""} 176 | importlib-metadata = {version = ">=0.12", markers = "python_version < \"3.8\""} 177 | iniconfig = "*" 178 | packaging = "*" 179 | pluggy = ">=0.12,<1.0.0a1" 180 | py = ">=1.8.2" 181 | toml = "*" 182 | 183 | [package.extras] 184 | testing = ["argcomplete", "hypothesis (>=3.56)", "mock", "nose", "requests", "xmlschema"] 185 | 186 | [[package]] 187 | name = "python-dateutil" 188 | version = "2.8.2" 189 | description = "Extensions to the standard Python datetime module" 190 | category = "main" 191 | optional = false 192 | python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" 193 | 194 | [package.dependencies] 195 | six = ">=1.5" 196 | 197 | [[package]] 198 | name = "pytz" 199 | version = "2021.1" 200 | description = "World timezone definitions, modern and historical" 201 | category = "main" 202 | optional = false 203 | python-versions = "*" 204 | 205 | [[package]] 206 | name = "scipy" 207 | version = "1.6.1" 208 | description = "SciPy: Scientific Library for Python" 209 | category = "main" 210 | optional = false 211 | python-versions = ">=3.7" 212 | 213 | [package.dependencies] 214 | numpy = ">=1.16.5" 215 | 216 | [[package]] 217 | name = "six" 218 | version = "1.16.0" 219 | description = "Python 2 and 3 compatibility utilities" 220 | category = "main" 221 | optional = false 222 | python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" 223 | 224 | [[package]] 225 | name = "sqlparse" 226 | version = "0.4.1" 227 | description = "A non-validating SQL parser." 
228 | category = "main" 229 | optional = false 230 | python-versions = ">=3.5" 231 | 232 | [[package]] 233 | name = "toml" 234 | version = "0.10.2" 235 | description = "Python Library for Tom's Obvious, Minimal Language" 236 | category = "dev" 237 | optional = false 238 | python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*" 239 | 240 | [[package]] 241 | name = "typing-extensions" 242 | version = "3.10.0.0" 243 | description = "Backported and Experimental Type Hints for Python 3.5+" 244 | category = "dev" 245 | optional = false 246 | python-versions = "*" 247 | 248 | [[package]] 249 | name = "zipp" 250 | version = "3.5.0" 251 | description = "Backport of pathlib-compatible object wrapper for zip files" 252 | category = "dev" 253 | optional = false 254 | python-versions = ">=3.6" 255 | 256 | [package.extras] 257 | docs = ["sphinx", "jaraco.packaging (>=8.2)", "rst.linker (>=1.9)"] 258 | testing = ["pytest (>=4.6)", "pytest-checkdocs (>=2.4)", "pytest-flake8", "pytest-cov", "pytest-enabler (>=1.0.1)", "jaraco.itertools", "func-timeout", "pytest-black (>=0.3.7)", "pytest-mypy"] 259 | 260 | [metadata] 261 | lock-version = "1.1" 262 | python-versions = ">=3.7.1,<4.0" 263 | content-hash = "7a197d43c42b1f09ffcc8544cb11e72916bd44851f53c7350333ec6521b0ed11" 264 | 265 | [metadata.files] 266 | atomicwrites = [ 267 | {file = "atomicwrites-1.4.0-py2.py3-none-any.whl", hash = "sha256:6d1784dea7c0c8d4a5172b6c620f40b6e4cbfdf96d783691f2e1302a7b88e197"}, 268 | {file = "atomicwrites-1.4.0.tar.gz", hash = "sha256:ae70396ad1a434f9c7046fd2dd196fc04b12f9e91ffb859164193be8b6168a7a"}, 269 | ] 270 | attrs = [ 271 | {file = "attrs-21.2.0-py2.py3-none-any.whl", hash = "sha256:149e90d6d8ac20db7a955ad60cf0e6881a3f20d37096140088356da6c716b0b1"}, 272 | {file = "attrs-21.2.0.tar.gz", hash = "sha256:ef6aaac3ca6cd92904cdd0d83f629a15f18053ec84e6432106f7a4d04ae4f5fb"}, 273 | ] 274 | colorama = [ 275 | {file = "colorama-0.4.4-py2.py3-none-any.whl", hash = 
"sha256:9f47eda37229f68eee03b24b9748937c7dc3868f906e8ba69fbcbdd3bc5dc3e2"}, 276 | {file = "colorama-0.4.4.tar.gz", hash = "sha256:5941b2b48a20143d2267e95b1c2a7603ce057ee39fd88e7329b0c292aa16869b"}, 277 | ] 278 | cycler = [ 279 | {file = "cycler-0.10.0-py2.py3-none-any.whl", hash = "sha256:1d8a5ae1ff6c5cf9b93e8811e581232ad8920aeec647c37316ceac982b08cb2d"}, 280 | {file = "cycler-0.10.0.tar.gz", hash = "sha256:cd7b2d1018258d7247a71425e9f26463dfb444d411c39569972f4ce586b0c9d8"}, 281 | ] 282 | importlib-metadata = [ 283 | {file = "importlib_metadata-4.6.3-py3-none-any.whl", hash = "sha256:51c6635429c77cf1ae634c997ff9e53ca3438b495f10a55ba28594dd69764a8b"}, 284 | {file = "importlib_metadata-4.6.3.tar.gz", hash = "sha256:0645585859e9a6689c523927a5032f2ba5919f1f7d0e84bd4533312320de1ff9"}, 285 | ] 286 | iniconfig = [ 287 | {file = "iniconfig-1.1.1-py2.py3-none-any.whl", hash = "sha256:011e24c64b7f47f6ebd835bb12a743f2fbe9a26d4cecaa7f53bc4f35ee9da8b3"}, 288 | {file = "iniconfig-1.1.1.tar.gz", hash = "sha256:bc3af051d7d14b2ee5ef9969666def0cd1a000e121eaea580d4a313df4b37f32"}, 289 | ] 290 | kiwisolver = [ 291 | {file = "kiwisolver-1.3.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:fd34fbbfbc40628200730bc1febe30631347103fc8d3d4fa012c21ab9c11eca9"}, 292 | {file = "kiwisolver-1.3.1-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:d3155d828dec1d43283bd24d3d3e0d9c7c350cdfcc0bd06c0ad1209c1bbc36d0"}, 293 | {file = "kiwisolver-1.3.1-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:5a7a7dbff17e66fac9142ae2ecafb719393aaee6a3768c9de2fd425c63b53e21"}, 294 | {file = "kiwisolver-1.3.1-cp36-cp36m-manylinux2014_aarch64.whl", hash = "sha256:f8d6f8db88049a699817fd9178782867bf22283e3813064302ac59f61d95be05"}, 295 | {file = "kiwisolver-1.3.1-cp36-cp36m-manylinux2014_ppc64le.whl", hash = "sha256:5f6ccd3dd0b9739edcf407514016108e2280769c73a85b9e59aa390046dbf08b"}, 296 | {file = "kiwisolver-1.3.1-cp36-cp36m-win32.whl", hash = 
"sha256:225e2e18f271e0ed8157d7f4518ffbf99b9450fca398d561eb5c4a87d0986dd9"}, 297 | {file = "kiwisolver-1.3.1-cp36-cp36m-win_amd64.whl", hash = "sha256:cf8b574c7b9aa060c62116d4181f3a1a4e821b2ec5cbfe3775809474113748d4"}, 298 | {file = "kiwisolver-1.3.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:232c9e11fd7ac3a470d65cd67e4359eee155ec57e822e5220322d7b2ac84fbf0"}, 299 | {file = "kiwisolver-1.3.1-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:b38694dcdac990a743aa654037ff1188c7a9801ac3ccc548d3341014bc5ca278"}, 300 | {file = "kiwisolver-1.3.1-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:ca3820eb7f7faf7f0aa88de0e54681bddcb46e485beb844fcecbcd1c8bd01689"}, 301 | {file = "kiwisolver-1.3.1-cp37-cp37m-manylinux2014_aarch64.whl", hash = "sha256:c8fd0f1ae9d92b42854b2979024d7597685ce4ada367172ed7c09edf2cef9cb8"}, 302 | {file = "kiwisolver-1.3.1-cp37-cp37m-manylinux2014_ppc64le.whl", hash = "sha256:1e1bc12fb773a7b2ffdeb8380609f4f8064777877b2225dec3da711b421fda31"}, 303 | {file = "kiwisolver-1.3.1-cp37-cp37m-win32.whl", hash = "sha256:72c99e39d005b793fb7d3d4e660aed6b6281b502e8c1eaf8ee8346023c8e03bc"}, 304 | {file = "kiwisolver-1.3.1-cp37-cp37m-win_amd64.whl", hash = "sha256:8be8d84b7d4f2ba4ffff3665bcd0211318aa632395a1a41553250484a871d454"}, 305 | {file = "kiwisolver-1.3.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:31dfd2ac56edc0ff9ac295193eeaea1c0c923c0355bf948fbd99ed6018010b72"}, 306 | {file = "kiwisolver-1.3.1-cp38-cp38-manylinux1_i686.whl", hash = "sha256:563c649cfdef27d081c84e72a03b48ea9408c16657500c312575ae9d9f7bc1c3"}, 307 | {file = "kiwisolver-1.3.1-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:78751b33595f7f9511952e7e60ce858c6d64db2e062afb325985ddbd34b5c131"}, 308 | {file = "kiwisolver-1.3.1-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:a357fd4f15ee49b4a98b44ec23a34a95f1e00292a139d6015c11f55774ef10de"}, 309 | {file = "kiwisolver-1.3.1-cp38-cp38-manylinux2014_ppc64le.whl", hash = 
"sha256:5989db3b3b34b76c09253deeaf7fbc2707616f130e166996606c284395da3f18"}, 310 | {file = "kiwisolver-1.3.1-cp38-cp38-win32.whl", hash = "sha256:c08e95114951dc2090c4a630c2385bef681cacf12636fb0241accdc6b303fd81"}, 311 | {file = "kiwisolver-1.3.1-cp38-cp38-win_amd64.whl", hash = "sha256:44a62e24d9b01ba94ae7a4a6c3fb215dc4af1dde817e7498d901e229aaf50e4e"}, 312 | {file = "kiwisolver-1.3.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:50af681a36b2a1dee1d3c169ade9fdc59207d3c31e522519181e12f1b3ba7000"}, 313 | {file = "kiwisolver-1.3.1-cp39-cp39-manylinux1_i686.whl", hash = "sha256:a53d27d0c2a0ebd07e395e56a1fbdf75ffedc4a05943daf472af163413ce9598"}, 314 | {file = "kiwisolver-1.3.1-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:834ee27348c4aefc20b479335fd422a2c69db55f7d9ab61721ac8cd83eb78882"}, 315 | {file = "kiwisolver-1.3.1-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:5c3e6455341008a054cccee8c5d24481bcfe1acdbc9add30aa95798e95c65621"}, 316 | {file = "kiwisolver-1.3.1-cp39-cp39-manylinux2014_ppc64le.whl", hash = "sha256:acef3d59d47dd85ecf909c359d0fd2c81ed33bdff70216d3956b463e12c38a54"}, 317 | {file = "kiwisolver-1.3.1-cp39-cp39-win32.whl", hash = "sha256:c5518d51a0735b1e6cee1fdce66359f8d2b59c3ca85dc2b0813a8aa86818a030"}, 318 | {file = "kiwisolver-1.3.1-cp39-cp39-win_amd64.whl", hash = "sha256:b9edd0110a77fc321ab090aaa1cfcaba1d8499850a12848b81be2222eab648f6"}, 319 | {file = "kiwisolver-1.3.1-pp36-pypy36_pp73-macosx_10_9_x86_64.whl", hash = "sha256:0cd53f403202159b44528498de18f9285b04482bab2a6fc3f5dd8dbb9352e30d"}, 320 | {file = "kiwisolver-1.3.1-pp36-pypy36_pp73-manylinux2010_x86_64.whl", hash = "sha256:33449715e0101e4d34f64990352bce4095c8bf13bed1b390773fc0a7295967b3"}, 321 | {file = "kiwisolver-1.3.1-pp36-pypy36_pp73-win32.whl", hash = "sha256:401a2e9afa8588589775fe34fc22d918ae839aaaf0c0e96441c0fdbce6d8ebe6"}, 322 | {file = "kiwisolver-1.3.1.tar.gz", hash = "sha256:950a199911a8d94683a6b10321f9345d5a3a8433ec58b217ace979e18f16e248"}, 323 | ] 324 | matplotlib = [ 
325 | {file = "matplotlib-3.4.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:c541ee5a3287efe066bbe358320853cf4916bc14c00c38f8f3d8d75275a405a9"}, 326 | {file = "matplotlib-3.4.2-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:3a5c18dbd2c7c366da26a4ad1462fe3e03a577b39e3b503bbcf482b9cdac093c"}, 327 | {file = "matplotlib-3.4.2-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:a9d8cb5329df13e0cdaa14b3b43f47b5e593ec637f13f14db75bb16e46178b05"}, 328 | {file = "matplotlib-3.4.2-cp37-cp37m-manylinux2014_aarch64.whl", hash = "sha256:7ad19f3fb6145b9eb41c08e7cbb9f8e10b91291396bee21e9ce761bb78df63ec"}, 329 | {file = "matplotlib-3.4.2-cp37-cp37m-win32.whl", hash = "sha256:7a58f3d8fe8fac3be522c79d921c9b86e090a59637cb88e3bc51298d7a2c862a"}, 330 | {file = "matplotlib-3.4.2-cp37-cp37m-win_amd64.whl", hash = "sha256:6382bc6e2d7e481bcd977eb131c31dee96e0fb4f9177d15ec6fb976d3b9ace1a"}, 331 | {file = "matplotlib-3.4.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:6a6a44f27aabe720ec4fd485061e8a35784c2b9ffa6363ad546316dfc9cea04e"}, 332 | {file = "matplotlib-3.4.2-cp38-cp38-manylinux1_i686.whl", hash = "sha256:1c1779f7ab7d8bdb7d4c605e6ffaa0614b3e80f1e3c8ccf7b9269a22dbc5986b"}, 333 | {file = "matplotlib-3.4.2-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:5826f56055b9b1c80fef82e326097e34dc4af8c7249226b7dd63095a686177d1"}, 334 | {file = "matplotlib-3.4.2-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:0bea5ec5c28d49020e5d7923c2725b837e60bc8be99d3164af410eb4b4c827da"}, 335 | {file = "matplotlib-3.4.2-cp38-cp38-win32.whl", hash = "sha256:6475d0209024a77f869163ec3657c47fed35d9b6ed8bccba8aa0f0099fbbdaa8"}, 336 | {file = "matplotlib-3.4.2-cp38-cp38-win_amd64.whl", hash = "sha256:21b31057bbc5e75b08e70a43cefc4c0b2c2f1b1a850f4a0f7af044eb4163086c"}, 337 | {file = "matplotlib-3.4.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:b26535b9de85326e6958cdef720ecd10bcf74a3f4371bf9a7e5b2e659c17e153"}, 338 | {file = "matplotlib-3.4.2-cp39-cp39-manylinux1_i686.whl", hash = 
"sha256:32fa638cc10886885d1ca3d409d4473d6a22f7ceecd11322150961a70fab66dd"}, 339 | {file = "matplotlib-3.4.2-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:956c8849b134b4a343598305a3ca1bdd3094f01f5efc8afccdebeffe6b315247"}, 340 | {file = "matplotlib-3.4.2-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:85f191bb03cb1a7b04b5c2cca4792bef94df06ef473bc49e2818105671766fee"}, 341 | {file = "matplotlib-3.4.2-cp39-cp39-win32.whl", hash = "sha256:b1d5a2cedf5de05567c441b3a8c2651fbde56df08b82640e7f06c8cd91e201f6"}, 342 | {file = "matplotlib-3.4.2-cp39-cp39-win_amd64.whl", hash = "sha256:df815378a754a7edd4559f8c51fc7064f779a74013644a7f5ac7a0c31f875866"}, 343 | {file = "matplotlib-3.4.2.tar.gz", hash = "sha256:d8d994cefdff9aaba45166eb3de4f5211adb4accac85cbf97137e98f26ea0219"}, 344 | ] 345 | numpy = [ 346 | {file = "numpy-1.21.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:38e8648f9449a549a7dfe8d8755a5979b45b3538520d1e735637ef28e8c2dc50"}, 347 | {file = "numpy-1.21.1-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:fd7d7409fa643a91d0a05c7554dd68aa9c9bb16e186f6ccfe40d6e003156e33a"}, 348 | {file = "numpy-1.21.1-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:a75b4498b1e93d8b700282dc8e655b8bd559c0904b3910b144646dbbbc03e062"}, 349 | {file = "numpy-1.21.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1412aa0aec3e00bc23fbb8664d76552b4efde98fb71f60737c83efbac24112f1"}, 350 | {file = "numpy-1.21.1-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:e46ceaff65609b5399163de5893d8f2a82d3c77d5e56d976c8b5fb01faa6b671"}, 351 | {file = "numpy-1.21.1-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:c6a2324085dd52f96498419ba95b5777e40b6bcbc20088fddb9e8cbb58885e8e"}, 352 | {file = "numpy-1.21.1-cp37-cp37m-win32.whl", hash = "sha256:73101b2a1fef16602696d133db402a7e7586654682244344b8329cdcbbb82172"}, 353 | {file = "numpy-1.21.1-cp37-cp37m-win_amd64.whl", hash = 
"sha256:7a708a79c9a9d26904d1cca8d383bf869edf6f8e7650d85dbc77b041e8c5a0f8"}, 354 | {file = "numpy-1.21.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:95b995d0c413f5d0428b3f880e8fe1660ff9396dcd1f9eedbc311f37b5652e16"}, 355 | {file = "numpy-1.21.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:635e6bd31c9fb3d475c8f44a089569070d10a9ef18ed13738b03049280281267"}, 356 | {file = "numpy-1.21.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:4a3d5fb89bfe21be2ef47c0614b9c9c707b7362386c9a3ff1feae63e0267ccb6"}, 357 | {file = "numpy-1.21.1-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:8a326af80e86d0e9ce92bcc1e65c8ff88297de4fa14ee936cb2293d414c9ec63"}, 358 | {file = "numpy-1.21.1-cp38-cp38-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:791492091744b0fe390a6ce85cc1bf5149968ac7d5f0477288f78c89b385d9af"}, 359 | {file = "numpy-1.21.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0318c465786c1f63ac05d7c4dbcecd4d2d7e13f0959b01b534ea1e92202235c5"}, 360 | {file = "numpy-1.21.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:9a513bd9c1551894ee3d31369f9b07460ef223694098cf27d399513415855b68"}, 361 | {file = "numpy-1.21.1-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:91c6f5fc58df1e0a3cc0c3a717bb3308ff850abdaa6d2d802573ee2b11f674a8"}, 362 | {file = "numpy-1.21.1-cp38-cp38-win32.whl", hash = "sha256:978010b68e17150db8765355d1ccdd450f9fc916824e8c4e35ee620590e234cd"}, 363 | {file = "numpy-1.21.1-cp38-cp38-win_amd64.whl", hash = "sha256:9749a40a5b22333467f02fe11edc98f022133ee1bfa8ab99bda5e5437b831214"}, 364 | {file = "numpy-1.21.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:d7a4aeac3b94af92a9373d6e77b37691b86411f9745190d2c351f410ab3a791f"}, 365 | {file = "numpy-1.21.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:d9e7912a56108aba9b31df688a4c4f5cb0d9d3787386b87d504762b6754fbb1b"}, 366 | {file = "numpy-1.21.1-cp39-cp39-macosx_11_0_arm64.whl", hash = 
"sha256:25b40b98ebdd272bc3020935427a4530b7d60dfbe1ab9381a39147834e985eac"}, 367 | {file = "numpy-1.21.1-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:8a92c5aea763d14ba9d6475803fc7904bda7decc2a0a68153f587ad82941fec1"}, 368 | {file = "numpy-1.21.1-cp39-cp39-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:05a0f648eb28bae4bcb204e6fd14603de2908de982e761a2fc78efe0f19e96e1"}, 369 | {file = "numpy-1.21.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f01f28075a92eede918b965e86e8f0ba7b7797a95aa8d35e1cc8821f5fc3ad6a"}, 370 | {file = "numpy-1.21.1-cp39-cp39-win32.whl", hash = "sha256:88c0b89ad1cc24a5efbb99ff9ab5db0f9a86e9cc50240177a571fbe9c2860ac2"}, 371 | {file = "numpy-1.21.1-cp39-cp39-win_amd64.whl", hash = "sha256:01721eefe70544d548425a07c80be8377096a54118070b8a62476866d5208e33"}, 372 | {file = "numpy-1.21.1-pp37-pypy37_pp73-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:2d4d1de6e6fb3d28781c73fbde702ac97f03d79e4ffd6598b880b2d95d62ead4"}, 373 | {file = "numpy-1.21.1.zip", hash = "sha256:dff4af63638afcc57a3dfb9e4b26d434a7a602d225b42d746ea7fe2edf1342fd"}, 374 | ] 375 | packaging = [ 376 | {file = "packaging-21.0-py3-none-any.whl", hash = "sha256:c86254f9220d55e31cc94d69bade760f0847da8000def4dfe1c6b872fd14ff14"}, 377 | {file = "packaging-21.0.tar.gz", hash = "sha256:7dc96269f53a4ccec5c0670940a4281106dd0bb343f47b7471f779df49c2fbe7"}, 378 | ] 379 | pandas = [ 380 | {file = "pandas-1.3.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:1ee8418d0f936ff2216513aa03e199657eceb67690995d427a4a7ecd2e68f442"}, 381 | {file = "pandas-1.3.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5d9acfca191140a518779d1095036d842d5e5bc8e8ad8b5eaad1aff90fe1870d"}, 382 | {file = "pandas-1.3.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e323028ab192fcfe1e8999c012a0fa96d066453bb354c7e7a4a267b25e73d3c8"}, 383 | {file = 
"pandas-1.3.1-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9d06661c6eb741ae633ee1c57e8c432bb4203024e263fe1a077fa3fda7817fdb"}, 384 | {file = "pandas-1.3.1-cp37-cp37m-win32.whl", hash = "sha256:23c7452771501254d2ae23e9e9dac88417de7e6eff3ce64ee494bb94dc88c300"}, 385 | {file = "pandas-1.3.1-cp37-cp37m-win_amd64.whl", hash = "sha256:7150039e78a81eddd9f5a05363a11cadf90a4968aac6f086fd83e66cf1c8d1d6"}, 386 | {file = "pandas-1.3.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:5c09a2538f0fddf3895070579082089ff4ae52b6cb176d8ec7a4dacf7e3676c1"}, 387 | {file = "pandas-1.3.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:905fc3e0fcd86b0a9f1f97abee7d36894698d2592b22b859f08ea5a8fe3d3aab"}, 388 | {file = "pandas-1.3.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5ee927c70794e875a59796fab8047098aa59787b1be680717c141cd7873818ae"}, 389 | {file = "pandas-1.3.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0c976e023ed580e60a82ccebdca8e1cc24d8b1fbb28175eb6521025c127dab66"}, 390 | {file = "pandas-1.3.1-cp38-cp38-win32.whl", hash = "sha256:22f3fcc129fb482ef44e7df2a594f0bd514ac45aabe50da1a10709de1b0f9d84"}, 391 | {file = "pandas-1.3.1-cp38-cp38-win_amd64.whl", hash = "sha256:45656cd59ae9745a1a21271a62001df58342b59c66d50754390066db500a8362"}, 392 | {file = "pandas-1.3.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:114c6789d15862508900a25cb4cb51820bfdd8595ea306bab3b53cd19f990b65"}, 393 | {file = "pandas-1.3.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:527c43311894aff131dea99cf418cd723bfd4f0bcf3c3da460f3b57e52a64da5"}, 394 | {file = "pandas-1.3.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fdb3b33dde260b1766ea4d3c6b8fbf6799cee18d50a2a8bc534cf3550b7c819a"}, 395 | {file = 
"pandas-1.3.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c28760932283d2c9f6fa5e53d2f77a514163b9e67fd0ee0879081be612567195"}, 396 | {file = "pandas-1.3.1-cp39-cp39-win32.whl", hash = "sha256:be12d77f7e03c40a2466ed00ccd1a5f20a574d3c622fe1516037faa31aa448aa"}, 397 | {file = "pandas-1.3.1-cp39-cp39-win_amd64.whl", hash = "sha256:9e1fe6722cbe27eb5891c1977bca62d456c19935352eea64d33956db46139364"}, 398 | {file = "pandas-1.3.1.tar.gz", hash = "sha256:341935a594db24f3ff07d1b34d1d231786aa9adfa84b76eab10bf42907c8aed3"}, 399 | ] 400 | pillow = [ 401 | {file = "Pillow-8.3.1-1-cp36-cp36m-win_amd64.whl", hash = "sha256:fd7eef578f5b2200d066db1b50c4aa66410786201669fb76d5238b007918fb24"}, 402 | {file = "Pillow-8.3.1-1-cp37-cp37m-win_amd64.whl", hash = "sha256:75e09042a3b39e0ea61ce37e941221313d51a9c26b8e54e12b3ececccb71718a"}, 403 | {file = "Pillow-8.3.1-1-cp38-cp38-win_amd64.whl", hash = "sha256:c0e0550a404c69aab1e04ae89cca3e2a042b56ab043f7f729d984bf73ed2a093"}, 404 | {file = "Pillow-8.3.1-1-cp39-cp39-win_amd64.whl", hash = "sha256:479ab11cbd69612acefa8286481f65c5dece2002ffaa4f9db62682379ca3bb77"}, 405 | {file = "Pillow-8.3.1-1-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:f156d6ecfc747ee111c167f8faf5f4953761b5e66e91a4e6767e548d0f80129c"}, 406 | {file = "Pillow-8.3.1-cp36-cp36m-macosx_10_10_x86_64.whl", hash = "sha256:196560dba4da7a72c5e7085fccc5938ab4075fd37fe8b5468869724109812edd"}, 407 | {file = "Pillow-8.3.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:29c9569049d04aaacd690573a0398dbd8e0bf0255684fee512b413c2142ab723"}, 408 | {file = "Pillow-8.3.1-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:c088a000dfdd88c184cc7271bfac8c5b82d9efa8637cd2b68183771e3cf56f04"}, 409 | {file = "Pillow-8.3.1-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:fc214a6b75d2e0ea7745488da7da3c381f41790812988c7a92345978414fad37"}, 410 | {file = 
"Pillow-8.3.1-cp36-cp36m-win32.whl", hash = "sha256:a17ca41f45cf78c2216ebfab03add7cc350c305c38ff34ef4eef66b7d76c5229"}, 411 | {file = "Pillow-8.3.1-cp36-cp36m-win_amd64.whl", hash = "sha256:67b3666b544b953a2777cb3f5a922e991be73ab32635666ee72e05876b8a92de"}, 412 | {file = "Pillow-8.3.1-cp37-cp37m-macosx_10_10_x86_64.whl", hash = "sha256:ff04c373477723430dce2e9d024c708a047d44cf17166bf16e604b379bf0ca14"}, 413 | {file = "Pillow-8.3.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9364c81b252d8348e9cc0cb63e856b8f7c1b340caba6ee7a7a65c968312f7dab"}, 414 | {file = "Pillow-8.3.1-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a2f381932dca2cf775811a008aa3027671ace723b7a38838045b1aee8669fdcf"}, 415 | {file = "Pillow-8.3.1-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:d0da39795049a9afcaadec532e7b669b5ebbb2a9134576ebcc15dd5bdae33cc0"}, 416 | {file = "Pillow-8.3.1-cp37-cp37m-win32.whl", hash = "sha256:2b6dfa068a8b6137da34a4936f5a816aba0ecc967af2feeb32c4393ddd671cba"}, 417 | {file = "Pillow-8.3.1-cp37-cp37m-win_amd64.whl", hash = "sha256:a4eef1ff2d62676deabf076f963eda4da34b51bc0517c70239fafed1d5b51500"}, 418 | {file = "Pillow-8.3.1-cp38-cp38-macosx_10_10_x86_64.whl", hash = "sha256:660a87085925c61a0dcc80efb967512ac34dbb256ff7dd2b9b4ee8dbdab58cf4"}, 419 | {file = "Pillow-8.3.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:15a2808e269a1cf2131930183dcc0419bc77bb73eb54285dde2706ac9939fa8e"}, 420 | {file = "Pillow-8.3.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:969cc558cca859cadf24f890fc009e1bce7d7d0386ba7c0478641a60199adf79"}, 421 | {file = "Pillow-8.3.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2ee77c14a0299d0541d26f3d8500bb57e081233e3fa915fa35abd02c51fa7fae"}, 422 | {file = "Pillow-8.3.1-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:c11003197f908878164f0e6da15fce22373ac3fc320cda8c9d16e6bba105b844"}, 423 | {file = 
"Pillow-8.3.1-cp38-cp38-win32.whl", hash = "sha256:3f08bd8d785204149b5b33e3b5f0ebbfe2190ea58d1a051c578e29e39bfd2367"}, 424 | {file = "Pillow-8.3.1-cp38-cp38-win_amd64.whl", hash = "sha256:70af7d222df0ff81a2da601fab42decb009dc721545ed78549cb96e3a1c5f0c8"}, 425 | {file = "Pillow-8.3.1-cp39-cp39-macosx_10_10_x86_64.whl", hash = "sha256:37730f6e68bdc6a3f02d2079c34c532330d206429f3cee651aab6b66839a9f0e"}, 426 | {file = "Pillow-8.3.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:4bc3c7ef940eeb200ca65bd83005eb3aae8083d47e8fcbf5f0943baa50726856"}, 427 | {file = "Pillow-8.3.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c35d09db702f4185ba22bb33ef1751ad49c266534339a5cebeb5159d364f6f82"}, 428 | {file = "Pillow-8.3.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:0b2efa07f69dc395d95bb9ef3299f4ca29bcb2157dc615bae0b42c3c20668ffc"}, 429 | {file = "Pillow-8.3.1-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:cc866706d56bd3a7dbf8bac8660c6f6462f2f2b8a49add2ba617bc0c54473d83"}, 430 | {file = "Pillow-8.3.1-cp39-cp39-win32.whl", hash = "sha256:9a211b663cf2314edbdb4cf897beeb5c9ee3810d1d53f0e423f06d6ebbf9cd5d"}, 431 | {file = "Pillow-8.3.1-cp39-cp39-win_amd64.whl", hash = "sha256:c2a5ff58751670292b406b9f06e07ed1446a4b13ffced6b6cab75b857485cbc8"}, 432 | {file = "Pillow-8.3.1-pp36-pypy36_pp73-macosx_10_10_x86_64.whl", hash = "sha256:c379425c2707078dfb6bfad2430728831d399dc95a7deeb92015eb4c92345eaf"}, 433 | {file = "Pillow-8.3.1-pp36-pypy36_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:114f816e4f73f9ec06997b2fde81a92cbf0777c9e8f462005550eed6bae57e63"}, 434 | {file = "Pillow-8.3.1-pp36-pypy36_pp73-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:8960a8a9f4598974e4c2aeb1bff9bdd5db03ee65fd1fce8adf3223721aa2a636"}, 435 | {file = "Pillow-8.3.1-pp37-pypy37_pp73-macosx_10_10_x86_64.whl", hash = "sha256:147bd9e71fb9dcf08357b4d530b5167941e222a6fd21f869c7911bac40b9994d"}, 436 | {file = 
"Pillow-8.3.1-pp37-pypy37_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:1fd5066cd343b5db88c048d971994e56b296868766e461b82fa4e22498f34d77"}, 437 | {file = "Pillow-8.3.1-pp37-pypy37_pp73-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:f4ebde71785f8bceb39dcd1e7f06bcc5d5c3cf48b9f69ab52636309387b097c8"}, 438 | {file = "Pillow-8.3.1-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:1c03e24be975e2afe70dfc5da6f187eea0b49a68bb2b69db0f30a61b7031cee4"}, 439 | {file = "Pillow-8.3.1.tar.gz", hash = "sha256:2cac53839bfc5cece8fdbe7f084d5e3ee61e1303cccc86511d351adcb9e2c792"}, 440 | ] 441 | pluggy = [ 442 | {file = "pluggy-0.13.1-py2.py3-none-any.whl", hash = "sha256:966c145cd83c96502c3c3868f50408687b38434af77734af1e9ca461a4081d2d"}, 443 | {file = "pluggy-0.13.1.tar.gz", hash = "sha256:15b2acde666561e1298d71b523007ed7364de07029219b604cf808bfa1c765b0"}, 444 | ] 445 | py = [ 446 | {file = "py-1.10.0-py2.py3-none-any.whl", hash = "sha256:3b80836aa6d1feeaa108e046da6423ab8f6ceda6468545ae8d02d9d58d18818a"}, 447 | {file = "py-1.10.0.tar.gz", hash = "sha256:21b81bda15b66ef5e1a777a21c4dcd9c20ad3efd0b3f817e7a809035269e1bd3"}, 448 | ] 449 | pyparsing = [ 450 | {file = "pyparsing-2.4.7-py2.py3-none-any.whl", hash = "sha256:ef9d7589ef3c200abe66653d3f1ab1033c3c419ae9b9bdb1240a85b024efc88b"}, 451 | {file = "pyparsing-2.4.7.tar.gz", hash = "sha256:c203ec8783bf771a155b207279b9bccb8dea02d8f0c9e5f8ead507bc3246ecc1"}, 452 | ] 453 | pytest = [ 454 | {file = "pytest-6.2.4-py3-none-any.whl", hash = "sha256:91ef2131a9bd6be8f76f1f08eac5c5317221d6ad1e143ae03894b862e8976890"}, 455 | {file = "pytest-6.2.4.tar.gz", hash = "sha256:50bcad0a0b9c5a72c8e4e7c9855a3ad496ca6a881a3641b4260605450772c54b"}, 456 | ] 457 | python-dateutil = [ 458 | {file = "python-dateutil-2.8.2.tar.gz", hash = "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86"}, 459 | {file = "python_dateutil-2.8.2-py2.py3-none-any.whl", hash = 
"sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9"}, 460 | ] 461 | pytz = [ 462 | {file = "pytz-2021.1-py2.py3-none-any.whl", hash = "sha256:eb10ce3e7736052ed3623d49975ce333bcd712c7bb19a58b9e2089d4057d0798"}, 463 | {file = "pytz-2021.1.tar.gz", hash = "sha256:83a4a90894bf38e243cf052c8b58f381bfe9a7a483f6a9cab140bc7f702ac4da"}, 464 | ] 465 | scipy = [ 466 | {file = "scipy-1.6.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:a15a1f3fc0abff33e792d6049161b7795909b40b97c6cc2934ed54384017ab76"}, 467 | {file = "scipy-1.6.1-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:e79570979ccdc3d165456dd62041d9556fb9733b86b4b6d818af7a0afc15f092"}, 468 | {file = "scipy-1.6.1-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:a423533c55fec61456dedee7b6ee7dce0bb6bfa395424ea374d25afa262be261"}, 469 | {file = "scipy-1.6.1-cp37-cp37m-manylinux2014_aarch64.whl", hash = "sha256:33d6b7df40d197bdd3049d64e8e680227151673465e5d85723b3b8f6b15a6ced"}, 470 | {file = "scipy-1.6.1-cp37-cp37m-win32.whl", hash = "sha256:6725e3fbb47da428794f243864f2297462e9ee448297c93ed1dcbc44335feb78"}, 471 | {file = "scipy-1.6.1-cp37-cp37m-win_amd64.whl", hash = "sha256:5fa9c6530b1661f1370bcd332a1e62ca7881785cc0f80c0d559b636567fab63c"}, 472 | {file = "scipy-1.6.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:bd50daf727f7c195e26f27467c85ce653d41df4358a25b32434a50d8870fc519"}, 473 | {file = "scipy-1.6.1-cp38-cp38-manylinux1_i686.whl", hash = "sha256:f46dd15335e8a320b0fb4685f58b7471702234cba8bb3442b69a3e1dc329c345"}, 474 | {file = "scipy-1.6.1-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:0e5b0ccf63155d90da576edd2768b66fb276446c371b73841e3503be1d63fb5d"}, 475 | {file = "scipy-1.6.1-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:2481efbb3740977e3c831edfd0bd9867be26387cacf24eb5e366a6a374d3d00d"}, 476 | {file = "scipy-1.6.1-cp38-cp38-win32.whl", hash = "sha256:68cb4c424112cd4be886b4d979c5497fba190714085f46b8ae67a5e4416c32b4"}, 477 | {file = "scipy-1.6.1-cp38-cp38-win_amd64.whl", hash = 
"sha256:5f331eeed0297232d2e6eea51b54e8278ed8bb10b099f69c44e2558c090d06bf"}, 478 | {file = "scipy-1.6.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:0c8a51d33556bf70367452d4d601d1742c0e806cd0194785914daf19775f0e67"}, 479 | {file = "scipy-1.6.1-cp39-cp39-manylinux1_i686.whl", hash = "sha256:83bf7c16245c15bc58ee76c5418e46ea1811edcc2e2b03041b804e46084ab627"}, 480 | {file = "scipy-1.6.1-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:794e768cc5f779736593046c9714e0f3a5940bc6dcc1dba885ad64cbfb28e9f0"}, 481 | {file = "scipy-1.6.1-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:5da5471aed911fe7e52b86bf9ea32fb55ae93e2f0fac66c32e58897cfb02fa07"}, 482 | {file = "scipy-1.6.1-cp39-cp39-win32.whl", hash = "sha256:8e403a337749ed40af60e537cc4d4c03febddcc56cd26e774c9b1b600a70d3e4"}, 483 | {file = "scipy-1.6.1-cp39-cp39-win_amd64.whl", hash = "sha256:a5193a098ae9f29af283dcf0041f762601faf2e595c0db1da929875b7570353f"}, 484 | {file = "scipy-1.6.1.tar.gz", hash = "sha256:c4fceb864890b6168e79b0e714c585dbe2fd4222768ee90bc1aa0f8218691b11"}, 485 | ] 486 | six = [ 487 | {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"}, 488 | {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, 489 | ] 490 | sqlparse = [ 491 | {file = "sqlparse-0.4.1-py3-none-any.whl", hash = "sha256:017cde379adbd6a1f15a61873f43e8274179378e95ef3fede90b5aa64d304ed0"}, 492 | {file = "sqlparse-0.4.1.tar.gz", hash = "sha256:0f91fd2e829c44362cbcfab3e9ae12e22badaa8a29ad5ff599f9ec109f0454e8"}, 493 | ] 494 | toml = [ 495 | {file = "toml-0.10.2-py2.py3-none-any.whl", hash = "sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b"}, 496 | {file = "toml-0.10.2.tar.gz", hash = "sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f"}, 497 | ] 498 | typing-extensions = [ 499 | {file = "typing_extensions-3.10.0.0-py2-none-any.whl", hash = 
"sha256:0ac0f89795dd19de6b97debb0c6af1c70987fd80a2d62d1958f7e56fcc31b497"}, 500 | {file = "typing_extensions-3.10.0.0-py3-none-any.whl", hash = "sha256:779383f6086d90c99ae41cf0ff39aac8a7937a9283ce0a414e5dd782f4c94a84"}, 501 | {file = "typing_extensions-3.10.0.0.tar.gz", hash = "sha256:50b6f157849174217d0656f99dc82fe932884fb250826c18350e159ec6cdf342"}, 502 | ] 503 | zipp = [ 504 | {file = "zipp-3.5.0-py3-none-any.whl", hash = "sha256:957cfda87797e389580cb8b9e3870841ca991e2125350677b2ca83a0e99390a3"}, 505 | {file = "zipp-3.5.0.tar.gz", hash = "sha256:f5812b1e007e48cff63449a5e9f4e7ebea716b4111f9c4f9a645f91d579bf0c4"}, 506 | ] 507 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "syntheticdb" 3 | version = "0.0.19" 4 | description = "" 5 | authors = [] 6 | license = "MIT" 7 | 8 | [tool.poetry.dependencies] 9 | python = ">=3.7.1,<4.0" 10 | scipy = ">=1" 11 | pandas = ">=1" 12 | matplotlib = ">=3" 13 | sqlparse = "^0.4.1" 14 | 15 | [tool.poetry.dev-dependencies] 16 | pytest = "^6.2.4" 17 | 18 | [build-system] 19 | requires = ["poetry-core>=1.0.0"] 20 | build-backend = "poetry.core.masonry.api" 21 | -------------------------------------------------------------------------------- /syntheticdb/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SyntheticAnalytics/SyntheticDB/53d0e3e57a3b51725c7f9b310e3a8c6bcdd7834b/syntheticdb/__init__.py -------------------------------------------------------------------------------- /syntheticdb/db_core.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from typing import List, TypeVar, Callable, Union, Dict 3 | from copy import deepcopy 4 | import pandas as pd 5 | 6 | from syntheticdb.query_parser import Query, 
def sample_until(callable: Callable[[], T], condition: Callable[[T], bool]) -> T:
    """Draw values from ``callable`` until one satisfies ``condition``.

    This is plain rejection sampling: the loop only ends when ``condition``
    returns a truthy value, so a condition that can never be met never
    returns.
    """
    while True:
        out_val = callable()
        if condition(out_val):
            return out_val


@dataclass
class Distribution:
    """A sampler paired with its cumulative distribution function."""

    # Zero-argument callable producing one draw.
    sample: Callable[[], float]
    # Maps a point to the cumulative mass at or below that point.
    cdf: Callable[[float], float]


@dataclass(frozen=True)
class FloatColumn:
    """A float column described by the distribution its values follow."""

    distribution: Distribution

    def where(self, condition: "FloatRangeCondition") -> "FloatColumn":
        """Return a new column restricted to the open range in ``condition``.

        The returned CDF is deliberately *not* renormalised: it measures the
        mass of the parent distribution that lies inside the range and at or
        below the queried point.  Its value far to the right is therefore the
        fraction of rows that survive the filter, which is what ``prob``
        reads.

        Raises:
            Exception: if neither ``condition.min`` nor ``condition.max`` is set.
        """
        lower = condition.min
        upper = condition.max
        # Compare against None explicitly: 0 is a perfectly valid bound.
        if lower is None and upper is None:
            raise Exception(
                "Invalid condition, must have at least one range value populated"
            )

        base = self.distribution
        # Mass of the parent distribution cut away below the range.
        mass_below = base.cdf(lower) if lower is not None else 0
        # Total mass kept by the filter, only needed when there is an upper bound.
        mass_kept = base.cdf(upper) - mass_below if upper is not None else None

        def new_cdf(point):
            # Inclusive comparisons so a point sitting exactly on a bound
            # still gets a number back instead of falling through to None.
            if lower is not None and point <= lower:
                return 0
            if upper is not None and point >= upper:
                return mass_kept
            return base.cdf(point) - mass_below

        def in_range(value):
            # Sampling keeps the bounds exclusive.
            return (lower is None or value > lower) and (
                upper is None or value < upper
            )

        return FloatColumn(
            Distribution(
                sample=lambda: sample_until(base.sample, in_range),
                cdf=new_cdf,
            )
        )

    def prob(self):
        """Return the CDF evaluated at 1e10.

        For a column produced by ``where`` this is the fraction of the parent
        distribution that passes the filter.
        """
        return self.distribution.cdf(1e10)


@dataclass(frozen=True)
class Table:
    """A named set of columns plus the unfiltered row count."""

    columns: Dict[str, Union[FloatColumn]]
    row_count: int

    def get_row_num(self) -> int:
        """Return ``row_count`` scaled by the product of every column's ``prob()``."""
        probability = 1
        for column in self.columns.values():
            probability = probability * column.prob()
        return int(self.row_count * probability)


@dataclass
class DataBase:
    """A collection of synthetic tables that can be queried with SQL text."""

    tables: Dict[str, Table]

    def select(self, query_text: str):
        """Parse ``query_text`` and return the sampled result."""
        query = parse_sql_to_query(query_text)
        return self.select_from_query(query)

    def select_from_query(self, query: "Query") -> "pd.DataFrame":
        """Apply the query's where clauses and sample the requested columns.

        Each selected column is sampled independently, once per surviving row.

        Raises:
            Exception: if the table or a filtered column does not exist, or a
                clause carries a condition type that is not supported.
        """
        table_name = query.table_name.strip("`")
        table = self.tables.get(table_name)
        if table is None:
            # Fail with a clear message instead of an AttributeError on None.
            raise Exception(f"table with name: {table_name}, does not exist")

        # Work on a copy so the filters never leak into the stored table.
        view_table = deepcopy(table)
        for clause in query.where_clauses:
            column = view_table.columns.get(clause.column_name)
            if column is None:
                raise Exception(
                    f"column with name: {clause.column_name}, does not exist"
                )
            if not isinstance(clause.condition, FloatRangeCondition):
                raise Exception("Condition type not supported")
            view_table.columns[clause.column_name] = column.where(clause.condition)

        new_row_count = view_table.get_row_num()
        select_all = query.columns == "*"
        columns_to_return = {
            name: [col.distribution.sample() for _ in range(new_row_count)]
            for name, col in view_table.columns.items()
            if select_all or name in query.columns
        }
        return pd.DataFrame.from_dict(columns_to_return)
## helper fn
def makeDistribution(dist) -> Distribution:
    """Wrap an object exposing ``rvs`` and ``cdf`` in a ``Distribution``."""
    return Distribution(cdf=dist.cdf, sample=dist.rvs)


## distributions ##

def Uniform(loc: float, scale: float) -> Distribution:
    """Build a ``Distribution`` from ``scipy.stats.uniform(loc, scale)``."""
    return makeDistribution(uniform(loc=loc, scale=scale))


def LogUniform(a: float, b: float) -> Distribution:
    """Build a ``Distribution`` from ``scipy.stats.loguniform(a, b)``."""
    return makeDistribution(loguniform(a=a, b=b))


def Normal(loc: float, scale: float) -> Distribution:
    """Build a ``Distribution`` from ``scipy.stats.norm(loc, scale)``."""
    return makeDistribution(norm(loc=loc, scale=scale))


def LogNormal(s: float, loc: float, scale: float) -> Distribution:
    """Build a ``Distribution`` from ``scipy.stats.lognorm(s, loc, scale)``."""
    return makeDistribution(lognorm(s=s, loc=loc, scale=scale))


def Gamma(a: float, loc: float, scale: float) -> Distribution:
    """Build a ``Distribution`` from ``scipy.stats.gamma(a, loc, scale)``."""
    return makeDistribution(gamma(a=a, loc=loc, scale=scale))


def Exponential(loc: float, scale: float) -> Distribution:
    """Build a ``Distribution`` from ``scipy.stats.expon(loc, scale)``."""
    return makeDistribution(expon(loc=loc, scale=scale))


def Beta(a: float, b: float) -> Distribution:
    """Build a ``Distribution`` from ``scipy.stats.beta(a, b)``."""
    return makeDistribution(beta(a=a, b=b))


def Weibull(c: float, loc: float, scale: float) -> Distribution:
    """Build a ``Distribution`` from ``scipy.stats.weibull_min(c, loc, scale)``."""
    return makeDistribution(weibull_min(c=c, loc=loc, scale=scale))
@dataclass
class FloatRangeCondition:
    """A numeric range filter; either bound may be ``None`` to leave that side open."""

    min: Optional[float]
    max: Optional[float]


@dataclass
class WhereClause:
    """One filter from a WHERE clause: the column it targets and its condition."""

    column_name: str
    condition: FloatRangeCondition


@dataclass
class Query:
    """The parsed form of a SELECT statement."""

    table_name: str
    # Either the literal string "*" or an explicit list of column names.
    columns: Union[List[str], str]
    where_clauses: List[WhereClause]
def query(
    columns: Union[List[str], str],
    table_name: str,
    where_clauses: List[WhereClause],
) -> Query:
    """Build a ``Query`` from positional arguments in parse order.

    The parse steps bind ``columns``, then ``table_name``, then
    ``where_clauses`` onto a ``functools.partial`` of this function, so the
    parameter order here mirrors the order in which a SELECT is read.
    """
    return Query(table_name=table_name, columns=columns, where_clauses=where_clauses)


# Signature shared by every parse step: take the partially-applied builder and
# the remaining tokens, return the updated builder and the tokens left over.
parser = Callable[[partial, List[Token]], Tuple[partial, List[Token]]]


def parse_select(parse: partial, tokens: List[Token]) -> Tuple[partial, List[Token]]:
    """Consume the leading SELECT keyword.

    Raises:
        Exception: if the token stream is empty or does not start with SELECT.
    """
    # Guard the empty case so truncated SQL gets the parser's own message
    # rather than an IndexError.
    if tokens and tokens[0].normalized == "SELECT":
        return parse, tokens[1:]
    raise Exception("SQL query must start with a select")


def parse_columns(parse: partial, tokens: List[Token]) -> Tuple[partial, List[Token]]:
    """Consume the column list and bind it onto the builder.

    Binds the string ``"*"`` for a wildcard, otherwise a list of column names.

    Raises:
        Exception: if no column specification follows SELECT.
    """
    if not tokens:
        raise Exception("Must specify columns")
    token = tokens[0]
    if token.normalized == "*":
        return partial(parse, "*"), tokens[1:]
    if isinstance(token, Identifier):
        return partial(parse, [token.normalized]), tokens[1:]
    if isinstance(token, IdentifierList):
        # The list also holds commas and whitespace; keep only the identifiers.
        identifier_list = [
            tok.normalized for tok in token.tokens if isinstance(tok, Identifier)
        ]
        return partial(parse, identifier_list), tokens[1:]
    raise Exception("Must specify columns")


def parse_from(parse: partial, tokens: List[Token]) -> Tuple[partial, List[Token]]:
    """Consume the FROM keyword.

    Raises:
        Exception: if the next token is missing or is not FROM.
    """
    if tokens and tokens[0].normalized == "FROM":
        return parse, tokens[1:]
    raise Exception("Columns must be followed by a FROM clause")


def parse_table(parse: partial, tokens: List[Token]) -> Tuple[partial, List[Token]]:
    """Consume the table identifier and bind its name onto the builder.

    Raises:
        Exception: if the next token is missing or is not an identifier.
    """
    if tokens and isinstance(tokens[0], Identifier):
        return partial(parse, tokens[0].normalized), tokens[1:]
    raise Exception("From statement must be followed by a table name")
def parse_where(parse: partial, tokens: List[Token]) -> Tuple[partial, List[Token]]:
    """Consume a WHERE group and bind its comparisons onto the builder.

    Every comparison in the group becomes one ``WhereClause``.  ``<`` and
    ``<=`` set the upper bound, ``>`` and ``>=`` set the lower bound.  Only
    the comparisons are read; the keywords joining them are not inspected.

    Raises:
        Exception: if the next token is not a WHERE group, or a comparison
            uses an operator that cannot be expressed as a float range.
        ValueError: if the right-hand side of a comparison is not a number.
    """
    where_token = tokens[0] if tokens else None
    if not isinstance(where_token, Where):
        raise Exception(
            "Where select statement must either terminate or be followed by a where clause"
        )

    conditions = []
    for comparison in where_token.tokens:
        if not isinstance(comparison, Comparison):
            continue
        # Drop whitespace before locating the operator so "age>5" and
        # "age > 5" are read the same way.
        parts = [tok for tok in comparison.tokens if not tok.is_whitespace]
        if len(parts) < 3:
            raise Exception(f"Unsupported comparison: {comparison}")
        column = comparison.left.normalized
        num = float(comparison.right.normalized)
        comparator = parts[1].normalized
        if comparator in ("<", "<="):
            conditions.append(WhereClause(column, FloatRangeCondition(None, num)))
        elif comparator in (">", ">="):
            conditions.append(WhereClause(column, FloatRangeCondition(num, None)))
        else:
            # Silently skipping the filter would return unfiltered rows.
            raise Exception(f"Unsupported comparison operator: {comparator}")
    return partial(parse, conditions), tokens[1:]


def parse_sql_to_query(raw: str) -> Query:
    """Parse a ``SELECT ... FROM ... [WHERE ...]`` statement into a ``Query``.

    Only the first statement in ``raw`` is read.

    Raises:
        Exception: if the statement does not follow the supported shape.
    """
    statements = sqlparse.parse(raw)
    if not statements:
        # Empty input: report it the same way as any other non-SELECT text.
        raise Exception("SQL query must start with a select")
    tokens = [token for token in statements[0] if not token.is_whitespace]

    parse = partial(query)
    for step in (parse_select, parse_columns, parse_from, parse_table):
        parse, tokens = step(parse, tokens)
    if not tokens:
        # No WHERE clause: finish the builder with an empty filter list.
        return parse([])
    parse, tokens = parse_where(parse, tokens)
    return parse()


@dataclass
class ParseResult:
    """Outcome record for a parse attempt."""

    success: bool
    query_inputs: List[Token]
    query_result: Optional[Query]


if __name__ == "__main__":
    print(
        parse_sql_to_query(
            "select age, height from synth_user where age > 5 and height < 2"
        )
    )
def test_parse_table():
    """A bare ``select *`` yields the wildcard and no filters."""
    expected = Query(table_name="synth_user", where_clauses=[], columns="*")
    assert parse_sql_to_query("select * from synth_user") == expected


def test_parse_where():
    """A ``<`` comparison becomes an upper-bounded range on that column."""
    height_filter = WhereClause("height", condition=FloatRangeCondition(None, 5))
    expected = Query(
        table_name="synth_user",
        where_clauses=[height_filter],
        columns="*",
    )
    assert parse_sql_to_query("select * from synth_user where height < 5") == expected


def test_parse_columns():
    """An explicit column list is returned as a list of names."""
    height_filter = WhereClause("height", condition=FloatRangeCondition(None, 5))
    expected = Query(
        table_name="synth_user",
        where_clauses=[height_filter],
        columns=["age", "height"],
    )
    actual = parse_sql_to_query("select age, height from synth_user where height < 5")
    assert actual == expected