├── .github └── workflows │ ├── publish.yml │ └── test.yml ├── .gitignore ├── LICENSE ├── README.md ├── setup.py ├── sqlite_transform ├── __init__.py └── cli.py └── tests ├── conftest.py ├── test_jsonsplit.py ├── test_lambda.py └── test_parsedate.py /.github/workflows/publish.yml: -------------------------------------------------------------------------------- 1 | name: Publish Python Package 2 | 3 | on: 4 | release: 5 | types: [created] 6 | 7 | jobs: 8 | test: 9 | runs-on: ubuntu-latest 10 | strategy: 11 | matrix: 12 | python-version: [3.6, 3.7, 3.8, 3.9] 13 | steps: 14 | - uses: actions/checkout@v2 15 | - name: Set up Python ${{ matrix.python-version }} 16 | uses: actions/setup-python@v2 17 | with: 18 | python-version: ${{ matrix.python-version }} 19 | - uses: actions/cache@v2 20 | name: Configure pip caching 21 | with: 22 | path: ~/.cache/pip 23 | key: ${{ runner.os }}-pip-${{ hashFiles('**/setup.py') }} 24 | restore-keys: | 25 | ${{ runner.os }}-pip- 26 | - name: Install dependencies 27 | run: | 28 | pip install -e '.[test]' 29 | - name: Run tests 30 | run: | 31 | pytest 32 | deploy: 33 | runs-on: ubuntu-latest 34 | needs: [test] 35 | steps: 36 | - uses: actions/checkout@v2 37 | - name: Set up Python 38 | uses: actions/setup-python@v2 39 | with: 40 | python-version: '3.9' 41 | - uses: actions/cache@v2 42 | name: Configure pip caching 43 | with: 44 | path: ~/.cache/pip 45 | key: ${{ runner.os }}-publish-pip-${{ hashFiles('**/setup.py') }} 46 | restore-keys: | 47 | ${{ runner.os }}-publish-pip- 48 | - name: Install dependencies 49 | run: | 50 | pip install setuptools wheel twine 51 | - name: Publish 52 | env: 53 | TWINE_USERNAME: __token__ 54 | TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }} 55 | run: | 56 | python setup.py sdist bdist_wheel 57 | twine upload dist/* 58 | 59 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: Test 2 | 3 | on: [push] 4 | 5 | jobs: 6 | test: 7 | runs-on: ubuntu-latest 8 | strategy: 9 | matrix: 10 | python-version: [3.6, 3.7, 3.8, 3.9] 11 | steps: 12 | - uses: actions/checkout@v2 13 | - name: Set up Python ${{ matrix.python-version }} 14 | uses: actions/setup-python@v2 15 | with: 16 | python-version: ${{ matrix.python-version }} 17 | - uses: actions/cache@v2 18 | name: Configure pip caching 19 | with: 20 | path: ~/.cache/pip 21 | key: ${{ runner.os }}-pip-${{ hashFiles('**/setup.py') }} 22 | restore-keys: | 23 | ${{ runner.os }}-pip- 24 | - name: Install dependencies 25 | run: | 26 | pip install -e '.[test]' 27 | - name: Run tests 28 | run: | 29 | pytest 30 | 31 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .venv 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | venv 6 | .eggs 7 | .pytest_cache 8 | *.egg-info -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 
11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 
134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 
193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # sqlite-transform 2 | 3 | ![No longer maintained](https://img.shields.io/badge/no%20longer-maintained-red) 4 | [![PyPI](https://img.shields.io/pypi/v/sqlite-transform.svg)](https://pypi.org/project/sqlite-transform/) 5 | [![Changelog](https://img.shields.io/github/v/release/simonw/sqlite-transform?include_prereleases&label=changelog)](https://github.com/simonw/sqlite-transform/releases) 6 | [![Tests](https://github.com/simonw/sqlite-transform/workflows/Test/badge.svg)](https://github.com/simonw/sqlite-transform/actions?query=workflow%3ATest) 7 | [![License](https://img.shields.io/badge/license-Apache%202.0-blue.svg)](https://github.com/dogsheep/sqlite-transform/blob/main/LICENSE) 8 | 9 | Tool for running transformations on columns in a SQLite database. 10 | 11 | > **:warning: This tool is no longer maintained** 12 | > 13 | > I added a new tool to [sqlite-utils](https://sqlite-utils.datasette.io/) called [sqlite-utils convert](https://sqlite-utils.datasette.io/en/stable/cli.html#converting-data-in-columns) which provides a superset of the functionality originally provided here. `sqlite-transform` is no longer maintained, and I recommend switching to using `sqlite-utils convert` instead. 14 | 15 | ## How to install 16 | 17 | pip install sqlite-transform 18 | 19 | ## parsedate and parsedatetime 20 | 21 | These subcommands will run all values in the specified column through `dateutil.parser.parse()` and replace them with the result, formatted as an ISO timestamp or ISO date. 22 | 23 | For example, if a row in the database has an `opened` column which contains `10/10/2019 08:10:00 PM`, running the following command: 24 | 25 | sqlite-transform parsedatetime my.db mytable opened 26 | 27 | Will result in that value being replaced by `2019-10-10T20:10:00`. 28 | 29 | Using the `parsedate` subcommand here would result in `2019-10-10` instead. 30 | 31 | In the case of ambiguous dates such as `03/04/05` these commands both default to assuming American-style `mm/dd/yy` format. You can pass `--dayfirst` to specify that the day should be assumed to be first, or `--yearfirst` for the year. 32 | 33 | ## jsonsplit 34 | 35 | The `jsonsplit` subcommand takes columns that contain a comma-separated list, for example a `tags` column containing records like `"trees,park,dogs"` and converts it into a JSON array `["trees", "park", "dogs"]`. 36 | 37 | This is useful for taking advantage of Datasette's [Facet by JSON array](https://docs.datasette.io/en/stable/facets.html#facet-by-json-array) feature.
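Under the hood the transformation is a simple split-and-serialize, mirroring the `convert()` helper in `sqlite_transform/cli.py`. A minimal standalone sketch of the same logic (the `split_to_json` name is just for illustration):

```python
import json


def split_to_json(value, delimiter=","):
    # Split on the delimiter, strip surrounding whitespace from each part,
    # then serialize the parts back out as a JSON array of strings.
    return json.dumps([part.strip() for part in value.split(delimiter)])


print(split_to_json("trees,park,dogs"))
# ["trees", "park", "dogs"]
```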
38 | 39 | sqlite-transform jsonsplit my.db mytable tags 40 | 41 | It defaults to splitting on commas, but you can specify a different delimiter character using the `--delimiter` option, for example: 42 | 43 | sqlite-transform jsonsplit \ 44 | my.db mytable tags --delimiter ';' 45 | 46 | Values within the array will be treated as strings, so a column containing `123,552,775` will be converted into the JSON array `["123", "552", "775"]`. 47 | 48 | You can specify a different type for these values using `--type int` or `--type float`, for example: 49 | 50 | sqlite-transform jsonsplit \ 51 | my.db mytable tags --type int 52 | 53 | This will result in that column being converted into `[123, 552, 775]`. 54 | 55 | ## lambda for executing your own code 56 | 57 | The `lambda` subcommand lets you specify Python code which will be executed against the column. 58 | 59 | Here's how to convert a column to uppercase: 60 | 61 | sqlite-transform lambda my.db mytable mycolumn --code='str(value).upper()' 62 | 63 | The code you provide will be compiled into a function that takes `value` as a single argument. You can break your function body into multiple lines, provided the last line is a `return` statement: 64 | 65 | sqlite-transform lambda my.db mytable mycolumn --code='value = str(value) 66 | return value.upper()' 67 | 68 | You can also specify Python modules that should be imported and made available to your code using one or more `--import` options: 69 | 70 | sqlite-transform lambda my.db mytable mycolumn \ 71 | --code='"\n".join(textwrap.wrap(value, 10))' \ 72 | --import=textwrap 73 | 74 | The `--dry-run` option will output a preview of the transformation against the first ten rows, without modifying the database. 75 | 76 | ## Saving the result to a separate column 77 | 78 | Each of these commands accepts optional `--output` and `--output-type` options. These can be used to save the result of the transformation to a separate column, which will be created if the column does not already exist. 79 | 80 | To save the result of `jsonsplit` to a new column called `json_tags`, use the following: 81 | 82 | sqlite-transform jsonsplit my.db mytable tags \ 83 | --output json_tags 84 | 85 | The type of the created column defaults to `text`, but a different column type can be specified using `--output-type`. This example will create a new floating point column called `float_id` with a copy of each item's ID increased by 0.5: 86 | 87 | sqlite-transform lambda my.db mytable id \ 88 | --code 'float(value) + 0.5' \ 89 | --output float_id \ 90 | --output-type float 91 | 92 | You can drop the original column at the end of the operation by adding `--drop`. 93 | 94 | ## Splitting a column into multiple columns 95 | 96 | Sometimes you may wish to convert a single column into multiple derived columns. For example, you may have a `location` column containing `latitude,longitude` values which you wish to split out into separate `latitude` and `longitude` columns. 97 | 98 | You can achieve this using the `--multi` option to `sqlite-transform lambda`. This option expects your `--code` function to return a Python dictionary: new columns will be created and populated for each of the keys in that dictionary.
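Concretely, the `--code` body receives the original column value and should return a dictionary mapping new column names to values. Here is a minimal sketch of such a function in plain Python, matching the `latitude,longitude` example that follows (the `split_location` name is only for illustration):

```python
def split_location(value):
    # e.g. "37.77,-122.41" -> {"latitude": 37.77, "longitude": -122.41}
    latitude, longitude = value.split(",")
    return {
        "latitude": float(latitude),
        "longitude": float(longitude),
    }
```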
99 | 100 | For the `latitude,longitude` example you would use the following: 101 | 102 | sqlite-transform lambda demo.db places location \ 103 | --code 'return { 104 | "latitude": float(value.split(",")[0]), 105 | "longitude": float(value.split(",")[1]), 106 | }' --multi 107 | 108 | The type of the returned values will be taken into account when creating the new columns. In this example, the resulting database schema will look like this: 109 | 110 | ```sql 111 | CREATE TABLE [places] ( 112 | [location] TEXT, 113 | [latitude] FLOAT, 114 | [longitude] FLOAT 115 | ); 116 | ``` 117 | The code function can also return `None`, in which case its output will be ignored. 118 | 119 | You can drop the original column at the end of the operation by adding `--drop`. 120 | 121 | ## Disabling the progress bar 122 | 123 | By default each command will show a progress bar. Pass `-s` or `--silent` to hide that progress bar. 124 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | import os 3 | 4 | VERSION = "1.2.1" 5 | 6 | 7 | def get_long_description(): 8 | with open( 9 | os.path.join(os.path.dirname(os.path.abspath(__file__)), "README.md"), 10 | encoding="utf8", 11 | ) as fp: 12 | return fp.read() 13 | 14 | 15 | setup( 16 | name="sqlite-transform", 17 | description="Tool for running transformations on columns in a SQLite database.", 18 | long_description=get_long_description(), 19 | long_description_content_type="text/markdown", 20 | author="Simon Willison", 21 | url="https://github.com/simonw/sqlite-transform", 22 | license="Apache License, Version 2.0", 23 | version=VERSION, 24 | packages=["sqlite_transform"], 25 | entry_points=""" 26 | [console_scripts] 27 | sqlite-transform=sqlite_transform.cli:cli 28 | """, 29 | install_requires=["dateutils", "tqdm", "click", "sqlite-utils"], 30 | extras_require={"test": ["pytest"]}, 31 | tests_require=["sqlite-transform[test]"], 32 | ) 33 | -------------------------------------------------------------------------------- /sqlite_transform/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/simonw/sqlite-transform/049b2fcb6671f86a0916392e0a84dedc160d8fa3/sqlite_transform/__init__.py -------------------------------------------------------------------------------- /sqlite_transform/cli.py: -------------------------------------------------------------------------------- 1 | import click 2 | from dateutil import parser 3 | import json 4 | import sqlite3 5 | import sqlite_utils 6 | import tqdm 7 | 8 | sqlite3.enable_callback_tracebacks(True) 9 | 10 | 11 | def common_options(fn): 12 | click.option("-s", "--silent", is_flag=True, help="Don't show a progress bar")(fn) 13 | click.option("--drop", is_flag=True, help="Drop original column afterwards")(fn) 14 | click.option( 15 | "--output-type", 16 | help="Column type to use for the output column", 17 | default="text", 18 | type=click.Choice(["integer", "float", "blob", "text"]), 19 | )(fn) 20 | click.option( 21 | "--output", help="Optional separate column to populate with the output" 22 | )(fn) 23 | return fn 24 | 25 | 26 | @click.group() 27 | @click.version_option() 28 | def cli(): 29 | "Tool for running transformations on columns in a SQLite database." 
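# Each subcommand below (parsedate, parsedatetime, jsonsplit and lambda) follows the
# same shape: a DB_PATH argument, a TABLE argument, one or more COLUMNS arguments,
# any command-specific options, plus the shared --output/--output-type/--drop/--silent
# options applied by common_options(). Each builds a Python callable for a single value
# and hands it to _transform(), which registers that callable as a SQLite function and
# applies it with a single UPDATE statement; lambda --multi instead goes through
# _transform_multi(), which evaluates the function row by row in Python and writes the
# returned dictionaries back using sqlite-utils.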
30 | 31 | 32 | @cli.command() 33 | @click.argument( 34 | "db_path", 35 | type=click.Path(file_okay=True, dir_okay=False, allow_dash=False), 36 | required=True, 37 | ) 38 | @click.argument("table", type=str) 39 | @click.argument("columns", type=str, nargs=-1, required=True) 40 | @click.option( 41 | "--dayfirst", 42 | is_flag=True, 43 | help="Assume day comes first in ambiguous dates, e.g. 03/04/05", 44 | ) 45 | @click.option( 46 | "--yearfirst", 47 | is_flag=True, 48 | help="Assume year comes first in ambiguous dates, e.g. 03/04/05", 49 | ) 50 | @common_options 51 | def parsedate( 52 | db_path, table, columns, dayfirst, yearfirst, output, output_type, drop, silent 53 | ): 54 | """ 55 | Parse and convert columns to ISO dates 56 | """ 57 | _transform( 58 | db_path, 59 | table, 60 | columns, 61 | lambda v: parser.parse(v, dayfirst=dayfirst, yearfirst=yearfirst) 62 | .date() 63 | .isoformat(), 64 | output, 65 | output_type, 66 | drop, 67 | silent, 68 | ) 69 | 70 | 71 | @cli.command() 72 | @click.argument( 73 | "db_path", 74 | type=click.Path(file_okay=True, dir_okay=False, allow_dash=False), 75 | required=True, 76 | ) 77 | @click.argument("table", type=str) 78 | @click.argument("columns", type=str, nargs=-1, required=True) 79 | @click.option( 80 | "--dayfirst", 81 | is_flag=True, 82 | help="Assume day comes first in ambiguous dates, e.g. 03/04/05", 83 | ) 84 | @click.option( 85 | "--yearfirst", 86 | is_flag=True, 87 | help="Assume year comes first in ambiguous dates, e.g. 03/04/05", 88 | ) 89 | @common_options 90 | def parsedatetime( 91 | db_path, table, columns, dayfirst, yearfirst, output, output_type, drop, silent 92 | ): 93 | """ 94 | Parse and convert columns to ISO timestamps 95 | """ 96 | _transform( 97 | db_path, 98 | table, 99 | columns, 100 | lambda v: parser.parse(v, dayfirst=dayfirst, yearfirst=yearfirst).isoformat(), 101 | output, 102 | output_type, 103 | drop, 104 | silent, 105 | ) 106 | 107 | 108 | @cli.command() 109 | @click.argument( 110 | "db_path", 111 | type=click.Path(file_okay=True, dir_okay=False, allow_dash=False), 112 | required=True, 113 | ) 114 | @click.argument("table", type=str) 115 | @click.argument("columns", type=str, nargs=-1, required=True) 116 | @click.option("--delimiter", default=",", help="Delimiter to split on") 117 | @click.option( 118 | "--type", 119 | type=click.Choice(("int", "float")), 120 | help="Type to use for values - int or float (defaults to string)", 121 | ) 122 | @common_options 123 | def jsonsplit( 124 | db_path, table, columns, delimiter, type, output, output_type, drop, silent 125 | ): 126 | """ 127 | Convert columns into JSON arrays by splitting on a delimiter 128 | """ 129 | value_convert = lambda s: s.strip() 130 | if type == "int": 131 | value_convert = lambda s: int(s.strip()) 132 | elif type == "float": 133 | value_convert = lambda s: float(s.strip()) 134 | 135 | def convert(value): 136 | return json.dumps([value_convert(s) for s in value.split(delimiter)]) 137 | 138 | _transform(db_path, table, columns, convert, output, output_type, drop, silent) 139 | 140 | 141 | @cli.command(name="lambda") 142 | @click.argument( 143 | "db_path", 144 | type=click.Path(file_okay=True, dir_okay=False, allow_dash=False), 145 | required=True, 146 | ) 147 | @click.argument("table", type=str) 148 | @click.argument("columns", type=str, nargs=-1, required=True) 149 | @click.option( 150 | "--code", type=str, required=True, help="Python code to transform 'value'" 151 | ) 152 | @click.option( 153 | "--import", "imports", type=str, multiple=True, help="Python 
modules to import" 154 | ) 155 | @click.option( 156 | "--dry-run", is_flag=True, help="Show results of running this against first 10 rows" 157 | ) 158 | @click.option( 159 | "--multi", is_flag=True, help="Populate columns for keys in returned dictionary" 160 | ) 161 | @common_options 162 | def lambda_( 163 | db_path, 164 | table, 165 | columns, 166 | code, 167 | imports, 168 | dry_run, 169 | multi, 170 | output, 171 | output_type, 172 | drop, 173 | silent, 174 | ): 175 | """ 176 | Transform columns using Python code you supply. For example: 177 | 178 | \b 179 | $ sqlite-transform lambda my.db mytable mycolumn 180 | --code='"\\n".join(textwrap.wrap(value, 10))' 181 | --import=textwrap 182 | 183 | "value" is a variable with the column value to be transformed. 184 | """ 185 | if output is not None and len(columns) > 1: 186 | raise click.ClickException("Cannot use --output with more than one column") 187 | if multi and len(columns) > 1: 188 | raise click.ClickException("Cannot use --multi with more than one column") 189 | # If single line and no 'return', add the return 190 | if "\n" not in code and not code.strip().startswith("return "): 191 | code = "return {}".format(code) 192 | # Compile the code into a function body called fn(value) 193 | new_code = ["def fn(value):"] 194 | for line in code.split("\n"): 195 | new_code.append(" {}".format(line)) 196 | code_o = compile("\n".join(new_code), "", "exec") 197 | locals = {} 198 | globals = {} 199 | for import_ in imports: 200 | globals[import_] = __import__(import_) 201 | exec(code_o, globals, locals) 202 | fn = locals["fn"] 203 | if dry_run: 204 | # Pull first 20 values for first column and preview them 205 | db = sqlite3.connect(db_path) 206 | db.create_function("preview_transform", 1, lambda v: fn(v) if v else v) 207 | sql = """ 208 | select 209 | [{column}] as value, 210 | preview_transform([{column}]) as preview 211 | from [{table}] limit 10 212 | """.format( 213 | column=columns[0], table=table 214 | ) 215 | for row in db.execute(sql).fetchall(): 216 | print(row[0]) 217 | print(" --- becomes:") 218 | print(row[1]) 219 | print() 220 | elif multi: 221 | _transform_multi(db_path, table, columns[0], fn, drop, silent) 222 | else: 223 | _transform(db_path, table, columns, fn, output, output_type, drop, silent) 224 | 225 | 226 | def _transform(db_path, table, columns, fn, output, output_type, drop, silent): 227 | db = sqlite_utils.Database(db_path) 228 | count_sql = "select count(*) from [{}]".format(table) 229 | todo_count = list(db.execute(count_sql).fetchall())[0][0] * len(columns) 230 | 231 | if drop and not output: 232 | raise click.ClickException("--drop can only be used with --output or --multi") 233 | 234 | if output is not None: 235 | if output not in db[table].columns_dict: 236 | db[table].add_column(output, output_type or "text") 237 | 238 | with tqdm.tqdm(total=todo_count, disable=silent) as bar: 239 | 240 | def transform_value(v): 241 | bar.update(1) 242 | if not v: 243 | return v 244 | return fn(v) 245 | 246 | db.register_function(transform_value) 247 | sql = "update [{table}] set {sets};".format( 248 | table=table, 249 | sets=", ".join( 250 | [ 251 | "[{output_column}] = transform_value([{column}])".format( 252 | output_column=output or column, column=column 253 | ) 254 | for column in columns 255 | ] 256 | ), 257 | ) 258 | with db.conn: 259 | db.execute(sql) 260 | if drop: 261 | db[table].transform(drop=columns) 262 | 263 | 264 | def _transform_multi(db_path, table, column, fn, drop, silent): 265 | db = 
sqlite_utils.Database(db_path) 266 | # First we execute the function 267 | pk_to_values = {} 268 | new_column_types = {} 269 | pks = [column.name for column in db[table].columns if column.is_pk] 270 | if not pks: 271 | pks = ["rowid"] 272 | with tqdm.tqdm(total=db[table].count, disable=silent, desc="1: Evaluating") as bar: 273 | for row in db[table].rows_where( 274 | select=", ".join( 275 | "[{}]".format(column_name) for column_name in (pks + [column]) 276 | ) 277 | ): 278 | row_pk = tuple(row[pk] for pk in pks) 279 | if len(row_pk) == 1: 280 | row_pk = row_pk[0] 281 | values = fn(row[column]) 282 | if values is not None and not isinstance(values, dict): 283 | raise click.ClickException( 284 | "With --multi code must return a Python dictionary - returned {}".format( 285 | repr(values) 286 | ) 287 | ) 288 | if values: 289 | for key, value in values.items(): 290 | new_column_types.setdefault(key, set()).add(type(value)) 291 | pk_to_values[row_pk] = values 292 | bar.update(1) 293 | 294 | # Add any new columns 295 | columns_to_create = _suggest_column_types(new_column_types) 296 | for column_name, column_type in columns_to_create.items(): 297 | if column_name not in db[table].columns_dict: 298 | db[table].add_column(column_name, column_type) 299 | 300 | # Run the updates 301 | with tqdm.tqdm(total=db[table].count, disable=silent, desc="2: Updating") as bar: 302 | with db.conn: 303 | for pk, updates in pk_to_values.items(): 304 | db[table].update(pk, updates) 305 | bar.update(1) 306 | if drop: 307 | db[table].transform(drop=(column,)) 308 | 309 | 310 | def _suggest_column_types(all_column_types): 311 | column_types = {} 312 | for key, types in all_column_types.items(): 313 | # Ignore null values if at least one other type present: 314 | if len(types) > 1: 315 | types.discard(None.__class__) 316 | if {None.__class__} == types: 317 | t = str 318 | elif len(types) == 1: 319 | t = list(types)[0] 320 | # But if it's a subclass of list / tuple / dict, use str 321 | # instead as we will be storing it as JSON in the table 322 | for superclass in (list, tuple, dict): 323 | if issubclass(t, superclass): 324 | t = str 325 | elif {int, bool}.issuperset(types): 326 | t = int 327 | elif {int, float, bool}.issuperset(types): 328 | t = float 329 | elif {bytes, str}.issuperset(types): 330 | t = bytes 331 | else: 332 | t = str 333 | column_types[key] = t 334 | return column_types 335 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | import pathlib 2 | import pytest 3 | import sqlite_utils 4 | 5 | 6 | @pytest.fixture 7 | def test_db_and_path(fresh_db_and_path): 8 | db, db_path = fresh_db_and_path 9 | db["example"].insert_all( 10 | [ 11 | {"id": 1, "dt": "5th October 2019 12:04"}, 12 | {"id": 2, "dt": "6th October 2019 00:05:06"}, 13 | {"id": 3, "dt": ""}, 14 | {"id": 4, "dt": None}, 15 | ], 16 | pk="id", 17 | ) 18 | return db, db_path 19 | 20 | 21 | @pytest.fixture 22 | def fresh_db_and_path(tmpdir): 23 | db_path = str(pathlib.Path(tmpdir) / "data.db") 24 | db = sqlite_utils.Database(db_path) 25 | return db, db_path 26 | -------------------------------------------------------------------------------- /tests/test_jsonsplit.py: -------------------------------------------------------------------------------- 1 | from click.testing import CliRunner 2 | import json 3 | import pathlib 4 | import pytest 5 | from sqlite_transform import cli 6 | import sqlite_utils 7 | 8 | 9 | 
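# These tests exercise the jsonsplit subcommand end-to-end through click's CliRunner:
# each test builds a small SQLite database with sqlite_utils, invokes the CLI against it,
# then asserts on the rewritten rows (and on the extra column when --output is used).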
@pytest.mark.parametrize("delimiter", [None, ";", "-"]) 10 | def test_jsonsplit(tmpdir, delimiter): 11 | db_path = str(pathlib.Path(tmpdir) / "data.db") 12 | db = sqlite_utils.Database(db_path) 13 | db["example"].insert_all( 14 | [ 15 | {"id": 1, "tags": (delimiter or ",").join(["foo", "bar"])}, 16 | {"id": 2, "tags": (delimiter or ",").join(["bar", "baz"])}, 17 | ], 18 | pk="id", 19 | ) 20 | args = ["jsonsplit", db_path, "example", "tags"] 21 | if delimiter is not None: 22 | args.extend(["--delimiter", delimiter]) 23 | result = CliRunner().invoke(cli.cli, args) 24 | assert 0 == result.exit_code, result.output 25 | assert list(db["example"].rows) == [ 26 | {"id": 1, "tags": '["foo", "bar"]'}, 27 | {"id": 2, "tags": '["bar", "baz"]'}, 28 | ] 29 | 30 | 31 | @pytest.mark.parametrize( 32 | "type,expected_array", 33 | ( 34 | (None, ["1", "2", "3"]), 35 | ("float", [1.0, 2.0, 3.0]), 36 | ("int", [1, 2, 3]), 37 | ), 38 | ) 39 | def test_jsonsplit_type(fresh_db_and_path, type, expected_array): 40 | db, db_path = fresh_db_and_path 41 | db["example"].insert_all( 42 | [ 43 | {"id": 1, "records": "1,2,3"}, 44 | ], 45 | pk="id", 46 | ) 47 | args = ["jsonsplit", db_path, "example", "records"] 48 | if type is not None: 49 | args.extend(("--type", type)) 50 | result = CliRunner().invoke(cli.cli, args) 51 | assert 0 == result.exit_code, result.output 52 | assert json.loads(db["example"].get(1)["records"]) == expected_array 53 | 54 | 55 | @pytest.mark.parametrize("drop", (True, False)) 56 | def test_jsonsplit_output(fresh_db_and_path, drop): 57 | db, db_path = fresh_db_and_path 58 | db["example"].insert_all( 59 | [ 60 | {"id": 1, "records": "1,2,3"}, 61 | ], 62 | pk="id", 63 | ) 64 | args = ["jsonsplit", db_path, "example", "records", "--output", "tags"] 65 | if drop: 66 | args += ["--drop"] 67 | result = CliRunner().invoke(cli.cli, args) 68 | assert 0 == result.exit_code, result.output 69 | expected = { 70 | "id": 1, 71 | "records": "1,2,3", 72 | "tags": '["1", "2", "3"]', 73 | } 74 | if drop: 75 | del expected["records"] 76 | assert db["example"].get(1) == expected 77 | -------------------------------------------------------------------------------- /tests/test_lambda.py: -------------------------------------------------------------------------------- 1 | from click.testing import CliRunner 2 | from sqlite_transform import cli 3 | import textwrap 4 | import pytest 5 | 6 | 7 | @pytest.mark.parametrize( 8 | "code", 9 | [ 10 | "return value.replace('October', 'Spooktober')", 11 | # Return is optional: 12 | "value.replace('October', 'Spooktober')", 13 | ], 14 | ) 15 | def test_lambda_single_line(test_db_and_path, code): 16 | db, db_path = test_db_and_path 17 | result = CliRunner().invoke( 18 | cli.cli, ["lambda", db_path, "example", "dt", "--code", code] 19 | ) 20 | assert 0 == result.exit_code, result.output 21 | assert [ 22 | {"id": 1, "dt": "5th Spooktober 2019 12:04"}, 23 | {"id": 2, "dt": "6th Spooktober 2019 00:05:06"}, 24 | {"id": 3, "dt": ""}, 25 | {"id": 4, "dt": None}, 26 | ] == list(db["example"].rows) 27 | 28 | 29 | def test_lambda_multiple_lines(test_db_and_path): 30 | db, db_path = test_db_and_path 31 | result = CliRunner().invoke( 32 | cli.cli, 33 | [ 34 | "lambda", 35 | db_path, 36 | "example", 37 | "dt", 38 | "--code", 39 | "v = value.replace('October', 'Spooktober')\nreturn v.upper()", 40 | ], 41 | ) 42 | assert 0 == result.exit_code, result.output 43 | assert [ 44 | {"id": 1, "dt": "5TH SPOOKTOBER 2019 12:04"}, 45 | {"id": 2, "dt": "6TH SPOOKTOBER 2019 00:05:06"}, 46 | {"id": 3, "dt": ""}, 
47 | {"id": 4, "dt": None}, 48 | ] == list(db["example"].rows) 49 | 50 | 51 | def test_lambda_import(test_db_and_path): 52 | db, db_path = test_db_and_path 53 | result = CliRunner().invoke( 54 | cli.cli, 55 | [ 56 | "lambda", 57 | db_path, 58 | "example", 59 | "dt", 60 | "--code", 61 | "return re.sub('O..', 'OXX', value)", 62 | "--import", 63 | "re", 64 | ], 65 | ) 66 | assert 0 == result.exit_code, result.output 67 | assert [ 68 | {"id": 1, "dt": "5th OXXober 2019 12:04"}, 69 | {"id": 2, "dt": "6th OXXober 2019 00:05:06"}, 70 | {"id": 3, "dt": ""}, 71 | {"id": 4, "dt": None}, 72 | ] == list(db["example"].rows) 73 | 74 | 75 | def test_lambda_dryrun(test_db_and_path): 76 | db, db_path = test_db_and_path 77 | result = CliRunner().invoke( 78 | cli.cli, 79 | [ 80 | "lambda", 81 | db_path, 82 | "example", 83 | "dt", 84 | "--code", 85 | "return re.sub('O..', 'OXX', value)", 86 | "--import", 87 | "re", 88 | "--dry-run", 89 | ], 90 | ) 91 | assert result.exit_code == 0 92 | assert result.output.strip() == ( 93 | "5th October 2019 12:04\n" 94 | " --- becomes:\n" 95 | "5th OXXober 2019 12:04\n" 96 | "\n" 97 | "6th October 2019 00:05:06\n" 98 | " --- becomes:\n" 99 | "6th OXXober 2019 00:05:06\n" 100 | "\n" 101 | "\n" 102 | " --- becomes:\n" 103 | "\n" 104 | "\n" 105 | "None\n" 106 | " --- becomes:\n" 107 | "None" 108 | ) 109 | # But it should not have actually modified the table data 110 | assert list(db["example"].rows) == [ 111 | {"id": 1, "dt": "5th October 2019 12:04"}, 112 | {"id": 2, "dt": "6th October 2019 00:05:06"}, 113 | {"id": 3, "dt": ""}, 114 | {"id": 4, "dt": None}, 115 | ] 116 | 117 | 118 | @pytest.mark.parametrize("drop", (True, False)) 119 | def test_lambda_output_column(test_db_and_path, drop): 120 | db, db_path = test_db_and_path 121 | args = [ 122 | "lambda", 123 | db_path, 124 | "example", 125 | "dt", 126 | "--code", 127 | "value.replace('October', 'Spooktober')", 128 | "--output", 129 | "newcol", 130 | ] 131 | if drop: 132 | args += ["--drop"] 133 | result = CliRunner().invoke(cli.cli, args) 134 | assert 0 == result.exit_code, result.output 135 | expected = [ 136 | { 137 | "id": 1, 138 | "dt": "5th October 2019 12:04", 139 | "newcol": "5th Spooktober 2019 12:04", 140 | }, 141 | { 142 | "id": 2, 143 | "dt": "6th October 2019 00:05:06", 144 | "newcol": "6th Spooktober 2019 00:05:06", 145 | }, 146 | {"id": 3, "dt": "", "newcol": ""}, 147 | {"id": 4, "dt": None, "newcol": None}, 148 | ] 149 | if drop: 150 | for row in expected: 151 | del row["dt"] 152 | assert list(db["example"].rows) == expected 153 | 154 | 155 | @pytest.mark.parametrize( 156 | "output_type,expected", 157 | ( 158 | ("text", [(1, "1"), (2, "2"), (3, "3"), (4, "4")]), 159 | ("float", [(1, 1.0), (2, 2.0), (3, 3.0), (4, 4.0)]), 160 | ("integer", [(1, 1), (2, 2), (3, 3), (4, 4)]), 161 | (None, [(1, "1"), (2, "2"), (3, "3"), (4, "4")]), 162 | ), 163 | ) 164 | def test_lambda_output_column_output_type(test_db_and_path, output_type, expected): 165 | db, db_path = test_db_and_path 166 | args = [ 167 | "lambda", 168 | db_path, 169 | "example", 170 | "id", 171 | "--code", 172 | "value", 173 | "--output", 174 | "new_id", 175 | ] 176 | if output_type: 177 | args += ["--output-type", output_type] 178 | result = CliRunner().invoke( 179 | cli.cli, 180 | args, 181 | ) 182 | assert 0 == result.exit_code, result.output 183 | assert expected == list(db.execute("select id, new_id from example")) 184 | 185 | 186 | @pytest.mark.parametrize( 187 | "options,expected_error", 188 | [ 189 | ( 190 | [ 191 | "dt", 192 | "id", 193 | "--code", 
194 | "value.replace('October', 'Spooktober')", 195 | "--output", 196 | "newcol", 197 | ], 198 | "Cannot use --output with more than one column", 199 | ), 200 | ( 201 | [ 202 | "dt", 203 | "--code", 204 | "value.replace('October', 'Spooktober')", 205 | "--output", 206 | "newcol", 207 | "--output-type", 208 | "invalid", 209 | ], 210 | "Error: Invalid value for '--output-type'", 211 | ), 212 | ( 213 | [ 214 | "--code", 215 | "value.replace('October', 'Spooktober')", 216 | ], 217 | "Missing argument 'COLUMNS...'", 218 | ), 219 | ], 220 | ) 221 | def test_lambda_output_error(test_db_and_path, options, expected_error): 222 | db_path = test_db_and_path[1] 223 | result = CliRunner().invoke( 224 | cli.cli, 225 | [ 226 | "lambda", 227 | db_path, 228 | "example", 229 | ] 230 | + options, 231 | ) 232 | assert result.exit_code != 0 233 | assert expected_error in result.output 234 | 235 | 236 | @pytest.mark.parametrize("drop", (True, False)) 237 | def test_lambda_multi(fresh_db_and_path, drop): 238 | db, db_path = fresh_db_and_path 239 | db["creatures"].insert_all( 240 | [ 241 | {"id": 1, "name": "Simon"}, 242 | {"id": 2, "name": "Cleo"}, 243 | ], 244 | pk="id", 245 | ) 246 | args = [ 247 | "lambda", 248 | db_path, 249 | "creatures", 250 | "name", 251 | "--multi", 252 | "--code", 253 | '{"upper": value.upper(), "lower": value.lower()}', 254 | ] 255 | if drop: 256 | args += ["--drop"] 257 | result = CliRunner().invoke(cli.cli, args) 258 | assert result.exit_code == 0, result.output 259 | expected = [ 260 | {"id": 1, "name": "Simon", "upper": "SIMON", "lower": "simon"}, 261 | {"id": 2, "name": "Cleo", "upper": "CLEO", "lower": "cleo"}, 262 | ] 263 | if drop: 264 | for row in expected: 265 | del row["name"] 266 | assert list(db["creatures"].rows) == expected 267 | 268 | 269 | def test_lambda_multi_complex_column_types(fresh_db_and_path): 270 | db, db_path = fresh_db_and_path 271 | db["rows"].insert_all( 272 | [ 273 | {"id": 1}, 274 | {"id": 2}, 275 | {"id": 3}, 276 | {"id": 4}, 277 | ], 278 | pk="id", 279 | ) 280 | code = textwrap.dedent( 281 | """ 282 | if value == 1: 283 | return {"is_str": "", "is_float": 1.2, "is_int": None} 284 | elif value == 2: 285 | return {"is_float": 1, "is_int": 12} 286 | elif value == 3: 287 | return {"is_bytes": b"blah"} 288 | """ 289 | ) 290 | result = CliRunner().invoke( 291 | cli.cli, 292 | [ 293 | "lambda", 294 | db_path, 295 | "rows", 296 | "id", 297 | "--multi", 298 | "--code", 299 | code, 300 | ], 301 | ) 302 | assert result.exit_code == 0, result.output 303 | assert list(db["rows"].rows) == [ 304 | {"id": 1, "is_str": "", "is_float": 1.2, "is_int": None, "is_bytes": None}, 305 | {"id": 2, "is_str": None, "is_float": 1.0, "is_int": 12, "is_bytes": None}, 306 | { 307 | "id": 3, 308 | "is_str": None, 309 | "is_float": None, 310 | "is_int": None, 311 | "is_bytes": b"blah", 312 | }, 313 | {"id": 4, "is_str": None, "is_float": None, "is_int": None, "is_bytes": None}, 314 | ] 315 | assert db["rows"].schema == ( 316 | "CREATE TABLE [rows] (\n" 317 | " [id] INTEGER PRIMARY KEY\n" 318 | ", [is_str] TEXT, [is_float] FLOAT, [is_int] INTEGER, [is_bytes] BLOB)" 319 | ) 320 | -------------------------------------------------------------------------------- /tests/test_parsedate.py: -------------------------------------------------------------------------------- 1 | from click.testing import CliRunner 2 | from sqlite_transform import cli 3 | import pytest 4 | 5 | 6 | def test_parsedate(test_db_and_path): 7 | db, db_path = test_db_and_path 8 | result = CliRunner().invoke(cli.cli, 
["parsedate", db_path, "example", "dt"]) 9 | assert 0 == result.exit_code, result.output 10 | assert [ 11 | {"id": 1, "dt": "2019-10-05"}, 12 | {"id": 2, "dt": "2019-10-06"}, 13 | {"id": 3, "dt": ""}, 14 | {"id": 4, "dt": None}, 15 | ] == list(db["example"].rows) 16 | 17 | 18 | def test_parsedatetime(test_db_and_path): 19 | db, db_path = test_db_and_path 20 | result = CliRunner().invoke(cli.cli, ["parsedatetime", db_path, "example", "dt"]) 21 | assert 0 == result.exit_code, result.output 22 | assert [ 23 | {"id": 1, "dt": "2019-10-05T12:04:00"}, 24 | {"id": 2, "dt": "2019-10-06T00:05:06"}, 25 | {"id": 3, "dt": ""}, 26 | {"id": 4, "dt": None}, 27 | ] == list(db["example"].rows) 28 | 29 | 30 | @pytest.mark.parametrize("command", ("parsedate", "parsedatetime", "jsonsplit")) 31 | def test_column_required(test_db_and_path, command): 32 | _, db_path = test_db_and_path 33 | result = CliRunner().invoke(cli.cli, [command, db_path, "example"]) 34 | assert result.exit_code == 2, result.output 35 | assert "Error: Missing argument 'COLUMNS...'" in result.output 36 | 37 | 38 | @pytest.mark.parametrize("command", ("parsedate", "parsedatetime", "jsonsplit")) 39 | def test_cannot_use_drop_without_multi_or_output(test_db_and_path, command): 40 | _, db_path = test_db_and_path 41 | result = CliRunner().invoke(cli.cli, [command, db_path, "example", "id", "--drop"]) 42 | assert result.exit_code == 1, result.output 43 | assert "Error: --drop can only be used with --output or --multi" in result.output 44 | 45 | 46 | @pytest.mark.parametrize( 47 | "command,options,expected", 48 | ( 49 | ("parsedate", [], "2005-03-04"), 50 | ("parsedate", ["--dayfirst"], "2005-04-03"), 51 | ("parsedatetime", [], "2005-03-04T00:00:00"), 52 | ("parsedatetime", ["--dayfirst"], "2005-04-03T00:00:00"), 53 | ), 54 | ) 55 | def test_dayfirst_yearfirst(fresh_db_and_path, command, options, expected): 56 | db, db_path = fresh_db_and_path 57 | db["example"].insert_all( 58 | [ 59 | {"id": 1, "dt": "03/04/05"}, 60 | ], 61 | pk="id", 62 | ) 63 | result = CliRunner().invoke(cli.cli, [command, db_path, "example", "dt"] + options) 64 | assert result.exit_code == 0 65 | assert list(db["example"].rows) == [ 66 | {"id": 1, "dt": expected}, 67 | ] 68 | 69 | 70 | def test_parsedatetime_output(test_db_and_path): 71 | db, db_path = test_db_and_path 72 | result = CliRunner().invoke( 73 | cli.cli, ["parsedatetime", db_path, "example", "dt", "--output", "parsed"] 74 | ) 75 | assert result.exit_code == 0, result.output 76 | assert list(db["example"].rows) == [ 77 | {"id": 1, "dt": "5th October 2019 12:04", "parsed": "2019-10-05T12:04:00"}, 78 | {"id": 2, "dt": "6th October 2019 00:05:06", "parsed": "2019-10-06T00:05:06"}, 79 | {"id": 3, "dt": "", "parsed": ""}, 80 | {"id": 4, "dt": None, "parsed": None}, 81 | ] 82 | 83 | 84 | def test_parsedatetime_output_drop(test_db_and_path): 85 | db, db_path = test_db_and_path 86 | result = CliRunner().invoke( 87 | cli.cli, 88 | ["parsedatetime", db_path, "example", "dt", "--output", "parsed", "--drop"], 89 | ) 90 | assert result.exit_code == 0, result.output 91 | assert list(db["example"].rows) == [ 92 | {"id": 1, "parsed": "2019-10-05T12:04:00"}, 93 | {"id": 2, "parsed": "2019-10-06T00:05:06"}, 94 | {"id": 3, "parsed": ""}, 95 | {"id": 4, "parsed": None}, 96 | ] 97 | --------------------------------------------------------------------------------