├── .github └── workflows │ ├── publish.yml │ └── test.yml ├── .gitignore ├── LICENSE ├── README.md ├── setup.py ├── sqlite_transform ├── __init__.py └── cli.py └── tests ├── conftest.py ├── test_jsonsplit.py ├── test_lambda.py └── test_parsedate.py /.github/workflows/publish.yml: -------------------------------------------------------------------------------- 1 | name: Publish Python Package 2 | 3 | on: 4 | release: 5 | types: [created] 6 | 7 | jobs: 8 | test: 9 | runs-on: ubuntu-latest 10 | strategy: 11 | matrix: 12 | python-version: [3.6, 3.7, 3.8, 3.9] 13 | steps: 14 | - uses: actions/checkout@v2 15 | - name: Set up Python ${{ matrix.python-version }} 16 | uses: actions/setup-python@v2 17 | with: 18 | python-version: ${{ matrix.python-version }} 19 | - uses: actions/cache@v2 20 | name: Configure pip caching 21 | with: 22 | path: ~/.cache/pip 23 | key: ${{ runner.os }}-pip-${{ hashFiles('**/setup.py') }} 24 | restore-keys: | 25 | ${{ runner.os }}-pip- 26 | - name: Install dependencies 27 | run: | 28 | pip install -e '.[test]' 29 | - name: Run tests 30 | run: | 31 | pytest 32 | deploy: 33 | runs-on: ubuntu-latest 34 | needs: [test] 35 | steps: 36 | - uses: actions/checkout@v2 37 | - name: Set up Python 38 | uses: actions/setup-python@v2 39 | with: 40 | python-version: '3.9' 41 | - uses: actions/cache@v2 42 | name: Configure pip caching 43 | with: 44 | path: ~/.cache/pip 45 | key: ${{ runner.os }}-publish-pip-${{ hashFiles('**/setup.py') }} 46 | restore-keys: | 47 | ${{ runner.os }}-publish-pip- 48 | - name: Install dependencies 49 | run: | 50 | pip install setuptools wheel twine 51 | - name: Publish 52 | env: 53 | TWINE_USERNAME: __token__ 54 | TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }} 55 | run: | 56 | python setup.py sdist bdist_wheel 57 | twine upload dist/* 58 | 59 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: Test 2 | 3 | on: [push] 4 | 5 | jobs: 6 | test: 7 | runs-on: ubuntu-latest 8 | strategy: 9 | matrix: 10 | python-version: [3.6, 3.7, 3.8, 3.9] 11 | steps: 12 | - uses: actions/checkout@v2 13 | - name: Set up Python ${{ matrix.python-version }} 14 | uses: actions/setup-python@v2 15 | with: 16 | python-version: ${{ matrix.python-version }} 17 | - uses: actions/cache@v2 18 | name: Configure pip caching 19 | with: 20 | path: ~/.cache/pip 21 | key: ${{ runner.os }}-pip-${{ hashFiles('**/setup.py') }} 22 | restore-keys: | 23 | ${{ runner.os }}-pip- 24 | - name: Install dependencies 25 | run: | 26 | pip install -e '.[test]' 27 | - name: Run tests 28 | run: | 29 | pytest 30 | 31 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .venv 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | venv 6 | .eggs 7 | .pytest_cache 8 | *.egg-info -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 
11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 
134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 
193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # sqlite-transform 2 | 3 | ![No longer maintained](https://img.shields.io/badge/no%20longer-maintained-red) 4 | [![PyPI](https://img.shields.io/pypi/v/sqlite-transform.svg)](https://pypi.org/project/sqlite-transform/) 5 | [![Changelog](https://img.shields.io/github/v/release/simonw/sqlite-transform?include_prereleases&label=changelog)](https://github.com/simonw/sqlite-transform/releases) 6 | [![Tests](https://github.com/simonw/sqlite-transform/workflows/Test/badge.svg)](https://github.com/simonw/sqlite-transform/actions?query=workflow%3ATest) 7 | [![License](https://img.shields.io/badge/license-Apache%202.0-blue.svg)](https://github.com/dogsheep/sqlite-transform/blob/main/LICENSE) 8 | 9 | Tool for running transformations on columns in a SQLite database. 10 | 11 | > **:warning: This tool is no longer maintained** 12 | > 13 | > I added a new tool to [sqlite-utils](https://sqlite-utils.datasette.io/) called [sqlite-utils convert](https://sqlite-utils.datasette.io/en/stable/cli.html#converting-data-in-columns) which provides a superset of the functionality originally provided here. `sqlite-transform` is no longer maintained, and I recommend switching to using `sqlite-utils convert` instead. 14 | 15 | ## How to install 16 | 17 | pip install sqlite-transform 18 | 19 | ## parsedate and parsedatetime 20 | 21 | These subcommands will run all values in the specified column through `dateutil.parser.parse()` and replace them with the result, formatted as an ISO timestamp or ISO date. 22 | 23 | For example, if a row in the database has an `opened` column which contains `10/10/2019 08:10:00 PM`, running the following command: 24 | 25 | sqlite-transform parsedatetime my.db mytable opened 26 | 27 | Will result in that value being replaced by `2019-10-10T20:10:00`. 28 | 29 | Using the `parsedate` subcommand here would result in `2019-10-10` instead. 30 | 31 | In the case of ambiguous dates such as `03/04/05` these commands both default to assuming American-style `mm/dd/yy` format. You can pass `--dayfirst` to specify that the day should be assumed to be first, or `--yearfirst` for the year. 32 | 33 | ## jsonsplit 34 | 35 | The `jsonsplit` subcommand takes columns that contain a comma-separated list, for example a `tags` column containing records like `"trees,park,dogs"` and converts it into a JSON array `["trees", "park", "dogs"]`. 36 | 37 | This is useful for taking advantage of Datasette's [Facet by JSON array](https://docs.datasette.io/en/stable/facets.html#facet-by-json-array) feature.
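Under the hood the transformation is a simple split-and-serialize, mirroring the `convert()` helper in `sqlite_transform/cli.py`. A minimal standalone sketch of the same logic (the `split_to_json` name is just for illustration):

```python
import json


def split_to_json(value, delimiter=","):
    # Split on the delimiter, strip surrounding whitespace from each part,
    # then serialize the parts back out as a JSON array of strings.
    return json.dumps([part.strip() for part in value.split(delimiter)])


print(split_to_json("trees,park,dogs"))
# ["trees", "park", "dogs"]
```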
38 | 39 | sqlite-transform jsonsplit my.db mytable tags 40 | 41 | It defaults to splitting on commas, but you can specify a different delimiter character using the `--delimiter` option, for example: 42 | 43 | sqlite-transform jsonsplit \ 44 | my.db mytable tags --delimiter ';' 45 | 46 | Values within the array will be treated as strings, so a column containing `123,552,775` will be converted into the JSON array `["123", "552", "775"]`. 47 | 48 | You can specify a different type for these values using `--type int` or `--type float`, for example: 49 | 50 | sqlite-transform jsonsplit \ 51 | my.db mytable tags --type int 52 | 53 | This will result in that column being converted into `[123, 552, 775]`. 54 | 55 | ## lambda for executing your own code 56 | 57 | The `lambda` subcommand lets you specify Python code which will be executed against the column. 58 | 59 | Here's how to convert a column to uppercase: 60 | 61 | sqlite-transform lambda my.db mytable mycolumn --code='str(value).upper()' 62 | 63 | The code you provide will be compiled into a function that takes `value` as a single argument. You can break your function body into multiple lines, provided the last line is a `return` statement: 64 | 65 | sqlite-transform lambda my.db mytable mycolumn --code='value = str(value) 66 | return value.upper()' 67 | 68 | You can also specify Python modules that should be imported and made available to your code using one or more `--import` options: 69 | 70 | sqlite-transform lambda my.db mytable mycolumn \ 71 | --code='"\n".join(textwrap.wrap(value, 10))' \ 72 | --import=textwrap 73 | 74 | The `--dry-run` option will output a preview of the transformation against the first ten rows, without modifying the database. 75 | 76 | ## Saving the result to a separate column 77 | 78 | Each of these commands accepts optional `--output` and `--output-type` options. These can be used to save the result of the transformation to a separate column, which will be created if the column does not already exist. 79 | 80 | To save the result of `jsonsplit` to a new column called `json_tags`, use the following: 81 | 82 | sqlite-transform jsonsplit my.db mytable tags \ 83 | --output json_tags 84 | 85 | The type of the created column defaults to `text`, but a different column type can be specified using `--output-type`. This example will create a new floating point column called `float_id` with a copy of each item's ID increased by 0.5: 86 | 87 | sqlite-transform lambda my.db mytable id \ 88 | --code 'float(value) + 0.5' \ 89 | --output float_id \ 90 | --output-type float 91 | 92 | You can drop the original column at the end of the operation by adding `--drop`. 93 | 94 | ## Splitting a column into multiple columns 95 | 96 | Sometimes you may wish to convert a single column into multiple derived columns. For example, you may have a `location` column containing `latitude,longitude` values which you wish to split out into separate `latitude` and `longitude` columns. 97 | 98 | You can achieve this using the `--multi` option to `sqlite-transform lambda`. This option expects your `--code` function to return a Python dictionary: new columns will be created and populated for each of the keys in that dictionary.
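Concretely, the `--code` body receives the original column value and should return a dictionary mapping new column names to values. Here is a minimal sketch of such a function in plain Python, matching the `latitude,longitude` example that follows (the `split_location` name is only for illustration):

```python
def split_location(value):
    # e.g. "37.77,-122.41" -> {"latitude": 37.77, "longitude": -122.41}
    latitude, longitude = value.split(",")
    return {
        "latitude": float(latitude),
        "longitude": float(longitude),
    }
```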
99 | 100 | For the `latitude,longitude` example you would use the following: 101 | 102 | sqlite-transform lambda demo.db places location \ 103 | --code 'return { 104 | "latitude": float(value.split(",")[0]), 105 | "longitude": float(value.split(",")[1]), 106 | }' --multi 107 | 108 | The type of the returned values will be taken into account when creating the new columns. In this example, the resulting database schema will look like this: 109 | 110 | ```sql 111 | CREATE TABLE [places] ( 112 | [location] TEXT, 113 | [latitude] FLOAT, 114 | [longitude] FLOAT 115 | ); 116 | ``` 117 | The code function can also return `None`, in which case its output will be ignored. 118 | 119 | You can drop the original column at the end of the operation by adding `--drop`. 120 | 121 | ## Disabling the progress bar 122 | 123 | By default each command will show a progress bar. Pass `-s` or `--silent` to hide that progress bar. 124 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | import os 3 | 4 | VERSION = "1.2.1" 5 | 6 | 7 | def get_long_description(): 8 | with open( 9 | os.path.join(os.path.dirname(os.path.abspath(__file__)), "README.md"), 10 | encoding="utf8", 11 | ) as fp: 12 | return fp.read() 13 | 14 | 15 | setup( 16 | name="sqlite-transform", 17 | description="Tool for running transformations on columns in a SQLite database.", 18 | long_description=get_long_description(), 19 | long_description_content_type="text/markdown", 20 | author="Simon Willison", 21 | url="https://github.com/simonw/sqlite-transform", 22 | license="Apache License, Version 2.0", 23 | version=VERSION, 24 | packages=["sqlite_transform"], 25 | entry_points=""" 26 | [console_scripts] 27 | sqlite-transform=sqlite_transform.cli:cli 28 | """, 29 | install_requires=["dateutils", "tqdm", "click", "sqlite-utils"], 30 | extras_require={"test": ["pytest"]}, 31 | tests_require=["sqlite-transform[test]"], 32 | ) 33 | -------------------------------------------------------------------------------- /sqlite_transform/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/simonw/sqlite-transform/049b2fcb6671f86a0916392e0a84dedc160d8fa3/sqlite_transform/__init__.py -------------------------------------------------------------------------------- /sqlite_transform/cli.py: -------------------------------------------------------------------------------- 1 | import click 2 | from dateutil import parser 3 | import json 4 | import sqlite3 5 | import sqlite_utils 6 | import tqdm 7 | 8 | sqlite3.enable_callback_tracebacks(True) 9 | 10 | 11 | def common_options(fn): 12 | click.option("-s", "--silent", is_flag=True, help="Don't show a progress bar")(fn) 13 | click.option("--drop", is_flag=True, help="Drop original column afterwards")(fn) 14 | click.option( 15 | "--output-type", 16 | help="Column type to use for the output column", 17 | default="text", 18 | type=click.Choice(["integer", "float", "blob", "text"]), 19 | )(fn) 20 | click.option( 21 | "--output", help="Optional separate column to populate with the output" 22 | )(fn) 23 | return fn 24 | 25 | 26 | @click.group() 27 | @click.version_option() 28 | def cli(): 29 | "Tool for running transformations on columns in a SQLite database." 
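# Each subcommand below (parsedate, parsedatetime, jsonsplit and lambda) follows the
# same shape: a DB_PATH argument, a TABLE argument, one or more COLUMNS arguments,
# any command-specific options, plus the shared --output/--output-type/--drop/--silent
# options applied by common_options(). Each builds a Python callable for a single value
# and hands it to _transform(), which registers that callable as a SQLite function and
# applies it with a single UPDATE statement; lambda --multi instead goes through
# _transform_multi(), which evaluates the function row by row in Python and writes the
# returned dictionaries back using sqlite-utils.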
30 | 31 | 32 | @cli.command() 33 | @click.argument( 34 | "db_path", 35 | type=click.Path(file_okay=True, dir_okay=False, allow_dash=False), 36 | required=True, 37 | ) 38 | @click.argument("table", type=str) 39 | @click.argument("columns", type=str, nargs=-1, required=True) 40 | @click.option( 41 | "--dayfirst", 42 | is_flag=True, 43 | help="Assume day comes first in ambiguous dates, e.g. 03/04/05", 44 | ) 45 | @click.option( 46 | "--yearfirst", 47 | is_flag=True, 48 | help="Assume year comes first in ambiguous dates, e.g. 03/04/05", 49 | ) 50 | @common_options 51 | def parsedate( 52 | db_path, table, columns, dayfirst, yearfirst, output, output_type, drop, silent 53 | ): 54 | """ 55 | Parse and convert columns to ISO dates 56 | """ 57 | _transform( 58 | db_path, 59 | table, 60 | columns, 61 | lambda v: parser.parse(v, dayfirst=dayfirst, yearfirst=yearfirst) 62 | .date() 63 | .isoformat(), 64 | output, 65 | output_type, 66 | drop, 67 | silent, 68 | ) 69 | 70 | 71 | @cli.command() 72 | @click.argument( 73 | "db_path", 74 | type=click.Path(file_okay=True, dir_okay=False, allow_dash=False), 75 | required=True, 76 | ) 77 | @click.argument("table", type=str) 78 | @click.argument("columns", type=str, nargs=-1, required=True) 79 | @click.option( 80 | "--dayfirst", 81 | is_flag=True, 82 | help="Assume day comes first in ambiguous dates, e.g. 03/04/05", 83 | ) 84 | @click.option( 85 | "--yearfirst", 86 | is_flag=True, 87 | help="Assume year comes first in ambiguous dates, e.g. 03/04/05", 88 | ) 89 | @common_options 90 | def parsedatetime( 91 | db_path, table, columns, dayfirst, yearfirst, output, output_type, drop, silent 92 | ): 93 | """ 94 | Parse and convert columns to ISO timestamps 95 | """ 96 | _transform( 97 | db_path, 98 | table, 99 | columns, 100 | lambda v: parser.parse(v, dayfirst=dayfirst, yearfirst=yearfirst).isoformat(), 101 | output, 102 | output_type, 103 | drop, 104 | silent, 105 | ) 106 | 107 | 108 | @cli.command() 109 | @click.argument( 110 | "db_path", 111 | type=click.Path(file_okay=True, dir_okay=False, allow_dash=False), 112 | required=True, 113 | ) 114 | @click.argument("table", type=str) 115 | @click.argument("columns", type=str, nargs=-1, required=True) 116 | @click.option("--delimiter", default=",", help="Delimiter to split on") 117 | @click.option( 118 | "--type", 119 | type=click.Choice(("int", "float")), 120 | help="Type to use for values - int or float (defaults to string)", 121 | ) 122 | @common_options 123 | def jsonsplit( 124 | db_path, table, columns, delimiter, type, output, output_type, drop, silent 125 | ): 126 | """ 127 | Convert columns into JSON arrays by splitting on a delimiter 128 | """ 129 | value_convert = lambda s: s.strip() 130 | if type == "int": 131 | value_convert = lambda s: int(s.strip()) 132 | elif type == "float": 133 | value_convert = lambda s: float(s.strip()) 134 | 135 | def convert(value): 136 | return json.dumps([value_convert(s) for s in value.split(delimiter)]) 137 | 138 | _transform(db_path, table, columns, convert, output, output_type, drop, silent) 139 | 140 | 141 | @cli.command(name="lambda") 142 | @click.argument( 143 | "db_path", 144 | type=click.Path(file_okay=True, dir_okay=False, allow_dash=False), 145 | required=True, 146 | ) 147 | @click.argument("table", type=str) 148 | @click.argument("columns", type=str, nargs=-1, required=True) 149 | @click.option( 150 | "--code", type=str, required=True, help="Python code to transform 'value'" 151 | ) 152 | @click.option( 153 | "--import", "imports", type=str, multiple=True, help="Python 
modules to import" 154 | ) 155 | @click.option( 156 | "--dry-run", is_flag=True, help="Show results of running this against first 10 rows" 157 | ) 158 | @click.option( 159 | "--multi", is_flag=True, help="Populate columns for keys in returned dictionary" 160 | ) 161 | @common_options 162 | def lambda_( 163 | db_path, 164 | table, 165 | columns, 166 | code, 167 | imports, 168 | dry_run, 169 | multi, 170 | output, 171 | output_type, 172 | drop, 173 | silent, 174 | ): 175 | """ 176 | Transform columns using Python code you supply. For example: 177 | 178 | \b 179 | $ sqlite-transform lambda my.db mytable mycolumn 180 | --code='"\\n".join(textwrap.wrap(value, 10))' 181 | --import=textwrap 182 | 183 | "value" is a variable with the column value to be transformed. 184 | """ 185 | if output is not None and len(columns) > 1: 186 | raise click.ClickException("Cannot use --output with more than one column") 187 | if multi and len(columns) > 1: 188 | raise click.ClickException("Cannot use --multi with more than one column") 189 | # If single line and no 'return', add the return 190 | if "\n" not in code and not code.strip().startswith("return "): 191 | code = "return {}".format(code) 192 | # Compile the code into a function body called fn(value) 193 | new_code = ["def fn(value):"] 194 | for line in code.split("\n"): 195 | new_code.append(" {}".format(line)) 196 | code_o = compile("\n".join(new_code), "", "exec") 197 | locals = {} 198 | globals = {} 199 | for import_ in imports: 200 | globals[import_] = __import__(import_) 201 | exec(code_o, globals, locals) 202 | fn = locals["fn"] 203 | if dry_run: 204 | # Pull first 20 values for first column and preview them 205 | db = sqlite3.connect(db_path) 206 | db.create_function("preview_transform", 1, lambda v: fn(v) if v else v) 207 | sql = """ 208 | select 209 | [{column}] as value, 210 | preview_transform([{column}]) as preview 211 | from [{table}] limit 10 212 | """.format( 213 | column=columns[0], table=table 214 | ) 215 | for row in db.execute(sql).fetchall(): 216 | print(row[0]) 217 | print(" --- becomes:") 218 | print(row[1]) 219 | print() 220 | elif multi: 221 | _transform_multi(db_path, table, columns[0], fn, drop, silent) 222 | else: 223 | _transform(db_path, table, columns, fn, output, output_type, drop, silent) 224 | 225 | 226 | def _transform(db_path, table, columns, fn, output, output_type, drop, silent): 227 | db = sqlite_utils.Database(db_path) 228 | count_sql = "select count(*) from [{}]".format(table) 229 | todo_count = list(db.execute(count_sql).fetchall())[0][0] * len(columns) 230 | 231 | if drop and not output: 232 | raise click.ClickException("--drop can only be used with --output or --multi") 233 | 234 | if output is not None: 235 | if output not in db[table].columns_dict: 236 | db[table].add_column(output, output_type or "text") 237 | 238 | with tqdm.tqdm(total=todo_count, disable=silent) as bar: 239 | 240 | def transform_value(v): 241 | bar.update(1) 242 | if not v: 243 | return v 244 | return fn(v) 245 | 246 | db.register_function(transform_value) 247 | sql = "update [{table}] set {sets};".format( 248 | table=table, 249 | sets=", ".join( 250 | [ 251 | "[{output_column}] = transform_value([{column}])".format( 252 | output_column=output or column, column=column 253 | ) 254 | for column in columns 255 | ] 256 | ), 257 | ) 258 | with db.conn: 259 | db.execute(sql) 260 | if drop: 261 | db[table].transform(drop=columns) 262 | 263 | 264 | def _transform_multi(db_path, table, column, fn, drop, silent): 265 | db = 
sqlite_utils.Database(db_path) 266 | # First we execute the function 267 | pk_to_values = {} 268 | new_column_types = {} 269 | pks = [column.name for column in db[table].columns if column.is_pk] 270 | if not pks: 271 | pks = ["rowid"] 272 | with tqdm.tqdm(total=db[table].count, disable=silent, desc="1: Evaluating") as bar: 273 | for row in db[table].rows_where( 274 | select=", ".join( 275 | "[{}]".format(column_name) for column_name in (pks + [column]) 276 | ) 277 | ): 278 | row_pk = tuple(row[pk] for pk in pks) 279 | if len(row_pk) == 1: 280 | row_pk = row_pk[0] 281 | values = fn(row[column]) 282 | if values is not None and not isinstance(values, dict): 283 | raise click.ClickException( 284 | "With --multi code must return a Python dictionary - returned {}".format( 285 | repr(values) 286 | ) 287 | ) 288 | if values: 289 | for key, value in values.items(): 290 | new_column_types.setdefault(key, set()).add(type(value)) 291 | pk_to_values[row_pk] = values 292 | bar.update(1) 293 | 294 | # Add any new columns 295 | columns_to_create = _suggest_column_types(new_column_types) 296 | for column_name, column_type in columns_to_create.items(): 297 | if column_name not in db[table].columns_dict: 298 | db[table].add_column(column_name, column_type) 299 | 300 | # Run the updates 301 | with tqdm.tqdm(total=db[table].count, disable=silent, desc="2: Updating") as bar: 302 | with db.conn: 303 | for pk, updates in pk_to_values.items(): 304 | db[table].update(pk, updates) 305 | bar.update(1) 306 | if drop: 307 | db[table].transform(drop=(column,)) 308 | 309 | 310 | def _suggest_column_types(all_column_types): 311 | column_types = {} 312 | for key, types in all_column_types.items(): 313 | # Ignore null values if at least one other type present: 314 | if len(types) > 1: 315 | types.discard(None.__class__) 316 | if {None.__class__} == types: 317 | t = str 318 | elif len(types) == 1: 319 | t = list(types)[0] 320 | # But if it's a subclass of list / tuple / dict, use str 321 | # instead as we will be storing it as JSON in the table 322 | for superclass in (list, tuple, dict): 323 | if issubclass(t, superclass): 324 | t = str 325 | elif {int, bool}.issuperset(types): 326 | t = int 327 | elif {int, float, bool}.issuperset(types): 328 | t = float 329 | elif {bytes, str}.issuperset(types): 330 | t = bytes 331 | else: 332 | t = str 333 | column_types[key] = t 334 | return column_types 335 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | import pathlib 2 | import pytest 3 | import sqlite_utils 4 | 5 | 6 | @pytest.fixture 7 | def test_db_and_path(fresh_db_and_path): 8 | db, db_path = fresh_db_and_path 9 | db["example"].insert_all( 10 | [ 11 | {"id": 1, "dt": "5th October 2019 12:04"}, 12 | {"id": 2, "dt": "6th October 2019 00:05:06"}, 13 | {"id": 3, "dt": ""}, 14 | {"id": 4, "dt": None}, 15 | ], 16 | pk="id", 17 | ) 18 | return db, db_path 19 | 20 | 21 | @pytest.fixture 22 | def fresh_db_and_path(tmpdir): 23 | db_path = str(pathlib.Path(tmpdir) / "data.db") 24 | db = sqlite_utils.Database(db_path) 25 | return db, db_path 26 | -------------------------------------------------------------------------------- /tests/test_jsonsplit.py: -------------------------------------------------------------------------------- 1 | from click.testing import CliRunner 2 | import json 3 | import pathlib 4 | import pytest 5 | from sqlite_transform import cli 6 | import sqlite_utils 7 | 8 | 9 | 
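# These tests exercise the jsonsplit subcommand end-to-end through click's CliRunner:
# each test builds a small SQLite database with sqlite_utils, invokes the CLI against it,
# then asserts on the rewritten rows (and on the extra column when --output is used).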
@pytest.mark.parametrize("delimiter", [None, ";", "-"]) 10 | def test_jsonsplit(tmpdir, delimiter): 11 | db_path = str(pathlib.Path(tmpdir) / "data.db") 12 | db = sqlite_utils.Database(db_path) 13 | db["example"].insert_all( 14 | [ 15 | {"id": 1, "tags": (delimiter or ",").join(["foo", "bar"])}, 16 | {"id": 2, "tags": (delimiter or ",").join(["bar", "baz"])}, 17 | ], 18 | pk="id", 19 | ) 20 | args = ["jsonsplit", db_path, "example", "tags"] 21 | if delimiter is not None: 22 | args.extend(["--delimiter", delimiter]) 23 | result = CliRunner().invoke(cli.cli, args) 24 | assert 0 == result.exit_code, result.output 25 | assert list(db["example"].rows) == [ 26 | {"id": 1, "tags": '["foo", "bar"]'}, 27 | {"id": 2, "tags": '["bar", "baz"]'}, 28 | ] 29 | 30 | 31 | @pytest.mark.parametrize( 32 | "type,expected_array", 33 | ( 34 | (None, ["1", "2", "3"]), 35 | ("float", [1.0, 2.0, 3.0]), 36 | ("int", [1, 2, 3]), 37 | ), 38 | ) 39 | def test_jsonsplit_type(fresh_db_and_path, type, expected_array): 40 | db, db_path = fresh_db_and_path 41 | db["example"].insert_all( 42 | [ 43 | {"id": 1, "records": "1,2,3"}, 44 | ], 45 | pk="id", 46 | ) 47 | args = ["jsonsplit", db_path, "example", "records"] 48 | if type is not None: 49 | args.extend(("--type", type)) 50 | result = CliRunner().invoke(cli.cli, args) 51 | assert 0 == result.exit_code, result.output 52 | assert json.loads(db["example"].get(1)["records"]) == expected_array 53 | 54 | 55 | @pytest.mark.parametrize("drop", (True, False)) 56 | def test_jsonsplit_output(fresh_db_and_path, drop): 57 | db, db_path = fresh_db_and_path 58 | db["example"].insert_all( 59 | [ 60 | {"id": 1, "records": "1,2,3"}, 61 | ], 62 | pk="id", 63 | ) 64 | args = ["jsonsplit", db_path, "example", "records", "--output", "tags"] 65 | if drop: 66 | args += ["--drop"] 67 | result = CliRunner().invoke(cli.cli, args) 68 | assert 0 == result.exit_code, result.output 69 | expected = { 70 | "id": 1, 71 | "records": "1,2,3", 72 | "tags": '["1", "2", "3"]', 73 | } 74 | if drop: 75 | del expected["records"] 76 | assert db["example"].get(1) == expected 77 | -------------------------------------------------------------------------------- /tests/test_lambda.py: -------------------------------------------------------------------------------- 1 | from click.testing import CliRunner 2 | from sqlite_transform import cli 3 | import textwrap 4 | import pytest 5 | 6 | 7 | @pytest.mark.parametrize( 8 | "code", 9 | [ 10 | "return value.replace('October', 'Spooktober')", 11 | # Return is optional: 12 | "value.replace('October', 'Spooktober')", 13 | ], 14 | ) 15 | def test_lambda_single_line(test_db_and_path, code): 16 | db, db_path = test_db_and_path 17 | result = CliRunner().invoke( 18 | cli.cli, ["lambda", db_path, "example", "dt", "--code", code] 19 | ) 20 | assert 0 == result.exit_code, result.output 21 | assert [ 22 | {"id": 1, "dt": "5th Spooktober 2019 12:04"}, 23 | {"id": 2, "dt": "6th Spooktober 2019 00:05:06"}, 24 | {"id": 3, "dt": ""}, 25 | {"id": 4, "dt": None}, 26 | ] == list(db["example"].rows) 27 | 28 | 29 | def test_lambda_multiple_lines(test_db_and_path): 30 | db, db_path = test_db_and_path 31 | result = CliRunner().invoke( 32 | cli.cli, 33 | [ 34 | "lambda", 35 | db_path, 36 | "example", 37 | "dt", 38 | "--code", 39 | "v = value.replace('October', 'Spooktober')\nreturn v.upper()", 40 | ], 41 | ) 42 | assert 0 == result.exit_code, result.output 43 | assert [ 44 | {"id": 1, "dt": "5TH SPOOKTOBER 2019 12:04"}, 45 | {"id": 2, "dt": "6TH SPOOKTOBER 2019 00:05:06"}, 46 | {"id": 3, "dt": ""}, 
47 | {"id": 4, "dt": None}, 48 | ] == list(db["example"].rows) 49 | 50 | 51 | def test_lambda_import(test_db_and_path): 52 | db, db_path = test_db_and_path 53 | result = CliRunner().invoke( 54 | cli.cli, 55 | [ 56 | "lambda", 57 | db_path, 58 | "example", 59 | "dt", 60 | "--code", 61 | "return re.sub('O..', 'OXX', value)", 62 | "--import", 63 | "re", 64 | ], 65 | ) 66 | assert 0 == result.exit_code, result.output 67 | assert [ 68 | {"id": 1, "dt": "5th OXXober 2019 12:04"}, 69 | {"id": 2, "dt": "6th OXXober 2019 00:05:06"}, 70 | {"id": 3, "dt": ""}, 71 | {"id": 4, "dt": None}, 72 | ] == list(db["example"].rows) 73 | 74 | 75 | def test_lambda_dryrun(test_db_and_path): 76 | db, db_path = test_db_and_path 77 | result = CliRunner().invoke( 78 | cli.cli, 79 | [ 80 | "lambda", 81 | db_path, 82 | "example", 83 | "dt", 84 | "--code", 85 | "return re.sub('O..', 'OXX', value)", 86 | "--import", 87 | "re", 88 | "--dry-run", 89 | ], 90 | ) 91 | assert result.exit_code == 0 92 | assert result.output.strip() == ( 93 | "5th October 2019 12:04\n" 94 | " --- becomes:\n" 95 | "5th OXXober 2019 12:04\n" 96 | "\n" 97 | "6th October 2019 00:05:06\n" 98 | " --- becomes:\n" 99 | "6th OXXober 2019 00:05:06\n" 100 | "\n" 101 | "\n" 102 | " --- becomes:\n" 103 | "\n" 104 | "\n" 105 | "None\n" 106 | " --- becomes:\n" 107 | "None" 108 | ) 109 | # But it should not have actually modified the table data 110 | assert list(db["example"].rows) == [ 111 | {"id": 1, "dt": "5th October 2019 12:04"}, 112 | {"id": 2, "dt": "6th October 2019 00:05:06"}, 113 | {"id": 3, "dt": ""}, 114 | {"id": 4, "dt": None}, 115 | ] 116 | 117 | 118 | @pytest.mark.parametrize("drop", (True, False)) 119 | def test_lambda_output_column(test_db_and_path, drop): 120 | db, db_path = test_db_and_path 121 | args = [ 122 | "lambda", 123 | db_path, 124 | "example", 125 | "dt", 126 | "--code", 127 | "value.replace('October', 'Spooktober')", 128 | "--output", 129 | "newcol", 130 | ] 131 | if drop: 132 | args += ["--drop"] 133 | result = CliRunner().invoke(cli.cli, args) 134 | assert 0 == result.exit_code, result.output 135 | expected = [ 136 | { 137 | "id": 1, 138 | "dt": "5th October 2019 12:04", 139 | "newcol": "5th Spooktober 2019 12:04", 140 | }, 141 | { 142 | "id": 2, 143 | "dt": "6th October 2019 00:05:06", 144 | "newcol": "6th Spooktober 2019 00:05:06", 145 | }, 146 | {"id": 3, "dt": "", "newcol": ""}, 147 | {"id": 4, "dt": None, "newcol": None}, 148 | ] 149 | if drop: 150 | for row in expected: 151 | del row["dt"] 152 | assert list(db["example"].rows) == expected 153 | 154 | 155 | @pytest.mark.parametrize( 156 | "output_type,expected", 157 | ( 158 | ("text", [(1, "1"), (2, "2"), (3, "3"), (4, "4")]), 159 | ("float", [(1, 1.0), (2, 2.0), (3, 3.0), (4, 4.0)]), 160 | ("integer", [(1, 1), (2, 2), (3, 3), (4, 4)]), 161 | (None, [(1, "1"), (2, "2"), (3, "3"), (4, "4")]), 162 | ), 163 | ) 164 | def test_lambda_output_column_output_type(test_db_and_path, output_type, expected): 165 | db, db_path = test_db_and_path 166 | args = [ 167 | "lambda", 168 | db_path, 169 | "example", 170 | "id", 171 | "--code", 172 | "value", 173 | "--output", 174 | "new_id", 175 | ] 176 | if output_type: 177 | args += ["--output-type", output_type] 178 | result = CliRunner().invoke( 179 | cli.cli, 180 | args, 181 | ) 182 | assert 0 == result.exit_code, result.output 183 | assert expected == list(db.execute("select id, new_id from example")) 184 | 185 | 186 | @pytest.mark.parametrize( 187 | "options,expected_error", 188 | [ 189 | ( 190 | [ 191 | "dt", 192 | "id", 193 | "--code", 
194 | "value.replace('October', 'Spooktober')", 195 | "--output", 196 | "newcol", 197 | ], 198 | "Cannot use --output with more than one column", 199 | ), 200 | ( 201 | [ 202 | "dt", 203 | "--code", 204 | "value.replace('October', 'Spooktober')", 205 | "--output", 206 | "newcol", 207 | "--output-type", 208 | "invalid", 209 | ], 210 | "Error: Invalid value for '--output-type'", 211 | ), 212 | ( 213 | [ 214 | "--code", 215 | "value.replace('October', 'Spooktober')", 216 | ], 217 | "Missing argument 'COLUMNS...'", 218 | ), 219 | ], 220 | ) 221 | def test_lambda_output_error(test_db_and_path, options, expected_error): 222 | db_path = test_db_and_path[1] 223 | result = CliRunner().invoke( 224 | cli.cli, 225 | [ 226 | "lambda", 227 | db_path, 228 | "example", 229 | ] 230 | + options, 231 | ) 232 | assert result.exit_code != 0 233 | assert expected_error in result.output 234 | 235 | 236 | @pytest.mark.parametrize("drop", (True, False)) 237 | def test_lambda_multi(fresh_db_and_path, drop): 238 | db, db_path = fresh_db_and_path 239 | db["creatures"].insert_all( 240 | [ 241 | {"id": 1, "name": "Simon"}, 242 | {"id": 2, "name": "Cleo"}, 243 | ], 244 | pk="id", 245 | ) 246 | args = [ 247 | "lambda", 248 | db_path, 249 | "creatures", 250 | "name", 251 | "--multi", 252 | "--code", 253 | '{"upper": value.upper(), "lower": value.lower()}', 254 | ] 255 | if drop: 256 | args += ["--drop"] 257 | result = CliRunner().invoke(cli.cli, args) 258 | assert result.exit_code == 0, result.output 259 | expected = [ 260 | {"id": 1, "name": "Simon", "upper": "SIMON", "lower": "simon"}, 261 | {"id": 2, "name": "Cleo", "upper": "CLEO", "lower": "cleo"}, 262 | ] 263 | if drop: 264 | for row in expected: 265 | del row["name"] 266 | assert list(db["creatures"].rows) == expected 267 | 268 | 269 | def test_lambda_multi_complex_column_types(fresh_db_and_path): 270 | db, db_path = fresh_db_and_path 271 | db["rows"].insert_all( 272 | [ 273 | {"id": 1}, 274 | {"id": 2}, 275 | {"id": 3}, 276 | {"id": 4}, 277 | ], 278 | pk="id", 279 | ) 280 | code = textwrap.dedent( 281 | """ 282 | if value == 1: 283 | return {"is_str": "", "is_float": 1.2, "is_int": None} 284 | elif value == 2: 285 | return {"is_float": 1, "is_int": 12} 286 | elif value == 3: 287 | return {"is_bytes": b"blah"} 288 | """ 289 | ) 290 | result = CliRunner().invoke( 291 | cli.cli, 292 | [ 293 | "lambda", 294 | db_path, 295 | "rows", 296 | "id", 297 | "--multi", 298 | "--code", 299 | code, 300 | ], 301 | ) 302 | assert result.exit_code == 0, result.output 303 | assert list(db["rows"].rows) == [ 304 | {"id": 1, "is_str": "", "is_float": 1.2, "is_int": None, "is_bytes": None}, 305 | {"id": 2, "is_str": None, "is_float": 1.0, "is_int": 12, "is_bytes": None}, 306 | { 307 | "id": 3, 308 | "is_str": None, 309 | "is_float": None, 310 | "is_int": None, 311 | "is_bytes": b"blah", 312 | }, 313 | {"id": 4, "is_str": None, "is_float": None, "is_int": None, "is_bytes": None}, 314 | ] 315 | assert db["rows"].schema == ( 316 | "CREATE TABLE [rows] (\n" 317 | " [id] INTEGER PRIMARY KEY\n" 318 | ", [is_str] TEXT, [is_float] FLOAT, [is_int] INTEGER, [is_bytes] BLOB)" 319 | ) 320 | -------------------------------------------------------------------------------- /tests/test_parsedate.py: -------------------------------------------------------------------------------- 1 | from click.testing import CliRunner 2 | from sqlite_transform import cli 3 | import pytest 4 | 5 | 6 | def test_parsedate(test_db_and_path): 7 | db, db_path = test_db_and_path 8 | result = CliRunner().invoke(cli.cli, 
["parsedate", db_path, "example", "dt"]) 9 | assert 0 == result.exit_code, result.output 10 | assert [ 11 | {"id": 1, "dt": "2019-10-05"}, 12 | {"id": 2, "dt": "2019-10-06"}, 13 | {"id": 3, "dt": ""}, 14 | {"id": 4, "dt": None}, 15 | ] == list(db["example"].rows) 16 | 17 | 18 | def test_parsedatetime(test_db_and_path): 19 | db, db_path = test_db_and_path 20 | result = CliRunner().invoke(cli.cli, ["parsedatetime", db_path, "example", "dt"]) 21 | assert 0 == result.exit_code, result.output 22 | assert [ 23 | {"id": 1, "dt": "2019-10-05T12:04:00"}, 24 | {"id": 2, "dt": "2019-10-06T00:05:06"}, 25 | {"id": 3, "dt": ""}, 26 | {"id": 4, "dt": None}, 27 | ] == list(db["example"].rows) 28 | 29 | 30 | @pytest.mark.parametrize("command", ("parsedate", "parsedatetime", "jsonsplit")) 31 | def test_column_required(test_db_and_path, command): 32 | _, db_path = test_db_and_path 33 | result = CliRunner().invoke(cli.cli, [command, db_path, "example"]) 34 | assert result.exit_code == 2, result.output 35 | assert "Error: Missing argument 'COLUMNS...'" in result.output 36 | 37 | 38 | @pytest.mark.parametrize("command", ("parsedate", "parsedatetime", "jsonsplit")) 39 | def test_cannot_use_drop_without_multi_or_output(test_db_and_path, command): 40 | _, db_path = test_db_and_path 41 | result = CliRunner().invoke(cli.cli, [command, db_path, "example", "id", "--drop"]) 42 | assert result.exit_code == 1, result.output 43 | assert "Error: --drop can only be used with --output or --multi" in result.output 44 | 45 | 46 | @pytest.mark.parametrize( 47 | "command,options,expected", 48 | ( 49 | ("parsedate", [], "2005-03-04"), 50 | ("parsedate", ["--dayfirst"], "2005-04-03"), 51 | ("parsedatetime", [], "2005-03-04T00:00:00"), 52 | ("parsedatetime", ["--dayfirst"], "2005-04-03T00:00:00"), 53 | ), 54 | ) 55 | def test_dayfirst_yearfirst(fresh_db_and_path, command, options, expected): 56 | db, db_path = fresh_db_and_path 57 | db["example"].insert_all( 58 | [ 59 | {"id": 1, "dt": "03/04/05"}, 60 | ], 61 | pk="id", 62 | ) 63 | result = CliRunner().invoke(cli.cli, [command, db_path, "example", "dt"] + options) 64 | assert result.exit_code == 0 65 | assert list(db["example"].rows) == [ 66 | {"id": 1, "dt": expected}, 67 | ] 68 | 69 | 70 | def test_parsedatetime_output(test_db_and_path): 71 | db, db_path = test_db_and_path 72 | result = CliRunner().invoke( 73 | cli.cli, ["parsedatetime", db_path, "example", "dt", "--output", "parsed"] 74 | ) 75 | assert result.exit_code == 0, result.output 76 | assert list(db["example"].rows) == [ 77 | {"id": 1, "dt": "5th October 2019 12:04", "parsed": "2019-10-05T12:04:00"}, 78 | {"id": 2, "dt": "6th October 2019 00:05:06", "parsed": "2019-10-06T00:05:06"}, 79 | {"id": 3, "dt": "", "parsed": ""}, 80 | {"id": 4, "dt": None, "parsed": None}, 81 | ] 82 | 83 | 84 | def test_parsedatetime_output_drop(test_db_and_path): 85 | db, db_path = test_db_and_path 86 | result = CliRunner().invoke( 87 | cli.cli, 88 | ["parsedatetime", db_path, "example", "dt", "--output", "parsed", "--drop"], 89 | ) 90 | assert result.exit_code == 0, result.output 91 | assert list(db["example"].rows) == [ 92 | {"id": 1, "parsed": "2019-10-05T12:04:00"}, 93 | {"id": 2, "parsed": "2019-10-06T00:05:06"}, 94 | {"id": 3, "parsed": ""}, 95 | {"id": 4, "parsed": None}, 96 | ] 97 | --------------------------------------------------------------------------------